In [None]:
# without csv file
import re
import numpy as np
import pandas as pd
import joblib
from googletrans import Translator
from transformers import pipeline
import torch
import lightgbm as lgb
from gensim.models import Word2Vec

# Load models
print("Loading models...")
translator = Translator()
sentiment_model = pipeline("sentiment-analysis")
lgb_model = joblib.load("mental_health_classifier_lgb.pkl")
w2v_model = joblib.load("word2vec_model.pkl")

# Clean text function
def clean_text(text):
    text = re.sub(r"http\S+|www\S+|https\S+", "", text)
    text = re.sub(r"[^a-zA-Z0-9\s\?\!]", "", text)
    text = text.lower().strip()
    return text

# Detect and translate using Google Translate
def detect_and_translate(text):
    try:
        detected_lang = translator.detect(text).lang
        if detected_lang != "en":
            text = translator.translate(text, dest="en").text
        return detected_lang, text
    except Exception as e:
        print(f"Translation failed: {e}")
        return "unknown", text

# Perform sentiment analysis
def analyze_sentiment(text):
    try:
        result = sentiment_model(text)
        return result[0]["label"], result[0]["score"]
    except Exception as e:
        print(f"Sentiment analysis failed: {e}")
        return "UNKNOWN", 0.0

# Convert text to vector using Word2Vec
def vectorize_text(text, model):
    words = text.split()
    vector = [model.wv[word] for word in words if word in model.wv]
    return sum(vector) / len(vector) if vector else np.zeros(100)

# Predict using models
def predict_mental_health(text):
    print("\nProcessing your input...")

    # Step 1: Clean, Translate, and Detect Language
    cleaned_text = clean_text(text)
    detected_lang, translated_text = detect_and_translate(cleaned_text)

    print(f"Detected Language: {detected_lang.upper()}")
    print(f"Translated Text: {translated_text}")

    # Step 2: Perform Sentiment Analysis
    sentiment, score = analyze_sentiment(translated_text)
    print(f"Sentiment: {sentiment} (Confidence: {score:.2f})")

    if sentiment == "POSITIVE" and score > 0.85:
        return "✅ No Issue Detected. Stay positive!"

    # Step 3: Predict Using LightGBM
    vectorized_text = np.array([vectorize_text(translated_text, w2v_model)])
    prediction = lgb_model.predict(vectorized_text)
    return f"⚠️ Detected Mental Health Issue: {prediction[0]}"

# Get User Input
while True:
    user_text = input("\nEnter your thoughts or feelings (type 'exit' to quit): ")
    if user_text.lower() == 'exit':
        break
    result = predict_mental_health(user_text)
    print(result)


No model was supplied, defaulted to distilbert/distilbert-base-uncased-finetuned-sst-2-english and revision 714eb0f (https://huggingface.co/distilbert/distilbert-base-uncased-finetuned-sst-2-english).
Using a pipeline without specifying a model name and revision in production is not recommended.


Loading models...


Device set to use cpu



Enter your thoughts or feelings (type 'exit' to quit): i am fine

Processing your input...
Translation failed: [Errno 11001] getaddrinfo failed
Detected Language: UNKNOWN
Translated Text: i am fine
Sentiment: POSITIVE (Confidence: 1.00)
✅ No Issue Detected. Stay positive!

Enter your thoughts or feelings (type 'exit' to quit): priya pagal hai

Processing your input...
Translation failed: [Errno 11001] getaddrinfo failed
Detected Language: UNKNOWN
Translated Text: priya pagal hai
Sentiment: POSITIVE (Confidence: 0.93)
✅ No Issue Detected. Stay positive!

Enter your thoughts or feelings (type 'exit' to quit): Mental Health Awareness Month: I have schizoaffective bipolar type and I just graduated with my BA in English (Magna Cum Laude) at age 28 after dropping out of high school and having intermittently relapsed into psychosis for a duration of 8 hospitalizations. With loving support success is possible

Processing your input...
Translation failed: [Errno 11001] getaddrinfo failed
Detec

In [1]:
#with csv file
import pandas as pd
import numpy as np
import re
import string
import torch
import joblib
from gensim.models import Word2Vec
from googletrans import Translator
from transformers import pipeline
from sklearn.metrics import accuracy_score, classification_report

# Load the data and models
df = pd.read_csv("C:/Users/Prem Gupta/Downloads/filtered_cleaned_data_final2.csv")
w2v_model = joblib.load("word2vec_model.pkl")
classifier_model = joblib.load("mental_health_classifier_lgb.pkl")

# Initialize models
translator = Translator()
sentiment_model = pipeline("sentiment-analysis")

# Tokenize text for Word2Vec
if not w2v_model:
    tokenized_text = [tweet.split() for tweet in df["cleaned_text"]]
    w2v_model = Word2Vec(sentences=tokenized_text, vector_size=100, window=5, min_count=2, workers=4)

# Function to clean text
def clean_text(text):
    text = re.sub(r"http\S+|www\S+|https\S+", "", text, flags=re.MULTILINE)  # Remove URLs
    text = re.sub(r"[^a-zA-Z0-9\s\?\!]", "", text)  # Keep alphanumeric, ?, !
    text = text.lower().strip()
    return text

# Detect and translate language using Google Translate
def detect_and_translate(text):
    detected_lang = translator.detect(text).lang
    if detected_lang != "en":
        translated = translator.translate(text, dest="en").text
        return detected_lang, translated
    return detected_lang, text

# Perform sentiment analysis
def analyze_sentiment(text):
    result = sentiment_model(text)
    return result[0]["label"], result[0]["score"]

# Convert text to vector using Word2Vec
def vectorize_text(text, model):
    words = text.split()
    vector = [model.wv[word] for word in words if word in model.wv]
    return sum(vector) / len(vector) if vector else np.zeros(100)

# Predict mental health issue using LightGBM
def predict_mental_health(text):
    vectorized_text = vectorize_text(text, w2v_model)
    vectorized_text = np.array([vectorized_text])
    predicted_label = classifier_model.predict(vectorized_text)
    return predicted_label[0]

# Main pipeline
user_text = input("Enter your thoughts or feelings: ")
cleaned_text = clean_text(user_text)
detected_lang, translated_text = detect_and_translate(cleaned_text)
sentiment, score = analyze_sentiment(translated_text)

print("\n=== Mental Health Analysis ===")
print(f"Original Text: {user_text}")
print(f"Detected Language: {detected_lang.upper()}")
print(f"Translated Text: {translated_text}")
print(f"Sentiment: {sentiment} (Confidence: {score:.2f})")

if sentiment == "POSITIVE":
    print("✅ No issues detected. Stay positive and take care of yourself! 💙")
else:
    predicted_issue = predict_mental_health(translated_text)
    print(f"⚠️ Potential Mental Health Issue Identified: {predicted_issue}. Please consider seeking professional help. 💜")
    
while True:
    user_text = input("\nEnter your thoughts or feelings (type 'exit' to quit): ")
    if user_text.lower() == 'exit':
        break
    result = predict_mental_health(user_text)
    print(result)


No model was supplied, defaulted to distilbert/distilbert-base-uncased-finetuned-sst-2-english and revision 714eb0f (https://huggingface.co/distilbert/distilbert-base-uncased-finetuned-sst-2-english).
Using a pipeline without specifying a model name and revision in production is not recommended.
Device set to use cpu


Enter your thoughts or feelings: hi


ConnectError: [Errno 11001] getaddrinfo failed