<a href="https://colab.research.google.com/github/Harsh23-glitch/Harsh23-glitch/blob/main/Untitled2.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [22]:
import pandas as pd
import re
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.cluster import KMeans
import joblib

print("Starting Cell 1: Data Preparation and Model Training...")

# --- Data Loading and Cleaning ---
# Each CSV holds the posts of one class; a missing file aborts the cell.
try:
    anxiety = pd.read_csv('anxiety_clean.csv')
    depression = pd.read_csv('depression_cleaned_organized.csv')
    normal = pd.read_csv('normal_clean.csv')
except FileNotFoundError as e:
    print(f"FATAL ERROR: File not found: {e}. Please ensure files are uploaded.")
    raise


def _prepare_source(frame, label):
    """Return a copy of `frame` with a `tweet` text column and a `label` column.

    Args:
        frame: DataFrame read from one of the source CSVs.
        label: Class name written into every row of the `label` column.

    Returns:
        A new DataFrame; the frame passed in is not modified.

    Raises:
        KeyError: If the frame has neither a `tweet` nor a `Text` column.
    """
    # Standardize the text column name for every source, not just one of them:
    # after pd.concat, rows coming from a frame without `tweet` would hold NaN
    # in that column and be silently discarded by the dropna below.
    if 'tweet' not in frame.columns and 'Text' in frame.columns:
        frame = frame.rename(columns={'Text': 'tweet'})
    if 'tweet' not in frame.columns:
        raise KeyError(
            f"No 'tweet' or 'Text' column in the '{label}' data; "
            f"found columns: {list(frame.columns)}"
        )
    return frame.assign(label=label)


# The label is derived from the file each row came from.
anxiety = _prepare_source(anxiety, 'anxiety')
depression = _prepare_source(depression, 'depression')
normal = _prepare_source(normal, 'normal')

# Concatenation and Dropping NAs
# Rows without text cannot be vectorized, so they are removed up front.
df = pd.concat([depression, anxiety, normal], ignore_index=True).dropna(subset=['tweet'])

def clean_text(text):
    """Normalize raw text into lowercase, letters-only, single-spaced form.

    Args:
        text: Any value; it is converted with ``str()`` before processing.

    Returns:
        The lowercased text with URLs and every character other than ASCII
        letters and whitespace removed, whitespace runs collapsed to a single
        space, and leading/trailing whitespace stripped.
    """
    # Applied in order: URLs must go first, otherwise removing punctuation
    # would leave their letters behind as pseudo-words.
    substitutions = (
        (r"http\S+|www\S+", ""),
        (r"[^a-zA-Z\s]", ""),
        (r"\s+", " "),
    )
    result = str(text).lower()
    for pattern, replacement in substitutions:
        result = re.sub(pattern, replacement, result)
    return result.strip()

# Normalize every post once; the vectorizer below is trained on this column.
df['clean_text'] = df['tweet'].map(clean_text)

# --- Model Training ---
# Keep 80% of the rows for fitting (fixed seed). The labels are passed to the
# split but only the training texts are used from its result.
X = df['clean_text']
X_train = train_test_split(X, df['label'], test_size=0.2, random_state=42)[0]

# Learn the TF-IDF vocabulary (capped at 5000 terms) and vectorize the
# training texts in a single pass.
vectorizer = TfidfVectorizer(max_features=5000)
X_train_vec = vectorizer.fit_transform(X_train)

# Three clusters, fitted without using the labels.
kmeans = KMeans(n_clusters=3, random_state=42, n_init='auto').fit(X_train_vec)

# --- Save Models to Disk ---
for fitted_model, filename in (
    (vectorizer, "vectorizer.pkl"),
    (kmeans, "kmeans_model.pkl"),
):
    joblib.dump(fitted_model, filename)

print("Cell 1 Complete. Models saved: vectorizer.pkl and kmeans_model.pkl")

Starting Cell 1: Data Preparation and Model Training...
Cell 1 Complete. Models saved: vectorizer.pkl and kmeans_model.pkl


In [23]:
# --- Define the Final Bias-Corrected Mapping ---
print("\nStarting Cell 2: Defining Cluster Mapping...")

# Hand-assigned names for the three K-Means cluster ids, listed in id order.
# NOTE(review): K-Means numbers its clusters arbitrarily, so this mapping is
# presumably only valid for the model fitted in Cell 1 -- re-check it against
# the labelled data whenever the model is retrained.
label_names = ("normal", "anxiety", "depression")
cluster_to_label = dict(enumerate(label_names))

print(f"Cell 2 Complete. Final Mapping: {cluster_to_label}")


Starting Cell 2: Defining Cluster Mapping...
Cell 2 Complete. Final Mapping: {0: 'normal', 1: 'anxiety', 2: 'depression'}


In [24]:
import gradio as gr
import re
import joblib

# --- Load Models from Disk ---
# These are the artifacts written by Cell 1. joblib.load unpickles the file
# contents, so only load files produced by this notebook.
try:
    VECTORIZER = joblib.load("vectorizer.pkl")
    KMEANS = joblib.load("kmeans_model.pkl")
except FileNotFoundError:
    print("FATAL ERROR: Could not find 'vectorizer.pkl' or 'kmeans_model.pkl'. Ensure Cell 1 ran successfully.")
    raise
else:
    # Reached only when both files were loaded.
    print("Models successfully loaded from disk.")

# --- Prediction Function ---
def clean_text(text):
    """Normalize raw text the same way the training data was normalized.

    The vectorizer was fitted on text cleaned by these exact steps, so this
    function must keep producing the same output for the same input.

    Args:
        text: Any value; it is converted with ``str()`` before processing.

    Returns:
        The lowercased text with URLs and every character other than ASCII
        letters and whitespace removed, whitespace runs collapsed to a single
        space, and leading/trailing whitespace stripped.
    """
    # Applied in order: URLs first, then non-letters, then whitespace runs.
    substitutions = (
        (r"http\S+|www\S+", ""),
        (r"[^a-zA-Z\s]", ""),
        (r"\s+", " "),
    )
    result = str(text).lower()
    for pattern, replacement in substitutions:
        result = re.sub(pattern, replacement, result)
    return result.strip()

def predict_cluster_robust(text):
    """Predict the mental-state label for a piece of free text.

    Args:
        text: Raw user input. ``None`` and blank strings are tolerated.

    Returns:
        A message with the upper-cased label and the cluster id, or a prompt
        asking for input when there is nothing to analyze.
    """
    empty_message = "Please enter text to analyze."

    # Guard against None as well as blank strings; calling .strip() on None
    # would raise AttributeError.
    if text is None or not str(text).strip():
        return empty_message

    cleaned = clean_text(text)
    # Input made only of digits, punctuation, or URLs cleans down to "".
    # It would vectorize to an all-zero row and still be assigned a cluster,
    # yielding a prediction unrelated to what the user typed.
    if not cleaned:
        return empty_message

    # Use the loaded components
    vec = VECTORIZER.transform([cleaned])
    # Cast the NumPy integer to a plain int for the dict lookup and display.
    cluster = int(KMEANS.predict(vec)[0])
    prediction = cluster_to_label.get(cluster, "UNKNOWN_CLUSTER")

    return f"Predicted Mental State: {prediction.upper()} (Cluster: {cluster})"

# --- Launch Gradio Interface ---
# Multi-line input box shown to the user.
input_box = gr.Textbox(lines=5, placeholder="Enter a message or text snippet...")

# The description lists the label assigned to each of the three cluster ids.
app_description = f"Unsupervised K-Means model to classify text into: **{cluster_to_label[0].upper()}** / **{cluster_to_label[1].upper()}** / **{cluster_to_label[2].upper()}**"

ui = gr.Interface(
    fn=predict_cluster_robust,
    inputs=input_box,
    outputs="text",
    title="Mental Health Clustering System (K-Means - File Loaded Robust Version)",
    description=app_description,
)

print("\nCell 3 Ready: Launching Gradio Interface...")
# share=True requests a public URL in addition to the local server.
ui.launch(share=True)

Models successfully loaded from disk.

Cell 3 Ready: Launching Gradio Interface...
Colab notebook detected. To show errors in colab notebook, set debug=True in launch()
* Running on public URL: https://c641ce5b71ee151cc0.gradio.live

This share link expires in 1 week. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)


