In [1]:
import pandas as pd
import numpy as np
import nltk
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report
from sklearn.pipeline import make_pipeline
import re

In [2]:
# Import stopwords
from nltk.corpus import stopwords
STOPWORDS = set(stopwords.words('english'))

In [3]:
# Define a function to clean the text
def clean_text(text):
    text = text.lower()  # Convert to lowercase
    text = re.sub(r'\W+', ' ', text)  # Remove all non-word characters
    text = ' '.join(word for word in text.split() if word not in STOPWORDS)  # Remove stopwords
    return text

In [4]:
# Load the dataset
df = pd.read_csv('mental_health.csv')

In [5]:
# Preprocess the data
df['Problem'] = df['Problem'].apply(clean_text)
df['Solution'] = df['Solution'].apply(clean_text)
df['Symptoms'] = df['Symptoms'].apply(clean_text)

In [6]:
# Combine symptoms and solutions into one column for classification
df['combined'] = df['Symptoms'] + ' | ' + df['Solution']

In [7]:
# Split the data into training and test sets
X = df['Problem']
y = df['combined']
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [8]:
# Create a pipeline with a TfidfVectorizer and a RandomForestClassifier
pipeline = make_pipeline(TfidfVectorizer(), RandomForestClassifier(random_state=42))

In [9]:
# Train the model
pipeline.fit(X_train, y_train)

In [10]:
# Evaluate the model
y_pred = pipeline.predict(X_test)
# print(classification_report(y_test, y_pred))

In [14]:
def get_top_k_predictions(model, vectorizer, text, k):
    tfidf_matrix = vectorizer.transform([text])
    probabilities = model.predict_proba(tfidf_matrix)
    top_k_indices = np.argsort(-probabilities[0])[:k]
    print("Probabilities shape:", probabilities.shape)
    print("Top k indices:", top_k_indices)
    return [(model.classes_[i], probabilities[0][i]) for i in top_k_indices]

In [18]:
while True:
    user_input = input("Enter the problem you're experiencing (e.g., I have stress): ")
    user_problem = clean_text(user_input)
    
    if user_input.lower()=='exit':
        break
    
    top_k_predictions = get_top_k_predictions(pipeline.named_steps['randomforestclassifier'], pipeline.named_steps['tfidfvectorizer'], user_problem, k=5)
    predicted_symptoms = []
    predicted_solutions = []

    for pred in top_k_predictions:
        if '|' in pred[0]:
            symptom, solution = pred[0].split('|', 1)  # Split at the first occurrence of '|'
            predicted_symptoms.append(symptom)
            predicted_solutions.append(solution)
        else:
            predicted_symptoms.append(pred[0])

    # Take only the top 5 predicted symptoms and solutions
    predicted_symptoms = predicted_symptoms[:5]
    predicted_solutions = predicted_solutions[:5]
    
    # Print the predictions
    print("Predicted symptoms : ")
    for i in range(1,len(predicted_symptoms)+1):
        print(f'{i}. {predicted_symptoms[i-1]}')
    print("Predicted solutions : ")
    for i in range(1,len(predicted_solutions)+1):
        print(f'{i}. {predicted_solutions[i-1]}')

Enter the problem you're experiencing (e.g., I have stress): what are the symptoms of cold
Probabilities shape: (1, 199)
Top k indices: [ 80  99  41 176 110  91  92 180  12 161]
Predicted symptoms : 
1. fatigue weakness 
2. headache 
3. cough 
4. sore throat 
5. irritability 
6. frequent urination 
7. frustration 
8. support groups peer counseling 
9. biofeedback relaxation techniques manage stress 
10. resentment 
Predicted solutions : 
1.  inhaling steam bowl hot water using humidifier ease nasal congestion improve breathing
2.  minimize exposure smoke pollution allergens irritants worsen cold symptoms
3.  gargling warm salt water soothe sore throat help reduce inflammation
4.  use counter cold medications symptom relief decongestants antihistamines pain relievers
5.  relaxation techniques deep breathing mindfulness progressive muscle relaxation calm body mind
6.  use insulin injections oral medications metformin drugs help regulate blood sugar levels
7.  communication skills trainin