In [12]:
import pandas as pd
import numpy as np

In [13]:
# Location of the raw questionnaire export on the mounted Google Drive.
DATASET_CSV = "/content/drive/MyDrive/Datasets/dep_mcq.csv"
df = pd.read_csv(DATASET_CSV)

In [14]:
# Peek at the first five rows to see the raw column names and answer values.
df.head(n=5)

Unnamed: 0,Age,Do you feel down recently?,Irritable towards the work?,Trouble sleeping at night,Do you feel demotivated?,Overeating or loss of appetite,Do you get too many negative thoughts?,Do you feel sad easily?,Trouble in socializing with people/ are you introverted suddenly?,Depression
0,35-40,Yes,Yes,Sometimes,Yes,Yes,Yes,No,Yes,Yes
1,40-45,Yes,No,No,Yes,Yes,No,Yes,Yes,No
2,35-40,Yes,No,Yes,Yes,Yes,Yes,No,Sometimes,No
3,35-40,Yes,Yes,Yes,Yes,No,Yes,Sometimes,No,No
4,40-45,Yes,No,Sometimes,Yes,No,Yes,No,Yes,No


In [15]:
# Ordinal encoding of the survey answers: No < Sometimes < Yes.
# Applied to the whole frame, so every column holding these strings is recoded.
answer_codes = {'No': 0, 'Sometimes': 1, 'Yes': 2}
df.replace(to_replace=answer_codes, inplace=True)

In [16]:
# Look at the first five rows again to check that the answers are now numeric codes.
df.head(n=5)

Unnamed: 0,Age,Do you feel down recently?,Irritable towards the work?,Trouble sleeping at night,Do you feel demotivated?,Overeating or loss of appetite,Do you get too many negative thoughts?,Do you feel sad easily?,Trouble in socializing with people/ are you introverted suddenly?,Depression
0,35-40,2,2.0,1,2.0,2,2,0.0,2,2
1,40-45,2,0.0,0,2.0,2,0,2.0,2,0
2,35-40,2,0.0,2,2.0,2,2,0.0,1,0
3,35-40,2,2.0,2,2.0,0,2,1.0,0,0
4,40-45,2,0.0,1,2.0,0,2,0.0,2,0


In [17]:
from sklearn.preprocessing import LabelEncoder

# Encoder for the age-band strings. LabelEncoder stores its classes in sorted
# order, so the integer code of a band is its position in that sorted list.
label_encoder = LabelEncoder()

# Fit on the non-missing age bands only. Fitting on the raw column would turn a
# missing age into an extra category (or raise, depending on the scikit-learn
# version), and such a row would then survive the later dropna() call.
age_known = df['Age'].notna()
label_encoder.fit(df.loc[age_known, 'Age'])

# Translate band -> code. A missing age has no entry in the mapping and stays
# NaN, which lets the subsequent dropna() remove that row.
age_codes = {band: code for code, band in enumerate(label_encoder.classes_)}
df['Age'] = df['Age'].map(age_codes)



In [18]:
# Respondents who declined to answer the target question carry no label,
# so their rows are removed (in place) before modelling.
declined = df['Depression'].eq('Not interested to say')
df.drop(index=df.index[declined], inplace=True)

In [19]:
# Keep only fully answered questionnaires: drop every row with any missing value.
df = df.dropna(axis=0, how='any')

In [20]:
# Normalise the target column in one pass:
#   1. coerce to numbers (anything unparseable becomes NaN),
#   2. drop the rows whose target could not be parsed,
#   3. store the remaining target values as integers.
df = (
    df.assign(Depression=pd.to_numeric(df['Depression'], errors='coerce'))
      .dropna(subset=['Depression'])
      .astype({'Depression': int})
)

In [21]:
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix



# Features are every column except the target; the target is 'Depression'.
y = df['Depression']
X = df.drop(columns=['Depression'])

# Hold out 20% of the rows for evaluation; the fixed seed makes the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# Build a 100-tree random forest and fit it on the training portion
# (fit() returns the fitted estimator, so construction and training chain).
rf_classifier = RandomForestClassifier(
    n_estimators=100,
    random_state=42,
).fit(X_train, y_train)

# Score the held-out portion.
y_pred = rf_classifier.predict(X_test)

accuracy = accuracy_score(y_test, y_pred)
conf_matrix = confusion_matrix(y_test, y_pred)
classification_rep = classification_report(y_test, y_pred)

# Report overall accuracy, the confusion matrix and the per-class metrics.
print(f'Accuracy: {accuracy:.2f}')
print(f'Confusion Matrix:\n{conf_matrix}')
print(f'Classification Report:\n{classification_rep}')

Accuracy: 0.97
Confusion Matrix:
[[125   0]
 [  8  99]]
Classification Report:
              precision    recall  f1-score   support

           0       0.94      1.00      0.97       125
           2       1.00      0.93      0.96       107

    accuracy                           0.97       232
   macro avg       0.97      0.96      0.97       232
weighted avg       0.97      0.97      0.97       232



In [22]:
import pickle
# Persist the trained forest to Google Drive so it can be reloaded later.
file_path = "/content/drive/MyDrive/models/ques.pkl"
model = rf_classifier

# Serialise first, then write the bytes out in binary mode.
with open(file_path, mode="wb") as file:
    file.write(pickle.dumps(model))

print(f"Model saved as {file_path}")

Model saved as /content/drive/MyDrive/models/ques.pkl


In [23]:
# Prompt shown for each feature, listed in the same order as the columns of X.
# Every prompt spells out the integer codes the user is expected to type.
questions = [
    "Enter age: \n20-30- 0\n30-35- 1\n35-40- 2\n40-45- 3\n45-50- 4",
    "Do you feel down recently?\n0 for No\n1 for Sometimes\n2 for Yes",
    "Irritable towards the work?\n0 for No\n1 for Sometimes\n2 for Yes",
    "Trouble sleeping at night\n0 for No\n1 for Sometimes\n2 for Yes",
    "Do you feel demotivated?\n0 for No\n1 for Sometimes\n2 for Yes",
    "Overeating or loss of appetite?:\n0 for No\n1 for Sometimes\n2 for Yes",
    "Do you get too many negative thoughts?\n0 for No\n1 for Sometimes\n2 for Yes",
    "Do you feel sad easily?\n0 for No\n1 for Sometimes\n2 for Yes",
    "Trouble in socializing with people/Are you introverted suddenly?\n0 for No\n1 for Sometimes\n2 for Yes",
]

# Codes advertised by the prompts above: five age bands (0-4) for the first
# question, then three answer levels (0, 1, 2) for every remaining question.
valid_codes = [range(5)] + [range(3)] * (len(questions) - 1)


def ask_for_code(prompt, allowed):
    """Ask the user for an integer code until a valid one is entered.

    Args:
        prompt: Text shown to the user (a ": " suffix is appended).
        allowed: Container of the integer codes that are accepted.

    Returns:
        The entered code as an int, guaranteed to be a member of `allowed`.
    """
    while True:
        answer = input(f"{prompt}: ").strip()
        try:
            code = int(answer)
        except ValueError:
            # A typo should not abort the whole questionnaire.
            print(f"'{answer}' is not a whole number, please try again.")
            continue
        if code in allowed:
            return code
        print(f"{code} is not one of the listed options, please try again.")


# Prompts are matched to features by position, so a length mismatch would
# silently pair questions with the wrong columns; fail loudly instead.
if len(questions) != len(X.columns):
    raise ValueError(
        f"Expected {len(X.columns)} questions (one per feature), got {len(questions)}."
    )

# Gather one validated code per feature. All features were numerically encoded
# before training, so every answer is read as an integer code; the former
# string branch relied on a `label_encode_input` helper that is not defined.
custom_input = {
    column_name: ask_for_code(prompt, allowed)
    for column_name, prompt, allowed in zip(X.columns, questions, valid_codes)
}

# Single-row frame whose columns follow the order of X.columns.
custom_input_df = pd.DataFrame([custom_input])

# Predicted class for the entered answers, plus the per-class probabilities.
prediction = rf_classifier.predict(custom_input_df)
probabilities = rf_classifier.predict_proba(custom_input_df)

# Print the predicted result
print(f'The model predicts: {prediction[0]}')

Enter age: 
20-30- 0
30-35- 1
35-40- 2
40-45- 3
45-50- 4: 0
Do you feel down recently?
0 for No
1 for Sometimes
2 for Yes: 1
Irritable towards the work?
0 for No
1 for Sometimes
2 for Yes: 1
Trouble sleeping at night
0 for No
1 for Sometimes
2 for Yes: 1
Do you feel demotivated?
0 for No
1 for Sometimes
2 for Yes: 1
Overeating or loss of appetite?:
0 for No
1 for Sometimes
2 for Yes: 1
Do you get too many negative thoughts?
0 for No
1 for Sometimes
2 for Yes: 1
Do you feel sad easily?
0 for No
1 for Sometimes
2 for Yes: 1
Trouble in socializing with people/Are you introverted suddenly?
0 for No
1 for Sometimes
2 for Yes: 1
The model predicts: 2


In [26]:
# Per-class probabilities returned by predict_proba for the custom input;
# each column lines up with the corresponding entry of rf_classifier.classes_.
probabilities

array([[0.34, 0.66]])

In [25]:
import pickle

# Also write a copy of the trained forest to the current working directory.
with open('rf_classifier_model.pkl', mode='wb') as file:
    file.write(pickle.dumps(rf_classifier))