In [None]:
import pandas as pd
import torch
from torch import nn
from torch import optim 
from sklearn.metrics import accuracy_score,precision_score,recall_score

### loading and preprocessing of dataset

In [3]:
df=pd.read_csv(r"C:\Users\MY Laptop\Desktop\guvi_class\mental health survey\train.csv")

df.drop(['id','Name','City'],axis=1,inplace=True)
df.loc[df['Working Professional or Student'] == 'Working Professional', 'CGPA'] = 0.0
df.loc[df['Working Professional or Student'] == 'Working Professional', 'Academic Pressure'] = 0.0
df.loc[df['Working Professional or Student'] == 'Working Professional', 'Study Satisfaction'] = 0
df['Profession']=df[['Profession']].fillna(df['Profession'].mode()[0])
df['Degree']=df[['Degree']].fillna(df['Degree'].mode()[0])
df['Dietary Habits']=df[['Dietary Habits']].fillna(df['Dietary Habits'].mode()[0])
df['Work Pressure'] = df[['Work Pressure']].fillna(df['Work Pressure'].median())
df['Academic Pressure'] = df[['Academic Pressure']].fillna(df['Academic Pressure'].median())
df['Job Satisfaction'] = df[['Job Satisfaction']].fillna(df['Job Satisfaction'].median())
df['Financial Stress'] = df[['Financial Stress']].fillna(df['Financial Stress'].median())
df['CGPA'] = df[['CGPA']].fillna(df['CGPA'].median())
df['Study Satisfaction'] = df[['Study Satisfaction']].fillna(df['Study Satisfaction'].median())

# Top list 
allowed_profession=['Teacher','Content Writer','Architect','Consultant','HR Manager','Pharmacist','Doctor','Business Analyst','Entrepreneur','Chemist']
df['Profession'] = df['Profession'].where(df['Profession'].isin(allowed_profession), 'Others')
allowed_sleepduration=['Less than 5 hours','7-8 hours','More than 8 hours','5-6 hours','3-4 hours']
df['Sleep Duration'] = df['Sleep Duration'].where(df['Sleep Duration'].isin(allowed_sleepduration), 'Others')
allowed_habits=['Moderate','Unhealthy','Healthy']
df['Dietary Habits'] = df['Dietary Habits'].where(df['Dietary Habits'].isin(allowed_habits), 'Others')
allowed_degree=['Class 12'  ,'B.Ed' ,'B.Arch','B.Com','B.Pharm','BCA','M.Ed','MCA','BBA' ,'BSc']
df['Degree'] = df['Degree'].where(df['Degree'].isin(allowed_degree), 'Others')


nominal_data=['Gender','Working Professional or Student','Profession','Degree','Sleep Duration','Dietary Habits'] #OHC
ordinal_data=['Family History of Mental Illness', 'Have you ever had suicidal thoughts ?']
df['Family History of Mental Illness']=df['Family History of Mental Illness'].replace({'No':0,'Yes':1})
df['Have you ever had suicidal thoughts ?']=df['Have you ever had suicidal thoughts ?'].replace({'No':0,'Yes':1})
df=pd.get_dummies(df,columns=nominal_data,dtype='int')

# handing outlier 

import numpy as np
outliers=['Academic Pressure','CGPA',
       'Study Satisfaction']
for col in outliers:

    Q1 = df[col].quantile(0.25)  # 25th percentile
    Q3 = df[col].quantile(0.75)  # 75th percentile
    IQR = Q3 - Q1  # Interquartile range

    # Define lower and upper bound
    lower_bound = Q1 - 1.5 * IQR
    upper_bound = Q3 + 1.5 * IQR

    # Cap values at lower and upper bound
    df[col] = np.where(df[col] < lower_bound, lower_bound, df[col])
    df[col] = np.where(df[col] > upper_bound, upper_bound, df[col])


  df['Family History of Mental Illness']=df['Family History of Mental Illness'].replace({'No':0,'Yes':1})
  df['Have you ever had suicidal thoughts ?']=df['Have you ever had suicidal thoughts ?'].replace({'No':0,'Yes':1})


In [None]:
class DNN(nn.Module):
    def __init__(self,input_size,hidden_size,output_size):
        super(DNN,self).__init__()
        self.layers = nn.Sequential(
            nn.Linear(input_size,hidden_size[0]),
            nn.ReLU(),
            nn.Linear(hidden_size[0],hidden_size[1]),
            nn.ReLU(),
            nn.Linear(hidden_size[1],hidden_size[2]),
            nn.ReLU(),
            nn.Linear(hidden_size[2],hidden_size[3]),
            nn.ReLU(),
            nn.Linear(hidden_size[3],hidden_size[4]),
            nn.ReLU(),
            nn.Linear(hidden_size[4],output_size),
            nn.Sigmoid()
        )
        
    def forward(self,X):
        X = self.layers(X)
        return X
model = DNN(input_size,hidden_size,output_size)
criterion= nn.BCELoss()
# optimizer = optim.SGD(model.parameters())
optimizer = torch.optim.Adam(model.parameters(), lr=0.01)

num_epoch= 100
for epoch in range(num_epoch):
    # optimizer.zero_grad()
    # output = model(X)
    # loss = criterion(output,y)
    # loss.backward()
    # optimizer.step()
    # print(f"epoch {epoch}/{num_epoch}: loss: {loss.item()}")
    optimizer.zero_grad()
    output = model(X)
    loss = criterion(output,y)
    loss.backward()
    optimizer.step()    
    predicted = (output > 0.5).float()  # Binary prediction
    all_preds = predicted.cpu().numpy()
    all_labels = y.cpu().numpy()
    
    # Calculate metrics
    accuracy = accuracy_score(all_labels, all_preds)
    precision = precision_score(all_labels, all_preds)
    recall = recall_score(all_labels, all_preds)
    
    # Print metrics
    print(f"epoch {epoch}/{num_epoch}: loss: {loss.item():.4f}, accuracy: {accuracy:.4f}, precision: {precision:.4f}, recall: {recall:.4f}")






epoch 0/100: loss: 0.6963, accuracy: 0.5000, precision: 0.5000, recall: 1.0000
epoch 1/100: loss: 0.6936, accuracy: 0.5000, precision: 0.5000, recall: 1.0000
epoch 2/100: loss: 0.6893, accuracy: 0.5000, precision: 0.5000, recall: 1.0000
epoch 3/100: loss: 0.6886, accuracy: 0.5000, precision: 0.5000, recall: 1.0000
epoch 4/100: loss: 0.6868, accuracy: 0.5000, precision: 0.5000, recall: 1.0000
epoch 5/100: loss: 0.6844, accuracy: 0.5000, precision: 0.5000, recall: 1.0000
epoch 6/100: loss: 0.6820, accuracy: 0.5000, precision: 0.5000, recall: 1.0000
epoch 7/100: loss: 0.6796, accuracy: 0.5000, precision: 0.5000, recall: 1.0000
epoch 8/100: loss: 0.6777, accuracy: 0.5000, precision: 0.5000, recall: 1.0000
epoch 9/100: loss: 0.6756, accuracy: 0.5000, precision: 0.5000, recall: 1.0000
epoch 10/100: loss: 0.6733, accuracy: 0.5000, precision: 0.5000, recall: 1.0000
epoch 11/100: loss: 0.6705, accuracy: 0.5000, precision: 0.5000, recall: 1.0000
epoch 12/100: loss: 0.6671, accuracy: 0.7175, prec

In [9]:

class DNN(nn.Module):
    def __init__(self,input_size,hidden_size,output_size):
        super(DNN,self).__init__()
        self.layers = nn.Sequential(
            nn.Linear(input_size,hidden_size[0]),
            nn.ReLU(),
            nn.Linear(hidden_size[0],hidden_size[1]),
            nn.ReLU(),
            nn.Linear(hidden_size[1],hidden_size[2]),
            nn.ReLU(),
            nn.Linear(hidden_size[2],hidden_size[3]),
            nn.ReLU(),
            nn.Linear(hidden_size[3],output_size),
            nn.Sigmoid()
        )
        
    def forward(self,X):
        X = self.layers(X)
        return X
model = DNN(input_size,hidden_size,output_size)
criterion= nn.BCELoss()
# optimizer = optim.SGD(model.parameters())
optimizer = torch.optim.Adam(model.parameters(), lr=0.01)

num_epoch= 100
for epoch in range(num_epoch):
    # optimizer.zero_grad()
    # output = model(X)
    # loss = criterion(output,y)
    # loss.backward()
    # optimizer.step()
    # print(f"epoch {epoch}/{num_epoch}: loss: {loss.item()}")
    optimizer.zero_grad()
    output = model(X)
    loss = criterion(output,y)
    loss.backward()
    optimizer.step()    
    predicted = (output > 0.5).float()  # Binary prediction
    all_preds = predicted.cpu().numpy()
    all_labels = y.cpu().numpy()
    
    # Calculate metrics
    accuracy = accuracy_score(all_labels, all_preds)
    precision = precision_score(all_labels, all_preds)
    recall = recall_score(all_labels, all_preds)
    
    # Print metrics
    print(f"epoch {epoch}/{num_epoch}: loss: {loss.item():.4f}, accuracy: {accuracy:.4f}, precision: {precision:.4f}, recall: {recall:.4f}")





epoch 0/100: loss: 0.6981, accuracy: 0.4927, precision: 0.4963, recall: 0.9831
epoch 1/100: loss: 0.6812, accuracy: 0.5003, precision: 0.5749, recall: 0.0025


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


epoch 2/100: loss: 0.6578, accuracy: 0.5000, precision: 0.0000, recall: 0.0000


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


epoch 3/100: loss: 0.6424, accuracy: 0.5000, precision: 0.0000, recall: 0.0000


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


epoch 4/100: loss: 0.6312, accuracy: 0.5000, precision: 0.0000, recall: 0.0000


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


epoch 5/100: loss: 0.6097, accuracy: 0.5000, precision: 0.0000, recall: 0.0000
epoch 6/100: loss: 0.5878, accuracy: 0.5832, precision: 0.9593, recall: 0.1737
epoch 7/100: loss: 0.5679, accuracy: 0.7305, precision: 0.9235, recall: 0.5026
epoch 8/100: loss: 0.5422, accuracy: 0.7987, precision: 0.9055, recall: 0.6670
epoch 9/100: loss: 0.5127, accuracy: 0.8089, precision: 0.9021, recall: 0.6930
epoch 10/100: loss: 0.4909, accuracy: 0.8127, precision: 0.9021, recall: 0.7015
epoch 11/100: loss: 0.4718, accuracy: 0.8271, precision: 0.8964, recall: 0.7398
epoch 12/100: loss: 0.4518, accuracy: 0.8480, precision: 0.8817, recall: 0.8039
epoch 13/100: loss: 0.4337, accuracy: 0.8616, precision: 0.8642, recall: 0.8581
epoch 14/100: loss: 0.4141, accuracy: 0.8665, precision: 0.8588, recall: 0.8771
epoch 15/100: loss: 0.3933, accuracy: 0.8691, precision: 0.8641, recall: 0.8760
epoch 16/100: loss: 0.3724, accuracy: 0.8726, precision: 0.8633, recall: 0.8854
epoch 17/100: loss: 0.3509, accuracy: 0.8763,