In [None]:
# Import necessary libraries
from google.colab import drive
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.ensemble import RandomForestClassifier, StackingClassifier
from sklearn.svm import SVC
from sklearn.neighbors import KNeighborsClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.neural_network import MLPClassifier
from sklearn.metrics import classification_report, accuracy_score

# Make Google Drive reachable from the Colab runtime
drive.mount('/content/drive')

# Read the depression (PHQ-9) comparison CSV from Drive
data = pd.read_csv('/content/drive/MyDrive/Depression_for_Comparision(PHQ_9).csv')

# Normalise the headers step by step:
#   1. strip a leading "<number>." prefix,
#   2. trim surrounding whitespace,
#   3. lower-case everything,
#   4. turn the remaining spaces into underscores.
data.columns = (
    data.columns
    .str.replace(r'^\d+\.\s*', '', regex=True)
    .str.strip()
    .str.lower()
    .str.replace(' ', '_')
)

# Function to convert CGPA range to float (mean of the range)
def convert_cgpa(cgpa):
    """Convert a CGPA entry to a single float.

    A string containing a hyphen (e.g. ``"3.00 - 3.50"``) is treated as a
    range and replaced by the mean of its first two bounds. Anything else is
    passed to ``float``.

    Args:
        cgpa: Raw cell value; a range string, a numeric string, a number,
            or a missing value.

    Returns:
        The parsed value as a float, or ``np.nan`` when the entry cannot be
        interpreted as a number or a numeric range.
    """
    try:
        if isinstance(cgpa, str) and '-' in cgpa:
            # Only the first two bounds matter; float() ignores the
            # whitespace that surrounds each bound.
            low, high = cgpa.split('-')[:2]
            return (float(low) + float(high)) / 2
        return float(cgpa)
    except (TypeError, ValueError):
        # ValueError: non-numeric text (including malformed ranges such as
        # "below-2.50"); TypeError: None or other non-convertible objects.
        return np.nan

# Apply the conversion function to the current_cgpa column
data['current_cgpa'] = data['current_cgpa'].apply(convert_cgpa)

# Drop rows with any null values. An explicit copy is taken so the column
# assignments in the encoding loop below write to an independent DataFrame
# instead of raising pandas' SettingWithCopyWarning.
data_cleaned = data.dropna().copy()

# Display the number of rows remaining after removing null values
remaining_data_count = data_cleaned.shape[0]
print(f"Number of rows after removing null values: {remaining_data_count}")

# Check data types of the columns
print(data_cleaned.dtypes)

# Encode categorical variables as integer codes
label_encoder = LabelEncoder()
categorical_columns = ['age', 'gender', 'university', 'department', 'academic_year',
                       'did_you_receive_a_waiver_or_scholarship_at_your_university?']

for col in categorical_columns:
    # fit_transform refits the encoder on every column, so the integer codes
    # are assigned independently per column.
    data_cleaned[col] = label_encoder.fit_transform(data_cleaned[col])

# Select features and target variable. Both the label and the numeric score
# it is paired with are removed from the feature matrix.
X = data_cleaned.drop(columns=['depression_label', 'depression_value'])  # Features
y = data_cleaned['depression_label']  # Target variable

# Check for any non-numeric columns before train-test split
print(X.dtypes)

# Train-test split (80/20, fixed seed)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Feature scaling (for SVM and KNN); the scaler is fitted on the training
# split only and then applied to the test split.
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

# Define base models for stacking. Stochastic estimators are seeded so that
# re-running the cell reproduces the same scores.
base_estimators = [
    ('Random Forest', RandomForestClassifier(random_state=42)),
    ('SVM', SVC(probability=True, random_state=42)),
    ('KNN', KNeighborsClassifier()),
    ('Naive Bayes', GaussianNB())
]

# Create a stacking classifier with an MLP as the meta-learner
stacking_model = StackingClassifier(
    estimators=base_estimators,
    final_estimator=MLPClassifier(max_iter=1000, random_state=42)
)

# Train the stacking model
stacking_model.fit(X_train, y_train)

# Make predictions with the stacking model
y_pred_stacking = stacking_model.predict(X_test)

# Display the classification report for the stacking model
print("--- Stacking Model ---")
print(classification_report(y_test, y_pred_stacking))
print(f'Accuracy: {accuracy_score(y_test, y_pred_stacking)}\n')


Mounted at /content/drive
Number of rows after removing null values: 1773
age                                                                                                                                                                   object
gender                                                                                                                                                                object
university                                                                                                                                                            object
department                                                                                                                                                            object
academic_year                                                                                                                                                         object
current_cgpa                                                 

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data_cleaned[col] = label_encoder.fit_transform(data_cleaned[col])
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data_cleaned[col] = label_encoder.fit_transform(data_cleaned[col])
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data_cleaned[col] = label_encoder.fit_transform(data_cleaned[col])
A va

--- Stacking Model ---
                              precision    recall  f1-score   support

             Mild Depression       0.89      1.00      0.94        68
          Minimal Depression       1.00      0.60      0.75        15
         Moderate Depression       0.99      0.94      0.96        80
Moderately Severe Depression       0.96      0.96      0.96        97
               No Depression       1.00      1.00      1.00         6
           Severe Depression       0.97      0.99      0.98        89

                    accuracy                           0.95       355
                   macro avg       0.97      0.91      0.93       355
                weighted avg       0.96      0.95      0.95       355

Accuracy: 0.9549295774647887



In [None]:
# Install the imbalanced-learn package into the notebook runtime via pip.
# NOTE(review): no cell visible here imports imblearn — confirm this install is still needed.
!pip install imbalanced-learn




In [None]:
# Import necessary libraries
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import StackingClassifier, RandomForestClassifier
from sklearn.svm import SVC
from sklearn.neighbors import KNeighborsClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.neural_network import MLPClassifier
from sklearn.metrics import classification_report, accuracy_score, confusion_matrix

# Load the dataset
data = pd.read_csv('/content/drive/MyDrive/Depression_for_C_2.csv')

# Clean column names by removing leading numbers and spaces
data.columns = data.columns.str.replace(r'^\d+\.\s*', '', regex=True).str.strip().str.lower().str.replace(' ', '_')

# Select only PHQ-1 to Depression Label columns
columns_to_consider = ['phq-1', 'phq-2', 'phq-3', 'phq-4', 'phq-5', 'phq-6', 'phq-7', 'phq-8', 'phq-9', 'depression_value', 'depression_label']
data_depression = data[columns_to_consider]

# Check unique values in the depression_label column
print("Unique values in 'depression_label' before mapping:", data_depression['depression_label'].unique())

# Drop rows with any null values. The explicit copy makes data_cleaned an
# independent frame, so the label assignment below does not raise
# SettingWithCopyWarning.
data_cleaned = data_depression.dropna().copy()

# Ordinal encoding of the target, from least to most severe
mapping_dict = {
    'No Depression': 0,
    'Minimal Depression': 1,
    'Mild Depression': 2,
    'Moderate Depression': 3,
    'Moderately Severe Depression': 4,
    'Severe Depression': 5
}

# Map the depression labels; labels absent from mapping_dict become NaN
data_cleaned['depression_label'] = data_cleaned['depression_label'].map(mapping_dict)

# Check for NaN values in the target variable after mapping
if data_cleaned['depression_label'].isnull().any():
    print("There are NaN values in the target variable after mapping.")
    print(data_cleaned['depression_label'].isnull().sum(), "rows have NaN in 'depression_label'.")
    data_cleaned = data_cleaned.dropna(subset=['depression_label'])

# Define features and target variable. 'depression_value' is excluded along
# with the label: keeping the score column in X would leak the target into
# the features (the first depression cell drops both columns as well).
X = data_cleaned.drop(columns=['depression_label', 'depression_value'])  # Features
# A column that held NaN is float even after the rows are dropped; cast so
# the classes are reported as 0..5 rather than 0.0..5.0.
y = data_cleaned['depression_label'].astype(int)  # Target variable

# Check number of unique classes in y again
print("Number of unique classes in target variable:", y.nunique())

# Proceed if there are at least two unique classes
if y.nunique() < 2:
    print("Insufficient classes for training.")
else:
    # Train-test split (80/20, fixed seed)
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

    # Scale features (for SVM, KNN, and MLP); fit on the training split only
    scaler = StandardScaler()
    X_train = scaler.fit_transform(X_train)
    X_test = scaler.transform(X_test)

    # Define the base estimators
    base_estimators = [
        ('Random Forest', RandomForestClassifier(random_state=42)),
        ('SVM', SVC(probability=True, random_state=42)),
        ('KNN', KNeighborsClassifier()),
        ('Naive Bayes', GaussianNB())
    ]

    # Define the stacking classifier with MLP as the meta model
    stacking_model = StackingClassifier(
        estimators=base_estimators,
        final_estimator=MLPClassifier(max_iter=1000, random_state=42)
    )

    # Train the stacking model
    stacking_model.fit(X_train, y_train)

    # Make predictions
    y_pred_stacking = stacking_model.predict(X_test)

    # Display the classification report
    print("--- Stacking Model ---")
    print(classification_report(y_test, y_pred_stacking))
    print(f"Accuracy: {accuracy_score(y_test, y_pred_stacking):.4f}\n")

    # Per-class specificity = TN / (TN + FP), computed one-vs-rest from the
    # confusion matrix (rows = true class, columns = predicted class).
    conf_matrix = confusion_matrix(y_test, y_pred_stacking)
    true_pos = np.diag(conf_matrix)
    false_pos = conf_matrix.sum(axis=0) - true_pos
    false_neg = conf_matrix.sum(axis=1) - true_pos
    true_neg = conf_matrix.sum() - true_pos - false_pos - false_neg
    negatives = true_neg + false_pos
    # Classes with no negative samples get specificity 0 instead of dividing
    # by zero; tolist() yields plain Python floats for a clean printout.
    specificities = np.divide(
        true_neg,
        negatives,
        out=np.zeros(len(negatives), dtype=float),
        where=negatives > 0,
    ).tolist()

    print(f"Specificity for each class in stacking model: {specificities}\n")


Unique values in 'depression_label' before mapping: ['Moderate Depression' 'Mild Depression' 'Moderately Severe Depression'
 'Minimal Depression' 'Severe Depression' 'No Depression']
Number of unique classes in target variable: 6
--- Stacking Model ---
              precision    recall  f1-score   support

           0       1.00      1.00      1.00         9
           1       1.00      0.88      0.93        16
           2       0.97      1.00      0.99        71
           3       1.00      1.00      1.00        95
           4       1.00      1.00      1.00       112
           5       1.00      1.00      1.00        93

    accuracy                           0.99       396
   macro avg       1.00      0.98      0.99       396
weighted avg       1.00      0.99      0.99       396

Accuracy: 0.9949

Specificity for each class in stacking model: [1.0, 1.0, 0.9938461538461538, 1.0, 1.0, 1.0]



#ANXIETY

In [None]:
# Import necessary libraries
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import StackingClassifier, RandomForestClassifier
from sklearn.svm import SVC
from sklearn.neighbors import KNeighborsClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.neural_network import MLPClassifier
from sklearn.metrics import classification_report, accuracy_score
from google.colab import drive

# Make Google Drive reachable from the Colab runtime
drive.mount('/content/drive')

# Read the anxiety (GAD-7) comparison CSV from Drive
data = pd.read_csv('/content/drive/MyDrive/Anxiety_for_Comparision(GAD-7).csv')

# Normalise headers: drop a leading "<number>." prefix, trim, lower-case,
# and replace spaces with underscores
data.columns = (
    data.columns
    .str.replace(r'^\d+\.\s*', '', regex=True)
    .str.strip()
    .str.lower()
    .str.replace(' ', '_')
)

# Model inputs: six demographic/academic columns followed by the seven
# questionnaire items; the label column is the prediction target
features = [
    'age', 'gender', 'university', 'department', 'academic_year', 'current_cgpa',
    'in_a_semester,_how_often_you_felt_nervous,_anxious_or_on_edge_due_to_academic_pressure?',
    'in_a_semester,_how_often_have_you_been_unable_to_stop_worrying_about_your_academic_affairs?',
    'in_a_semester,_how_often_have_you_had_trouble_relaxing_due_to_academic_pressure?',
    'in_a_semester,_how_often_have_you_been_easily_annoyed_or_irritated_because_of_academic_pressure?',
    'in_a_semester,_how_often_have_you_worried_too_much_about_academic_affairs?',
    'in_a_semester,_how_often_have_you_been_so_restless_due_to_academic_pressure_that_it_is_hard_to_sit_still?',
    'in_a_semester,_how_often_have_you_felt_afraid,_as_if_something_awful_might_happen?'
]
target = 'anxiety_label'

# Keep only rows that are complete in every feature and in the target
data_cleaned = data.dropna(subset=[*features, target])

# Target first, then the feature matrix with categorical columns one-hot
# encoded (first level of each dropped)
y = data_cleaned[target]
X = pd.get_dummies(data_cleaned[features], drop_first=True)

# 80/20 hold-out split with a fixed seed
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Standardise using statistics learned from the training split only
scaler = StandardScaler().fit(X_train)
X_train = scaler.transform(X_train)
X_test = scaler.transform(X_test)

# Level-0 learners for the stack
base_models = [
    ('Random Forest', RandomForestClassifier(random_state=42)),
    ('SVM', SVC(probability=True, random_state=42)),
    ('KNN', KNeighborsClassifier()),
    ('Naive Bayes', GaussianNB()),
    ('MLP', MLPClassifier(max_iter=1000, random_state=42)),
]

# Stack the base learners under a random-forest meta-learner and train
stacking_model = StackingClassifier(
    estimators=base_models,
    final_estimator=RandomForestClassifier(random_state=42),
)
stacking_model.fit(X_train, y_train)

# Predict on the held-out split
y_pred_stacking = stacking_model.predict(X_test)

# Report per-class metrics and overall accuracy
print("--- Stacking Model ---")
print(classification_report(y_test, y_pred_stacking))
print(f"Accuracy: {accuracy_score(y_test, y_pred_stacking):.4f}\n")


Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).
--- Stacking Model ---
                  precision    recall  f1-score   support

    Mild Anxiety       0.97      0.98      0.98       100
 Minimal Anxiety       0.94      0.91      0.92        33
Moderate Anxiety       1.00      0.97      0.99       120
  Severe Anxiety       0.98      1.00      0.99       153

        accuracy                           0.98       406
       macro avg       0.97      0.97      0.97       406
    weighted avg       0.98      0.98      0.98       406

Accuracy: 0.9803



In [None]:
# Import necessary libraries
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import StackingClassifier, RandomForestClassifier
from sklearn.svm import SVC
from sklearn.neighbors import KNeighborsClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.neural_network import MLPClassifier
from sklearn.metrics import classification_report, accuracy_score
import pandas as pd
from google.colab import drive
import numpy as np

# Make Google Drive reachable from the Colab runtime
drive.mount('/content/drive')

# Read the CSV used by this cell from Drive
data = pd.read_csv('/content/drive/MyDrive/Depression_for_C_2.csv')

# Normalise the headers step by step:
#   1. strip a leading "<number>." prefix,
#   2. trim surrounding whitespace,
#   3. lower-case everything,
#   4. turn the remaining spaces into underscores.
data.columns = (
    data.columns
    .str.replace(r'^\d+\.\s*', '', regex=True)
    .str.strip()
    .str.lower()
    .str.replace(' ', '_')
)

# Convert CGPA range to float (mean of the range)
def convert_cgpa(cgpa):
    """Convert a CGPA entry to a single float.

    A string containing a hyphen (e.g. ``"3.00 - 3.50"``) is treated as a
    range and replaced by the mean of its first two bounds. Anything else is
    passed to ``float``.

    Args:
        cgpa: Raw cell value; a range string, a numeric string, a number,
            or a missing value.

    Returns:
        The parsed value as a float, or ``np.nan`` when the entry cannot be
        interpreted as a number or a numeric range.
    """
    try:
        if isinstance(cgpa, str) and '-' in cgpa:
            # Only the first two bounds matter; float() ignores the
            # whitespace that surrounds each bound.
            low, high = cgpa.split('-')[:2]
            return (float(low) + float(high)) / 2
        return float(cgpa)
    except (TypeError, ValueError):
        # ValueError: non-numeric text (including malformed ranges such as
        # "below-2.50"); TypeError: None or other non-convertible objects.
        return np.nan

# Feature columns (GAD-1 to GAD-7) and the target are kept in separate names
# so the feature matrix never depends on list slicing to exclude the label.
features = ['gad-1', 'gad-2', 'gad-3', 'gad-4', 'gad-5', 'gad-6', 'gad-7']
target = 'anxiety_label'

# Ensure all required columns are in the dataset before any of them is used
required_columns = features + [target, 'current_cgpa']
if not all(col in data.columns for col in required_columns):
    raise ValueError("One or more required columns are missing from the dataset.")

# Apply the conversion function to the 'current_cgpa' column
data['current_cgpa'] = data['current_cgpa'].apply(convert_cgpa)

# Drop rows with NaN in 'current_cgpa' or the label (as before), and also in
# any GAD item, since NaN features would break several of the estimators.
# NOTE(review): 'current_cgpa' is not a model feature in this cell; the filter
# on it is kept to preserve the original row selection — confirm it is wanted.
data.dropna(subset=['current_cgpa', target] + features, inplace=True)

X = data[features]  # Features (GAD-1 to GAD-7)
y = data[target]  # Target variable

# Train-test split (80/20, fixed seed)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Feature scaling; the scaler is fitted on the training split only
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

# Define base models. Stochastic estimators are seeded so that re-running the
# cell reproduces the same scores, matching the other anxiety cell.
base_models = [
    ('Random Forest', RandomForestClassifier(random_state=42)),
    ('SVM', SVC(probability=True, random_state=42)),  # Enable probability estimates for stacking
    ('KNN', KNeighborsClassifier()),
    ('Naive Bayes', GaussianNB()),
    ('MLP', MLPClassifier(max_iter=1000, random_state=42))
]

# Stacking Classifier with a random-forest meta-learner
stacking_model = StackingClassifier(
    estimators=base_models,
    final_estimator=RandomForestClassifier(random_state=42)
)
stacking_model.fit(X_train, y_train)
y_pred_stacking = stacking_model.predict(X_test)

print('--- Stacking Classifier Classification Report ---')
print(classification_report(y_test, y_pred_stacking))
print(f'Stacking Classifier Accuracy: {accuracy_score(y_test, y_pred_stacking)}\n')


Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).
--- Stacking Classifier Classification Report ---
                  precision    recall  f1-score   support

    Mild Anxiety       1.00      1.00      1.00        96
 Minimal Anxiety       1.00      1.00      1.00        29
Moderate Anxiety       1.00      1.00      1.00       105
  Severe Anxiety       1.00      1.00      1.00       116

        accuracy                           1.00       346
       macro avg       1.00      1.00      1.00       346
    weighted avg       1.00      1.00      1.00       346

Stacking Classifier Accuracy: 1.0

