Mount

In [None]:
from google.colab import drive
drive.mount('/content/drive')


Mounted at /content/drive


load the dataset

In [None]:
import pandas as pd

# Update the file path if it's in a specific folder, e.g., '/content/drive/MyDrive/folder_name/Data.csv'
data = pd.read_csv('/content/drive/MyDrive/final_thesis_dataset.csv')

# Display the first few rows to confirm it loaded correctly
data.head()


Unnamed: 0,Timestamp,Gender,Academic year,Name of my University,Religion,Relationship status,CGPA (4.00 Scale),Experience of violence,Residence,Satisfied with result,...,Q51,Q52,Q53,Q54,Q55,Q56,Q57,Anxiety_sum,Anxiety_level,Anxiety
0,28/06/2024 11:17:51,Male,Graduate (স্নাতক),University of Barishal,Islam,Unmarried,greater than3.50,zero,With family,Both,...,3,2,2,2,3,2,3,74,Mild,Yes
1,28/06/2024 11:22:58,Male,Graduate (স্নাতক),Bu,Islam,Unmarried,3.00 to 3.50,zero,In a hostel/hall/other,Both,...,3,5,3,5,4,5,5,136,Severely Moderate,Yes
2,28/06/2024 11:25:02,Male,Graduate (স্নাতক),CUET,Islam,Unmarried,less than3.00,zero,In a hostel/hall/other,Both,...,3,3,2,3,2,2,1,84,Mild,Yes
3,28/06/2024 12:03:22,Male,Graduate (স্নাতক),university of Barishal,Islam,In a complex relationship,3.00 to 3.50,Verbal,With family,zero,...,5,4,2,5,4,4,2,125,Severely Moderate,Yes
4,28/06/2024 12:12:15,Female,Graduate (স্নাতক),University of Barisal,Islam,Unmarried,3.00 to 3.50,zero,In a hostel/hall/other,zero,...,1,2,2,2,2,2,4,76,Mild,Yes


#handle missing value

In [None]:
# Drop rows with any null values
data_cleaned = data.dropna()

# Display the number of rows remaining after removing null values
remaining_data_count = data_cleaned.shape[0]
print(f"Number of rows after removing null values: {remaining_data_count}")



# Find rows with any null values
rows_with_nulls = data[data.isnull().any(axis=1)]

# Display row indices of rows with null values
print("Indices of rows with null values:")
print(rows_with_nulls.index.tolist())

# Optionally, display the first few columns of rows with null values for context
# You can adjust the column range if needed
rows_with_nulls.iloc[:, :10]


Number of rows after removing null values: 659
Indices of rows with null values:
[]


Unnamed: 0,Timestamp,Gender,Academic year,Name of my University,Religion,Relationship status,CGPA (4.00 Scale),Experience of violence,Residence,Satisfied with result


#RF,KNN,DT,etc

In [None]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier
from sklearn.svm import SVC
from sklearn.neighbors import KNeighborsClassifier
from sklearn.naive_bayes import GaussianNB, MultinomialNB
from sklearn.linear_model import LogisticRegression, Perceptron
from sklearn.tree import DecisionTreeClassifier
from sklearn.dummy import DummyClassifier
from sklearn.metrics import classification_report, accuracy_score, confusion_matrix
from sklearn.neural_network import MLPClassifier

# Assuming data_cleaned is your cleaned DataFrame with no NaN values
# Map depression levels to numeric values
y = data_cleaned['Depression_level'].map({'Minimal': 0, 'Mild': 1, 'Moderate': 2, 'Severely Moderate': 3, 'Severe Depression': 4})

# Select features from Q26 onwards, assuming Q26-Q57 are columns 25 to end
X = data_cleaned.iloc[:, 25:]  # Adjust if necessary based on your DataFrame structure

# Ensure X and y are aligned and have no NaN values
X = X[y.notna()]
y = y.dropna()

# One-hot encode categorical variables if necessary
X_encoded = pd.get_dummies(X, drop_first=True)

# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X_encoded, y, test_size=0.2, random_state=42)

# Initialize the models
models = {
    'Random Forest': RandomForestClassifier(n_estimators=100, random_state=42),
    'SVM': SVC(kernel='linear', random_state=42),
    'KNN': KNeighborsClassifier(n_neighbors=5),
    'Naive Bayes (Gaussian)': GaussianNB(),
    'Naive Bayes (Multinomial)': MultinomialNB(),
    'MLP': MLPClassifier(random_state=42, max_iter=1000),
    'Logistic Regression': LogisticRegression(max_iter=1000),
    'Gradient Boosting': GradientBoostingClassifier(),
    'Decision Tree': DecisionTreeClassifier(),
    'Perceptron': Perceptron(),
    'ZeroR Classifier': DummyClassifier(strategy='most_frequent')
}

# Train and evaluate each model
for model_name, model in models.items():
    model.fit(X_train, y_train)
    y_pred = model.predict(X_test)

    # Output the overall average classification report
    report = classification_report(y_test, y_pred, output_dict=True, zero_division=0)

    # Extract average scores
    accuracy = accuracy_score(y_test, y_pred)
    precision = report['weighted avg']['precision']
    recall = report['weighted avg']['recall']
    f1_score = report['weighted avg']['f1-score']

    print(f"--- {model_name} ---")
    print(f"Accuracy: {accuracy:.4f}")
    print(f"Precision (avg): {precision:.4f}")
    print(f"Recall (avg): {recall:.4f}")
    print(f"F1 Score (avg): {f1_score:.4f}\n")


--- Random Forest ---
Accuracy: 1.0000
Precision (avg): 1.0000
Recall (avg): 1.0000
F1 Score (avg): 1.0000

--- SVM ---
Accuracy: 0.9773
Precision (avg): 0.9777
Recall (avg): 0.9773
F1 Score (avg): 0.9773

--- KNN ---
Accuracy: 0.9394
Precision (avg): 0.9428
Recall (avg): 0.9394
F1 Score (avg): 0.9400

--- Naive Bayes (Gaussian) ---
Accuracy: 1.0000
Precision (avg): 1.0000
Recall (avg): 1.0000
F1 Score (avg): 1.0000

--- Naive Bayes (Multinomial) ---
Accuracy: 0.9318
Precision (avg): 0.9321
Recall (avg): 0.9318
F1 Score (avg): 0.9288

--- MLP ---
Accuracy: 1.0000
Precision (avg): 1.0000
Recall (avg): 1.0000
F1 Score (avg): 1.0000



STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


--- Logistic Regression ---
Accuracy: 1.0000
Precision (avg): 1.0000
Recall (avg): 1.0000
F1 Score (avg): 1.0000

--- Gradient Boosting ---
Accuracy: 1.0000
Precision (avg): 1.0000
Recall (avg): 1.0000
F1 Score (avg): 1.0000

--- Decision Tree ---
Accuracy: 1.0000
Precision (avg): 1.0000
Recall (avg): 1.0000
F1 Score (avg): 1.0000

--- Perceptron ---
Accuracy: 0.2652
Precision (avg): 0.1597
Recall (avg): 0.2652
F1 Score (avg): 0.1427

--- ZeroR Classifier ---
Accuracy: 0.2652
Precision (avg): 0.0703
Recall (avg): 0.2652
F1 Score (avg): 0.1111



#ANN

In [None]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import classification_report, accuracy_score, confusion_matrix
from keras.models import Sequential
from keras.layers import Dense
from keras.utils import to_categorical

# Assuming data_cleaned is your cleaned DataFrame with no NaN values
# Map depression levels to numeric values
y = data_cleaned['Depression_level'].map({'Minimal': 0, 'Mild': 1, 'Moderate': 2, 'Severely Moderate': 3, 'Severe Depression': 4})

# Select features from Q26 onwards, assuming Q26-Q57 are columns 25 to end
X = data_cleaned.iloc[:, 25:]  # Adjust if necessary based on your DataFrame structure

# Ensure X and y are aligned and have no NaN values
X = X[y.notna()]
y = y.dropna()

# One-hot encode categorical variables if necessary
X_encoded = pd.get_dummies(X, drop_first=True)

# Convert the target variable to a one-hot encoded format
y_encoded = to_categorical(y)

# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X_encoded, y_encoded, test_size=0.2, random_state=42)

# Standardize the features
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

# Define the ANN model
ann_model = Sequential()
ann_model.add(Dense(128, activation='relu', input_shape=(X_train.shape[1],)))
ann_model.add(Dense(64, activation='relu'))
ann_model.add(Dense(y_encoded.shape[1], activation='softmax'))  # Output layer for multi-class classification

# Compile the model
ann_model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

# Train the ANN model
ann_model.fit(X_train, y_train, epochs=10, batch_size=32, validation_split=0.2, verbose=1)

# Evaluate the ANN model
y_pred_ann = ann_model.predict(X_test)
y_pred_classes = np.argmax(y_pred_ann, axis=1)  # Convert predictions from one-hot to class labels
y_test_classes = np.argmax(y_test, axis=1)

# Output the overall average classification report
report = classification_report(y_test_classes, y_pred_classes, output_dict=True, zero_division=0)

# Extract average scores
accuracy = accuracy_score(y_test_classes, y_pred_classes)
precision = report['weighted avg']['precision']
recall = report['weighted avg']['recall']
f1_score = report['weighted avg']['f1-score']

print(f"--- ANN Model ---")
print(f"Accuracy: {accuracy:.4f}")
print(f"Precision (avg): {precision:.4f}")
print(f"Recall (avg): {recall:.4f}")
print(f"F1 Score (avg): {f1_score:.4f}\n")




Epoch 1/10


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m14/14[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 20ms/step - accuracy: 0.3636 - loss: 1.4806 - val_accuracy: 0.7264 - val_loss: 0.9319
Epoch 2/10
[1m14/14[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step - accuracy: 0.7532 - loss: 0.8395 - val_accuracy: 0.8585 - val_loss: 0.6009
Epoch 3/10
[1m14/14[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - accuracy: 0.8742 - loss: 0.5067 - val_accuracy: 0.8868 - val_loss: 0.4029
Epoch 4/10
[1m14/14[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 6ms/step - accuracy: 0.9324 - loss: 0.3048 - val_accuracy: 0.9434 - val_loss: 0.2512
Epoch 5/10
[1m14/14[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 6ms/step - accuracy: 0.9863 - loss: 0.1634 - val_accuracy: 0.9811 - val_loss: 0.1645
Epoch 6/10
[1m14/14[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 6ms/step - accuracy: 0.9969 - loss: 0.0897 - val_accuracy: 0.9811 - val_loss: 0.1150
Epoch 7/10
[1m14/14[0m [32m━━━━━━━━━━━━━━━━━━━━

#CNN

In [None]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import classification_report, accuracy_score, confusion_matrix
from keras.models import Sequential
from keras.layers import Conv1D, MaxPooling1D, Flatten, Dense
from keras.utils import to_categorical

# Assuming data_cleaned is your cleaned DataFrame with no NaN values
# Map depression levels to numeric values
y = data_cleaned['Depression_level'].map({'Minimal': 0, 'Mild': 1, 'Moderate': 2, 'Severely Moderate': 3, 'Severe Depression': 4})

# Select features from Q26 onwards, assuming Q26-Q57 are columns 25 to end
X = data_cleaned.iloc[:, 25:]  # Adjust if necessary based on your DataFrame structure

# Ensure X and y are aligned and have no NaN values
X = X[y.notna()]
y = y.dropna()

# One-hot encode categorical variables if necessary
X_encoded = pd.get_dummies(X, drop_first=True)

# Convert the target variable to a one-hot encoded format
y_encoded = to_categorical(y)

# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X_encoded, y_encoded, test_size=0.2, random_state=42)

# Standardize the features
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

# Reshape the data for CNN (adding a channel dimension)
X_train = X_train.reshape(X_train.shape[0], X_train.shape[1], 1)  # (num_samples, num_features, 1)
X_test = X_test.reshape(X_test.shape[0], X_test.shape[1], 1)

# Define the CNN model
cnn_model = Sequential()
cnn_model.add(Conv1D(filters=32, kernel_size=3, activation='relu', input_shape=(X_train.shape[1], 1)))
cnn_model.add(MaxPooling1D(pool_size=2))
cnn_model.add(Conv1D(filters=64, kernel_size=3, activation='relu'))
cnn_model.add(MaxPooling1D(pool_size=2))
cnn_model.add(Flatten())
cnn_model.add(Dense(128, activation='relu'))
cnn_model.add(Dense(y_encoded.shape[1], activation='softmax'))  # Output layer for multi-class classification

# Compile the model
cnn_model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

# Train the CNN model
cnn_model.fit(X_train, y_train, epochs=10, batch_size=32, validation_split=0.2, verbose=1)

# Evaluate the CNN model
y_pred_cnn = cnn_model.predict(X_test)
y_pred_classes = np.argmax(y_pred_cnn, axis=1)  # Convert predictions from one-hot to class labels
y_test_classes = np.argmax(y_test, axis=1)

# Output the overall average classification report
report = classification_report(y_test_classes, y_pred_classes, output_dict=True, zero_division=0)

# Extract average scores
accuracy = accuracy_score(y_test_classes, y_pred_classes)
precision = report['weighted avg']['precision']
recall = report['weighted avg']['recall']
f1_score = report['weighted avg']['f1-score']

print(f"--- CNN Model ---")
print(f"Accuracy: {accuracy:.4f}")
print(f"Precision (avg): {precision:.4f}")
print(f"Recall (avg): {recall:.4f}")
print(f"F1 Score (avg): {f1_score:.4f}\n")




  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


Epoch 1/10
[1m14/14[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 38ms/step - accuracy: 0.4091 - loss: 1.3715 - val_accuracy: 0.7264 - val_loss: 0.8491
Epoch 2/10
[1m14/14[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 16ms/step - accuracy: 0.7973 - loss: 0.6740 - val_accuracy: 0.8491 - val_loss: 0.4767
Epoch 3/10
[1m14/14[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 16ms/step - accuracy: 0.9220 - loss: 0.3036 - val_accuracy: 0.9434 - val_loss: 0.2520
Epoch 4/10
[1m14/14[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 21ms/step - accuracy: 0.9808 - loss: 0.1319 - val_accuracy: 0.9717 - val_loss: 0.1129
Epoch 5/10
[1m14/14[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 17ms/step - accuracy: 0.9992 - loss: 0.0493 - val_accuracy: 0.9906 - val_loss: 0.0489
Epoch 6/10
[1m14/14[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 19ms/step - accuracy: 1.0000 - loss: 0.0270 - val_accuracy: 0.9906 - val_loss: 0.0421
Epoch 7/10
[1m14/14[0m [32m━━━━

#Stacking

In [None]:
# Import necessary libraries
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier, StackingClassifier
from sklearn.svm import SVC
from sklearn.neighbors import KNeighborsClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.neural_network import MLPClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import classification_report, accuracy_score
import pandas as pd
import numpy as np

# Assuming data_cleaned is your cleaned DataFrame with no NaN values
y = data_cleaned['Depression_level'].map({
    'Minimal': 0,
    'Mild': 1,
    'Moderate': 2,
    'Severely Moderate': 3,
    'Severe Depression': 4
})

# Select features from Q26 onwards, assuming Q26-Q57 are columns 25 to end
X = data_cleaned.iloc[:, 25:]

# Ensure X and y are aligned and have no NaN values
X = X[y.notna()]
y = y.dropna()

# One-hot encode categorical variables if necessary
X_encoded = pd.get_dummies(X, drop_first=True)

# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X_encoded, y, test_size=0.2, random_state=42)

# Define base models for stacking
rf = RandomForestClassifier(n_estimators=100, random_state=42)
svm = SVC(kernel='linear', probability=True, random_state=42)
knn = KNeighborsClassifier(n_neighbors=5)
nb = GaussianNB()
mlp = MLPClassifier(random_state=42, max_iter=1000)

# Define the stacking ensemble with Logistic Regression as the meta model
base_estimators = [
    ('Random Forest', rf),
    ('SVM', svm),
    ('KNN', knn),
    ('Naive Bayes', nb),
    ('MLP', mlp)
]
meta_model = LogisticRegression()

stacking_model = StackingClassifier(
    estimators=base_estimators,
    final_estimator=meta_model,
    cv=5
)

# Train and evaluate the stacking model
stacking_model.fit(X_train, y_train)
y_pred_stack = stacking_model.predict(X_test)

# Extract average scores
report = classification_report(y_test, y_pred_stack, output_dict=True, zero_division=0)
accuracy = accuracy_score(y_test, y_pred_stack)
precision = report['weighted avg']['precision']
recall = report['weighted avg']['recall']
f1_score = report['weighted avg']['f1-score']

print("--- Stacking Model Results ---")
print(f"Accuracy: {accuracy:.4f}")
print(f"Precision (avg): {precision:.4f}")
print(f"Recall (avg): {recall:.4f}")
print(f"F1 Score (avg): {f1_score:.4f}")


--- Stacking Model Results ---
Accuracy: 1.0000
Precision (avg): 1.0000
Recall (avg): 1.0000
F1 Score (avg): 1.0000


#ANxiety start

#Stacking

In [None]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.ensemble import StackingClassifier, RandomForestClassifier
from sklearn.svm import SVC
from sklearn.neighbors import KNeighborsClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.neural_network import MLPClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import classification_report, accuracy_score, confusion_matrix
from sklearn.preprocessing import StandardScaler

# Prepare data
X_anxiety = data_cleaned.iloc[:, 25:57]  # Selecting relevant features
y_anxiety = data_cleaned['Anxiety_level'].map({
    'Minimal': 0, 'Mild': 1, 'Moderate': 2, 'Severely Moderate': 3, 'Severe Anxiety': 4
})

# Drop rows with NaN in y if any
X_anxiety = X_anxiety[y_anxiety.notna()]
y_anxiety = y_anxiety.dropna()

# One-hot encode categorical columns if needed
X_encoded_anxiety = pd.get_dummies(X_anxiety, drop_first=True)

# Scale features
scaler = StandardScaler()
X_scaled_anxiety = scaler.fit_transform(X_encoded_anxiety)

# Split data
X_train_anxiety, X_test_anxiety, y_train_anxiety, y_test_anxiety = train_test_split(
    X_scaled_anxiety, y_anxiety, test_size=0.2, random_state=42
)

# Define base models
estimators = [
    ('Random Forest', RandomForestClassifier(random_state=42)),
    ('SVM', SVC(kernel='linear', probability=True, random_state=42)),
    ('KNN', KNeighborsClassifier(n_neighbors=5)),
    ('Naive Bayes', GaussianNB()),
    ('MLP', MLPClassifier(max_iter=300, random_state=42))
]

# Define the stacking ensemble with Logistic Regression as meta-classifier
stacking_model_anxiety = StackingClassifier(
    estimators=estimators,
    final_estimator=LogisticRegression(),  # Meta-classifier
    cv=5  # Number of cross-validation folds for stacking
)

# Train the stacking model
stacking_model_anxiety.fit(X_train_anxiety, y_train_anxiety)

# Predictions and evaluation
y_pred_anxiety = stacking_model_anxiety.predict(X_test_anxiety)

# Extract average accuracy, precision, recall, F1-score
report = classification_report(y_test_anxiety, y_pred_anxiety, output_dict=True, zero_division=0)
accuracy = accuracy_score(y_test_anxiety, y_pred_anxiety)
precision_avg = report['weighted avg']['precision']
recall_avg = report['weighted avg']['recall']
f1_score_avg = report['weighted avg']['f1-score']

print("--- Stacking Model Results for Anxiety ---")
print(f"Accuracy (avg): {accuracy:.4f}")
print(f"Precision (avg): {precision_avg:.4f}")
print(f"Recall (avg): {recall_avg:.4f}")
print(f"F1 Score (avg): {f1_score_avg:.4f}")




--- Stacking Model Results for Anxiety ---
Accuracy (avg): 0.7652
Precision (avg): 0.7654
Recall (avg): 0.7652
F1 Score (avg): 0.7628




#Rest algorithms

In [None]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier, ExtraTreesClassifier, AdaBoostClassifier, GradientBoostingClassifier
from sklearn.linear_model import LogisticRegression, RidgeClassifier
from sklearn.svm import SVC
from sklearn.tree import DecisionTreeClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.neighbors import KNeighborsClassifier
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis, QuadraticDiscriminantAnalysis
from sklearn.dummy import DummyClassifier
import lightgbm as lgb
from sklearn.metrics import classification_report, accuracy_score
from sklearn.preprocessing import StandardScaler

# Prepare data
X_anxiety = data_cleaned.iloc[:, 25:57]  # Selecting relevant features
y_anxiety = data_cleaned['Anxiety_level'].map({
    'Minimal': 0, 'Mild': 1, 'Moderate': 2, 'Severely Moderate': 3, 'Severe Anxiety': 4
})

# Drop rows with NaN in y if any
X_anxiety = X_anxiety[y_anxiety.notna()]
y_anxiety = y_anxiety.dropna()

# One-hot encode categorical columns if needed
X_encoded_anxiety = pd.get_dummies(X_anxiety, drop_first=True)

# Scale features
scaler = StandardScaler()
X_scaled_anxiety = scaler.fit_transform(X_encoded_anxiety)

# Split data
X_train_anxiety, X_test_anxiety, y_train_anxiety, y_test_anxiety = train_test_split(
    X_scaled_anxiety, y_anxiety, test_size=0.2, random_state=42
)

# Define models
models = {
    "Naive Bayes": GaussianNB(),
    "Logistic Regression": LogisticRegression(),
    "Random Forest": RandomForestClassifier(random_state=42),
    "Extra Trees": ExtraTreesClassifier(random_state=42),
    "AdaBoost": AdaBoostClassifier(random_state=42),
    "SVM": SVC(kernel='linear', probability=True, random_state=42),
    "Decision Tree": DecisionTreeClassifier(random_state=42),
    "Gradient Boosting": GradientBoostingClassifier(random_state=42),
    "Ridge Classifier": RidgeClassifier(),
    "LDA": LinearDiscriminantAnalysis(),
    "KNN": KNeighborsClassifier(n_neighbors=5),
    "LightGBM": lgb.LGBMClassifier(random_state=42),
    "Dummy Classifier": DummyClassifier(strategy="most_frequent"),
    "QDA": QuadraticDiscriminantAnalysis()
}

# Train and evaluate each model
for model_name, model in models.items():
    model.fit(X_train_anxiety, y_train_anxiety)
    y_pred_anxiety = model.predict(X_test_anxiety)

    # Extract accuracy, precision, recall, F1-score
    report = classification_report(y_test_anxiety, y_pred_anxiety, output_dict=True, zero_division=0)
    accuracy = accuracy_score(y_test_anxiety, y_pred_anxiety)
    precision_avg = report['weighted avg']['precision']
    recall_avg = report['weighted avg']['recall']
    f1_score_avg = report['weighted avg']['f1-score']

    # Print results for each model
    print(f"--- {model_name} ---")
    print(f"Accuracy: {accuracy:.4f}")
    print(f"Precision (avg): {precision_avg:.4f}")
    print(f"Recall (avg): {recall_avg:.4f}")
    print(f"F1 Score (avg): {f1_score_avg:.4f}")
    print()


Dask dataframe query planning is disabled because dask-expr is not installed.

You can install it with `pip install dask[dataframe]` or `conda install dask`.
This will raise in a future version.



--- Naive Bayes ---
Accuracy: 0.6818
Precision (avg): 0.6971
Recall (avg): 0.6818
F1 Score (avg): 0.6853

--- Logistic Regression ---
Accuracy: 0.7348
Precision (avg): 0.7321
Recall (avg): 0.7348
F1 Score (avg): 0.7296



STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


--- Random Forest ---
Accuracy: 0.7576
Precision (avg): 0.7596
Recall (avg): 0.7576
F1 Score (avg): 0.7521

--- Extra Trees ---
Accuracy: 0.7500
Precision (avg): 0.7489
Recall (avg): 0.7500
F1 Score (avg): 0.7438





--- AdaBoost ---
Accuracy: 0.5833
Precision (avg): 0.5237
Recall (avg): 0.5833
F1 Score (avg): 0.4797

--- SVM ---
Accuracy: 0.6818
Precision (avg): 0.6781
Recall (avg): 0.6818
F1 Score (avg): 0.6791

--- Decision Tree ---
Accuracy: 0.6894
Precision (avg): 0.6904
Recall (avg): 0.6894
F1 Score (avg): 0.6877

--- Gradient Boosting ---
Accuracy: 0.7652
Precision (avg): 0.7692
Recall (avg): 0.7652
F1 Score (avg): 0.7624

--- Ridge Classifier ---
Accuracy: 0.6591
Precision (avg): 0.6370
Recall (avg): 0.6591
F1 Score (avg): 0.6419

--- LDA ---
Accuracy: 0.7045
Precision (avg): 0.7056
Recall (avg): 0.7045
F1 Score (avg): 0.7038

--- KNN ---
Accuracy: 0.7197
Precision (avg): 0.7234
Recall (avg): 0.7197
F1 Score (avg): 0.7204

[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.001892 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 268
[LightGBM] [I

