In [None]:
#! pip install matplotlib
#! pip install seaborn
#! pip install tensorflow

In [None]:
import pandas as pd
import numpy as np
import re
import matplotlib.pyplot as plt 
import seaborn as sns
import warnings
warnings.filterwarnings('ignore')
import tensorflow as tf
from tensorflow.keras.models import Sequential, load_model
from tensorflow.keras.layers import Conv1D, MaxPooling1D, Flatten, Dense, Dropout
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split
from sklearn.utils.class_weight import compute_class_weight

In [None]:
# Read data.csv, splitting fields on tab characters.
dataset = pd.read_csv("data.csv", sep='\t')

In [None]:
dataset.head(10)

In [None]:
dataset.info()

In [None]:
# Check for duplicates on each row.
dataset.duplicated().value_counts()

In [None]:
# Check for unique values.
dataset.nunique()

In [None]:
columns  = dataset.columns
print('Attributes',columns)

In [None]:
for column in columns:
    print(column)

# **Data Preprocessing**

In [None]:
# Column names of the form "Q<number>A" are the ones kept for scoring.
pattern = r'^Q\d+A$'
# Keep, in their original order, the column names the pattern matches.
scale_column = list(filter(re.compile(pattern).match, dataset.columns))
# Build a new DataFrame from just those columns.
extracted_data = dataset[scale_column]

In [None]:
extracted_data

In [None]:
#extracted_data.to_csv('mental_health_ex')

In [None]:
#Check if there are any missing or empty items
extracted_data.isnull().sum()

In [None]:
#Subtract 1 from all the response to change the scale from 1 to 4 to 0 to 3
def sub(data):
    """Return a copy of ``data`` with 1 subtracted from every value.

    Used to move responses recorded on a 1-4 scale onto a 0-3 scale.
    """
    shifted = data.sub(1, axis='columns')
    return shifted
regularized_dataset=sub(extracted_data) 

# Question numbers that make up each subscale.
DASS_keys = {
    'Depression': [3, 5, 10, 13, 16, 17, 21, 24, 26, 31, 34, 37, 38, 42],
    'Anxiety': [2, 4, 7, 9, 15, 19, 20, 23, 25, 28, 30, 36, 40, 41],
    'Stress': [1, 6, 8, 11, 12, 14, 18, 22, 27, 29, 32, 33, 35, 39],
}
# Turn each question number into its answer-column name, e.g. 3 -> "Q3A".
Depression_keys = [f"Q{number}A" for number in DASS_keys["Depression"]]
Stress_keys = [f"Q{number}A" for number in DASS_keys["Stress"]]
Anxiety_keys = [f"Q{number}A" for number in DASS_keys["Anxiety"]]
# One frame per subscale, each holding only that subscale's answer columns.
depression_dataset = regularized_dataset.filter(items=Depression_keys)
stress_dataset = regularized_dataset.filter(items=Stress_keys)
anxiety_dataset = regularized_dataset.filter(items=Anxiety_keys)

In [None]:
#regularized_dataset.to_csv('regularized.csv')

In [None]:
#Obtain the total score for each dataset here
def scores(data):
    """Return a copy of ``data`` with a ``Total_Count`` column of per-row sums.

    Only the item columns are summed. Columns named ``Total_Count`` or
    ``Label`` (the two derived columns this notebook adds) are left out of the
    sum, so calling the function again on its own output, or on a frame that
    has already been labelled, gives the same totals instead of adding the
    previous total into the new one.

    The input frame is not modified; a new frame is returned.
    """
    derived_columns = ('Total_Count', 'Label')
    item_columns = [name for name in data.columns if name not in derived_columns]
    # assign() returns a new frame: a fresh Total_Count is appended as the last
    # column, an existing one is overwritten in place.
    return data.assign(Total_Count=data[item_columns].sum(axis=1))
depression_dataset=scores(depression_dataset)
stress_dataset=scores(stress_dataset)
anxiety_dataset=scores(anxiety_dataset)

**Display the newly generated datasets**

In [None]:
depression_dataset.head(10)

In [None]:
stress_dataset.head(10)

In [None]:
anxiety_dataset.head(10)

# **Depression Dataset**

In [None]:
#Declaring function to assign the label
def condition(x):
    """Map a depression total score to its severity label.

    Returns None when the score falls in none of the bands (for example a
    value strictly between two integer cut-offs).
    """
    # (lowest score, highest score, label); both ends are inclusive.
    bands = (
        (float('-inf'), 9, 'Normal'),
        (10, 13, 'Mild'),
        (14, 20, 'Moderate'),
        (21, 27, 'Severe'),
        (28, float('inf'), 'Extremely Severe'),
    )
    for lowest, highest, label in bands:
        if lowest <= x <= highest:
            return label
    return None

#Apply the condition and drop the "Total_Count" column
depression_dataset['Label']=depression_dataset['Total_Count'].apply(condition)
final_depression_dataset = depression_dataset.drop(columns=['Total_Count'])
final_depression_dataset.head(10)

In [None]:
# Severity labels in the order they appear on the x-axis, with one bar colour each
desired_labels = ['Extremely Severe', 'Severe', 'Moderate', 'Mild', 'Normal']
colors = ['skyblue', 'green', 'yellow', 'orange', 'gray']

# Count how often each label occurs, then put the counts in the display order
label_counts = final_depression_dataset['Label'].value_counts()
label_counts_ordered = label_counts.reindex(desired_labels)
print(label_counts_ordered)

# Draw the bar chart with axis labels and a title
fig, ax = plt.subplots()
ax.bar(label_counts_ordered.index, label_counts_ordered.values, color=colors)
ax.set_xlabel('Label')
ax.set_ylabel('Frequency')
ax.set_title('Depression Dataset Distribution of Labels')

# Show the plot
plt.show()

# **Stress Dataset**

In [None]:
#Declaring function to assign the label
def condition(x):
    """Map a stress total score to its severity label.

    Returns None when the score falls in none of the bands (for example a
    value strictly between two integer cut-offs).
    """
    # (lowest score, highest score, label); both ends are inclusive.
    bands = (
        (float('-inf'), 14, 'Normal'),
        (15, 18, 'Mild'),
        (19, 25, 'Moderate'),
        (26, 33, 'Severe'),
        (34, float('inf'), 'Extremely Severe'),
    )
    for lowest, highest, label in bands:
        if lowest <= x <= highest:
            return label
    return None

#Apply the condition and drop the "Total_Count" column
stress_dataset['Label']=stress_dataset['Total_Count'].apply(condition)
final_stress_dataset = stress_dataset.drop(columns=['Total_Count'])
final_stress_dataset.head(10)

In [None]:
# Severity labels in the order they appear on the x-axis, with one bar colour each
desired_labels = ['Extremely Severe', 'Severe', 'Moderate', 'Mild', 'Normal']
colors = ['skyblue', 'green', 'yellow', 'orange', 'gray']

# Count how often each label occurs, then put the counts in the display order
label_counts = final_stress_dataset['Label'].value_counts()
label_counts_ordered = label_counts.reindex(desired_labels)
print(label_counts_ordered)

# Draw the bar chart with axis labels and a title
fig, ax = plt.subplots()
ax.bar(label_counts_ordered.index, label_counts_ordered.values, color=colors)
ax.set_xlabel('Label')
ax.set_ylabel('Frequency')
ax.set_title('Stress Dataset Distribution of Labels')

# Show the plot
plt.show()

# **Anxiety Dataset**

In [None]:
#Declaring function to assign the label
def condition(x):
    """Map an anxiety total score to its severity label.

    Returns None when the score falls in none of the bands (for example a
    value strictly between two of the lower integer cut-offs).
    """
    # The bands do not overlap, so they are checked from the most severe down.
    if x > 19:
        return 'Extremely Severe'
    if 15 <= x <= 19:
        return 'Severe'
    if 10 <= x <= 14:
        return 'Moderate'
    if 8 <= x <= 9:
        return 'Mild'
    if x <= 7:
        return 'Normal'
    return None

#Apply the condition and drop the "Total_Count" column
anxiety_dataset['Label']=anxiety_dataset['Total_Count'].apply(condition)
final_anxiety_dataset = anxiety_dataset.drop(columns=['Total_Count'])
final_anxiety_dataset.head(10)

In [None]:
# Severity labels in the order they appear on the x-axis, with one bar colour each
desired_labels = ['Extremely Severe', 'Severe', 'Moderate', 'Mild', 'Normal']
colors = ['skyblue', 'green', 'yellow', 'orange', 'gray']

# Count how often each label occurs, then put the counts in the display order
label_counts = final_anxiety_dataset['Label'].value_counts()
label_counts_ordered = label_counts.reindex(desired_labels)
print(label_counts_ordered)

# Draw the bar chart with axis labels and a title
fig, ax = plt.subplots()
ax.bar(label_counts_ordered.index, label_counts_ordered.values, color=colors)
ax.set_xlabel('Label')
ax.set_ylabel('Frequency')
ax.set_title('Anxiety Dataset Distribution of Labels')

# Show the plot
plt.show()

# **Depression Dataset**

In [None]:
#Separate the data and labels
depression_labels = final_depression_dataset["Label"]
depression_X = final_depression_dataset.drop(columns=["Label"])

In [None]:
depression_labels

# **Stress Dataset**

In [None]:
#Separate the data and labels
stress_labels = final_stress_dataset["Label"]
stress_X = final_stress_dataset.drop(columns=["Label"])

In [None]:
stress_labels

In [None]:
stress_X

# **Anxiety Dataset**

In [None]:
#Separate the data and labels
anxiety_labels = final_anxiety_dataset["Label"]
anxiety_X = final_anxiety_dataset.drop(columns=["Label"])

In [None]:
anxiety_labels

In [None]:
anxiety_X

# **Model Training and testing**

In [None]:
from keras.utils import to_categorical
from sklearn.metrics import precision_score, recall_score, f1_score

def build_cnn_model(input_shape, num_classes):
    """Build and compile the 1-D convolutional classifier.

    Args:
        input_shape: Shape of one input sample, passed to the first Conv1D layer.
        num_classes: Number of units in the final softmax layer.

    Returns:
        A compiled Sequential model using the Adam optimizer, categorical
        cross-entropy loss and the accuracy metric.
    """
    model = Sequential()
    # Two convolution + max-pooling stages
    model.add(Conv1D(filters=32, kernel_size=3, activation='softplus', input_shape=input_shape))
    model.add(MaxPooling1D(pool_size=2))
    model.add(Conv1D(filters=64, kernel_size=3, activation='softplus'))
    model.add(MaxPooling1D(pool_size=2))
    # Dense classification head with dropout
    model.add(Flatten())
    model.add(Dense(128, activation='softplus'))
    model.add(Dropout(0.5))
    model.add(Dense(num_classes, activation='softmax'))
    model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])
    return model

def train_and_save_model(X_train, y_train, condition):
    """Train a CNN on the given data and save it to disk.

    Args:
        X_train: Training inputs; everything after the first axis is used as
            the model's input shape.
        y_train: One-hot training targets; the size of axis 1 is used as the
            number of classes.
        condition: Name used as the prefix of the saved model file.

    Returns:
        The trained model, also saved as "<condition>_cnn_model.keras".
    """
    sample_shape = X_train.shape[1:]
    class_count = y_train.shape[1]
    model = build_cnn_model(sample_shape, num_classes=class_count)
    # Fit for 10 epochs, holding out 20% of the training data for validation
    model.fit(X_train, y_train, epochs=10, batch_size=32, validation_split=0.2)
    # Write the trained model to disk
    model.save(f"{condition}_cnn_model.keras")
    return model

def evaluate_cnn_model(X_test, y_test, condition):
    """Load the saved model for ``condition`` and score it on the test data.

    Args:
        X_test: Test inputs passed to the model's predict().
        y_test: One-hot test targets; the true class is the argmax along axis 1.
        condition: Name used as the prefix of the saved model file.

    Returns:
        A (precision, recall, f1) tuple, each computed with weighted averaging.
    """
    saved_model = load_model(f"{condition}_cnn_model.keras")
    class_probabilities = saved_model.predict(X_test)
    predicted_classes = np.argmax(class_probabilities, axis=1)
    # Decode the one-hot targets once and reuse them for all three metrics
    true_classes = np.argmax(y_test, axis=1)
    precision, recall, f1 = (
        scorer(true_classes, predicted_classes, average='weighted')
        for scorer in (precision_score, recall_score, f1_score)
    )
    return precision, recall, f1

# Train, save and evaluate one CNN model per condition
for condition, X, y in [('Stress', stress_X, stress_labels), ('Depression', depression_X, depression_labels), ('Anxiety', anxiety_X, anxiety_labels)]:
    print(f"Training model for {condition}...")
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
    # Add a trailing axis of size 1 to the inputs for the Conv1D layers
    X_train_reshaped = np.expand_dims(X_train, axis=-1)
    X_test_reshaped = np.expand_dims(X_test, axis=-1)
    # Use a single encoder for both splits. Fitting separate encoders on
    # y_train and y_test gives each split its own label -> integer mapping, so
    # a class missing from one split would shift the codes and the one-hot
    # width would no longer match between train and test.
    label_encoder = LabelEncoder().fit(y)
    num_classes = len(label_encoder.classes_)
    y_train_categorical = to_categorical(label_encoder.transform(y_train), num_classes=num_classes)
    y_test_categorical = to_categorical(label_encoder.transform(y_test), num_classes=num_classes)
    model = train_and_save_model(X_train_reshaped, y_train_categorical, condition)
    test_loss, test_accuracy = model.evaluate(X_test_reshaped, y_test_categorical)
    print(f"Test Accuracy for {condition}: {test_accuracy}")
    # Reloads the saved model from disk and computes the weighted metrics
    precision, recall, f1 = evaluate_cnn_model(X_test_reshaped, y_test_categorical, condition)
    print(f"Precision for {condition}: {precision}")
    print(f"Recall for {condition}: {recall}")
    print(f"F1 Score for {condition}: {f1}")


# Prediction

In [None]:
def predict_with_model(X_test,condition):
    """Load the saved model for ``condition`` and predict class indices.

    Args:
        X_test: Inputs passed to the model's predict().
        condition: Name used as the prefix of the saved model file.

    Returns:
        For each input sample, the index of the class with the highest
        predicted probability.
    """
    saved_model = load_model(f"{condition}_cnn_model.keras")
    class_probabilities = saved_model.predict(X_test)
    # Pick the class with the highest probability for every sample
    return np.argmax(class_probabilities, axis=1)
# Zero-based positions of each subscale's answers within a 42-item response
# list (the question numbers in DASS_keys minus one).
indices = {
    'Depression': [2, 4, 9, 12, 15, 16, 20, 23, 25, 30, 33, 36, 37, 41],
    'Anxiety': [1, 3, 6, 8, 14, 18, 19, 22, 24, 27, 29, 35, 39, 40],
    'Stress': [0, 5, 7, 10, 11, 13, 17, 21, 26, 28, 31, 32, 34, 38]
}

# One sample response, with one answer per question.
# NOTE(review): the models were trained on answers shifted to a 0-3 scale;
# confirm these values are already on that scale and not raw 1-4 responses.
test = [1, 2, 3, 2, 2, 1, 2, 1, 3, 3, 3, 1, 3, 1, 2, 3, 1, 1, 2, 1, 3, 1, 2, 1, 2, 3, 3, 2, 1, 1, 1, 1, 1, 1, 1, 1, 3, 3, 3, 2, 2, 2]

# Split the response into the answers belonging to each subscale
Depression_test = [test[i] for i in indices['Depression']]
Stress_test = [test[i] for i in indices['Stress']]
Anxiety_test = [test[i] for i in indices['Anxiety']]

# The training labels were converted to integers with LabelEncoder, which
# numbers classes in sorted (alphabetical) order, not severity order. The list
# used to decode predictions must follow that order, otherwise indices 1, 3 and
# 4 are reported under the wrong severity name.
# Assumes all five severity levels occurred in each model's training labels.
classes = ["Extremely Severe", "Mild", "Moderate", "Normal", "Severe"]

# Reshape each answer list to (1 sample, number of answers, 1) and predict
X_depression_test = np.array(Depression_test).reshape(1, len(Depression_test), 1)
p_d = predict_with_model(X_depression_test, 'Depression')
print(p_d)
X_stress_test = np.array(Stress_test).reshape(1, len(Stress_test), 1)
p_s = predict_with_model(X_stress_test, 'Stress')
print(p_s)
X_anxiety_test = np.array(Anxiety_test).reshape(1, len(Anxiety_test), 1)
p_a = predict_with_model(X_anxiety_test, 'Anxiety')
print(p_a)

# Decode the predicted class indices into severity names
depression_sevirity = [classes[i] for i in p_d]
stress_sevirity = [classes[i] for i in p_s]
anxiety_sevirity = [classes[i] for i in p_a]

print(depression_sevirity)
print()
print(stress_sevirity)
print()
print(anxiety_sevirity )

In [None]:
print(sum(Depression_test))
print(sum(Stress_test))
print(sum(Anxiety_test))

In [None]:
print(Depression_test)
print(Stress_test)
print(Anxiety_test)