# CNN-EDAIC

In [None]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import StandardScaler
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, Conv2D, MaxPooling2D, Flatten, Dense, Dropout
from sklearn.metrics import classification_report, accuracy_score

def pad_features_to_square(X):
    num_features = X.shape[1]
    square_size = int(np.ceil(np.sqrt(num_features)))
    num_to_pad = square_size**2 - num_features
    padded_features = np.pad(X, ((0, 0), (0, num_to_pad)), mode='constant')
    return padded_features

def prepare_data_for_cnn(features, size):
    padded_features = pad_features_to_square(features)
    reshaped_features = padded_features.reshape((features.shape[0], size, size, 1))
    return reshaped_features

def create_model():
    train_data = pd.read_csv('/content/drive/MyDrive/Depression/EDAIC Dataset/training_labels.csv')
    validation_data = pd.read_csv('/content/drive/MyDrive/Depression/EDAIC Dataset/validation_labels.csv')
    test_data = pd.read_csv('/content/drive/MyDrive/Depression/EDAIC Dataset/testing_labels.csv')

    train_data.dropna(inplace=True)
    validation_data.dropna(inplace=True)
    test_data.dropna(inplace=True)

    X_train = train_data.drop(columns=['Class', 'name']).values
    y_train = train_data['Class'].values
    X_validation = validation_data.drop(columns=['Class', 'name']).values
    y_validation = validation_data['Class'].values
    X_test = test_data.drop(columns=['Class', 'name']).values
    y_test = test_data['Class'].values

    scaler = StandardScaler().fit(np.vstack((X_train, X_validation, X_test)))
    X_train_scaled = scaler.transform(X_train)
    X_validation_scaled = scaler.transform(X_validation)
    X_test_scaled = scaler.transform(X_test)

    size = int(np.sqrt(pad_features_to_square(X_train_scaled).shape[1]))

    X_train_cnn = prepare_data_for_cnn(X_train_scaled, size)
    X_validation_cnn = prepare_data_for_cnn(X_validation_scaled, size)
    X_test_cnn = prepare_data_for_cnn(X_test_scaled, size)

    inputs = Input(shape=(size, size, 1))
    x = Conv2D(32, kernel_size=(3, 3), activation='relu')(inputs)
    x = MaxPooling2D(pool_size=(2, 2))(x)
    x = Dropout(0.25)(x)
    x = Conv2D(64, (3, 3), activation='relu')(x)
    x = MaxPooling2D(pool_size=(2, 2))(x)
    x = Dropout(0.25)(x)
    x = Flatten()(x)
    x = Dense(128, activation='relu')(x)
    x = Dropout(0.5)(x)
    outputs = Dense(1, activation='sigmoid')(x)

    model = Model(inputs=inputs, outputs=outputs)
    model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

    model.fit(X_train_cnn, y_train, epochs=10, batch_size=64, validation_data=(X_validation_cnn, y_validation))

    y_pred = (model.predict(X_test_cnn) > 0.5).astype(int)
    print("Classification Report for Testing Set:")
    print(classification_report(y_test, y_pred))
    accuracy = accuracy_score(y_test, y_pred)
    print(f"Accuracy for Testing Set: {accuracy:.4f}")

    return model, accuracy

# Call the function to create and train the model
trained_model, test_accuracy = create_model()


Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Classification Report for Testing Set:
              precision    recall  f1-score   support

           0       0.77      0.93      0.85      1897
           1       0.20      0.06      0.09       549

    accuracy                           0.74      2446
   macro avg       0.49      0.50      0.47      2446
weighted avg       0.64      0.74      0.68      2446

Accuracy for Testing Set: 0.7371


# **1D-CNN**

In [None]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import StandardScaler
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, Conv1D, MaxPooling1D, Flatten, Dense, Dropout
from sklearn.metrics import classification_report, accuracy_score

def prepare_data_for_cnn(features):
    # Add a new axis for channels
    reshaped_features = np.expand_dims(features, axis=-1)
    return reshaped_features

def create_model():
    train_data = pd.read_csv('/content/drive/MyDrive/Depression/EDAIC Dataset/training_labels.csv', header=0, index_col=0)
    validation_data = pd.read_csv('/content/drive/MyDrive/Depression/EDAIC Dataset/validation_labels.csv', header=0, index_col=0)
    test_data = pd.read_csv('/content/drive/MyDrive/Depression/EDAIC Dataset/testing_labels.csv', header=0, index_col=0)

    train_data.dropna(inplace=True)
    validation_data.dropna(inplace=True)
    test_data.dropna(inplace=True)

    # Select only the columns from the second column onwards as features
    X_train = train_data.iloc[:, 1:].values
    y_train = train_data['Class'].values
    X_validation = validation_data.iloc[:, 1:].values
    y_validation = validation_data['Class'].values
    X_test = test_data.iloc[:, 1:].values
    y_test = test_data['Class'].values

    scaler = StandardScaler().fit(np.vstack((X_train, X_validation, X_test)))
    X_train_scaled = scaler.transform(X_train)
    X_validation_scaled = scaler.transform(X_validation)
    X_test_scaled = scaler.transform(X_test)

    X_train_cnn = prepare_data_for_cnn(X_train_scaled)
    X_validation_cnn = prepare_data_for_cnn(X_validation_scaled)
    X_test_cnn = prepare_data_for_cnn(X_test_scaled)

    inputs = Input(shape=(X_train_cnn.shape[1], 1))  # Input shape for 1D CNN
    x = Conv1D(32, kernel_size=3, activation='relu')(inputs)
    x = MaxPooling1D(pool_size=2)(x)
    x = Conv1D(64, kernel_size=3, activation='relu')(x)
    x = MaxPooling1D(pool_size=2)(x)
    x = Flatten()(x)
    x = Dense(128, activation='relu')(x)
    x = Dropout(0.5)(x)
    outputs = Dense(1, activation='sigmoid')(x)

    model = Model(inputs=inputs, outputs=outputs)
    model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

    model.fit(X_train_cnn, y_train, epochs=10, batch_size=64, validation_data=(X_validation_cnn, y_validation))

    y_pred = (model.predict(X_test_cnn) > 0.5).astype(int)
    print("Classification Report for Testing Set:")
    print(classification_report(y_test, y_pred))
    accuracy = accuracy_score(y_test, y_pred)
    print(f"Accuracy for Testing Set: {accuracy:.4f}")

    return model, accuracy

# Call the function to create and train the model
trained_model, test_accuracy = create_model()


Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Classification Report for Testing Set:
              precision    recall  f1-score   support

           0       0.78      0.98      0.86      1897
           1       0.24      0.03      0.05       549

    accuracy                           0.76      2446
   macro avg       0.51      0.50      0.46      2446
weighted avg       0.66      0.76      0.68      2446

Accuracy for Testing Set: 0.7633


# **save model 1D**
# latest_CNN.h5 is 1D CNN

In [None]:
from tensorflow.keras.models import save_model

# Save the model
save_model(trained_model, 'latest_CNN.h5')


  save_model(trained_model, 'latest_CNN.h5')


# Saving the Model


In [None]:
# After training
import tensorflow as tf
model.save('/content/drive/MyDrive/Depression/EDAIC Dataset/CNN_2-EDAIC.h5')  # Save the model

In [None]:
from google.colab import drive
drive.mount('/content/drive')

# Load and Test on EDAIC


In [None]:
import pandas as pd
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import classification_report, accuracy_score
from tensorflow.keras.models import load_model

# Load the saved model\

model_save_path = '/content/drive/MyDrive/latest_CNN.h5'  # Update with your model's path
loaded_model = load_model(model_save_path)
print("Model loaded successfully.")

# Load and prepare the test data
test_data_path = '/content/drive/MyDrive/Depression/EDAIC Dataset/testing_labels.csv'  # Update with your test data's path
test_data = pd.read_csv(test_data_path)

# Assuming your CSV has specific columns to drop (like 'name') and a 'Class' column for labels
X_test = test_data.drop(columns=['Class', 'name']).values
y_test = test_data['Class'].values

# Scale the features
scaler = StandardScaler()
X_test_scaled = scaler.fit_transform(X_test)

# Reshape the data if your model expects a specific input shape (example for CNN)
X_test_scaled = X_test_scaled.reshape((X_test_scaled.shape[0], X_test_scaled.shape[1], 1))

# Make predictions on the test data
y_pred = (loaded_model.predict(X_test_scaled) > 0.5).astype(int)

# Calculate and print the classification report and accuracy
print("CNN Model Classification Report for Testing Set:")
print(classification_report(y_test, y_pred))
accuracy_test = accuracy_score(y_test, y_pred)
print(f"\nAccuracy for CNN Testing Set: {accuracy_test:.4f}")


Model loaded successfully.
CNN Model Classification Report for Testing Set:
              precision    recall  f1-score   support

           0       0.78      0.95      0.86      1897
           1       0.26      0.06      0.10       549

    accuracy                           0.75      2446
   macro avg       0.52      0.51      0.48      2446
weighted avg       0.66      0.75      0.69      2446


Accuracy for CNN Testing Set: 0.7522


# Test on Android

In [None]:
import pandas as pd
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import classification_report, accuracy_score
from tensorflow.keras.models import load_model

# Load the saved model
model_save_path = '/content/drive/MyDrive/latest_CNN.h5'  # Update with your model's path
loaded_model = load_model(model_save_path)
print("Model loaded successfully.")

# Load and prepare the test data
test_data_path = '/content/drive/MyDrive/Depression/Android Dataset/Interview_test.csv'  # Update with your test data's path
test_data = pd.read_csv(test_data_path)

# Assuming your CSV has specific columns to drop (like 'name') and a 'Class' column for labels
X_test = test_data.drop(columns=['Class', 'name']).values
y_test = test_data['Class'].values

# Scale the features
scaler = StandardScaler()
X_test_scaled = scaler.fit_transform(X_test)

# Reshape the data if your model expects a specific input shape (example for CNN)
X_test_scaled = X_test_scaled.reshape((X_test_scaled.shape[0], X_test_scaled.shape[1], 1))

# Make predictions on the test data
y_pred = (loaded_model.predict(X_test_scaled) > 0.5).astype(int)

# Calculate and print the classification report and accuracy
print("CNN Model Classification Report for Testing Set:")
print(classification_report(y_test, y_pred))
accuracy_test = accuracy_score(y_test, y_pred)
print(f"\nAccuracy for CNN Testing Set: {accuracy_test:.4f}")


Model loaded successfully.
CNN Model Classification Report for Testing Set:
              precision    recall  f1-score   support

           0       0.42      0.94      0.58       624
           1       0.62      0.07      0.13       875

    accuracy                           0.43      1499
   macro avg       0.52      0.51      0.35      1499
weighted avg       0.54      0.43      0.31      1499


Accuracy for CNN Testing Set: 0.4323


# **Load and Fine-tune on Android**

In [None]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from tensorflow.keras.models import load_model
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import EarlyStopping

# Load the training and validation dataset
interview_df = pd.read_csv('/content/drive/MyDrive/Depression/Android Dataset/Interview_train.csv')

# Prepare the dataset by dropping the 'name' column and separating features and labels
X = interview_df.drop(columns=['name', 'Class']).values
y = interview_df['Class'].values

# Split the dataset into training and validation sets
X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42)

# Scale the features
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_val_scaled = scaler.transform(X_val)

# Load the pre-trained model
model = load_model('/content/drive/MyDrive/latest_CNN.h5')

# Compile the model with a smaller learning rate
model.compile(optimizer=Adam(learning_rate=1e-4), loss='binary_crossentropy', metrics=['accuracy'])

# Use early stopping to prevent overfitting
early_stopping = EarlyStopping(monitor='val_loss', patience=10, restore_best_weights=True)

# Fine-tune the model on the new dataset
model.fit(X_train_scaled, y_train, validation_data=(X_val_scaled, y_val), epochs=10, callbacks=[early_stopping])

# Save the fine-tuned model
model.save('/content/drive/MyDrive/Depression/Model/finetune_on_latest.h5')


Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


  saving_api.save_model(


# Test on Android

In [None]:
import pandas as pd
from sklearn.preprocessing import StandardScaler
from tensorflow.keras.models import load_model
from sklearn.metrics import accuracy_score

# Load the test dataset
test_df = pd.read_csv('/content/drive/MyDrive/Depression/Android Dataset/Interview_test.csv')

# Prepare the test dataset by dropping the 'name' column and separating features and labels
X_test = test_df.drop(columns=['name', 'Class']).values
y_test = test_df['Class'].values

# Scale the features of the test set using the same scaler as used for training
scaler = StandardScaler()
X_test_scaled = scaler.fit_transform(X_test)

# Load the fine-tuned model
fine_tuned_model = load_model('/content/drive/MyDrive/Depression/Model/finetune_on_latest.h5')

# Make predictions on the test set
predictions = fine_tuned_model.predict(X_test_scaled)
# Convert probabilities to class labels based on a threshold (0.5 for binary classification)
predicted_classes = (predictions > 0.5).astype(int)

# Calculate and print the accuracy on the test set
test_accuracy = accuracy_score(y_test, predicted_classes)
print(f'Test Accuracy: {test_accuracy}')

Test Accuracy: 0.6184122748498999


# Test on EDAIC

In [None]:
import pandas as pd
from sklearn.preprocessing import StandardScaler
from tensorflow.keras.models import load_model
from sklearn.metrics import accuracy_score

# Load the test dataset
test_df = pd.read_csv('/content/drive/MyDrive/Depression/EDAIC Dataset/testing_labels.csv')

# Prepare the test dataset by dropping the 'name' column and separating features and labels
X_test = test_df.drop(columns=['name', 'Class']).values
y_test = test_df['Class'].values

# Scale the features of the test set using the same scaler as used for training
scaler = StandardScaler()
X_test_scaled = scaler.fit_transform(X_test)

# Load the fine-tuned model
fine_tuned_model = load_model('/content/drive/MyDrive/Depression/Model/finetune_on_latest.h5')

# Make predictions on the test set
predictions = fine_tuned_model.predict(X_test_scaled)
# Convert probabilities to class labels based on a threshold (0.5 for binary classification)
predicted_classes = (predictions > 0.5).astype(int)

# Calculate and print the accuracy on the test set
test_accuracy = accuracy_score(y_test, predicted_classes)
print(f'Test Accuracy: {test_accuracy}')

Test Accuracy: 0.6369582992641046


# CSV Details

In [None]:
import pandas as pd

def csv_details(file_path):
    # Load CSV file into a DataFrame
    try:
        df = pd.read_csv(file_path)
    except FileNotFoundError:
        print("File not found.")
        return

    # Display basic details
    print("Columns:")
    print(df.columns.tolist())
    print("\nData Types:")
    print(df.dtypes)
    print("\nPreview (first 5 rows):")
    print(df.head())

# Example usage:
csv_file_path = '/content/drive/MyDrive/Depression/EDAIC Dataset/training_labels.csv'  # Replace this with the path to your CSV file
csv_details(csv_file_path)


Columns:
['name', 'Class', 'feature_0', 'feature_1', 'feature_2', 'feature_3', 'feature_4', 'feature_5', 'feature_6', 'feature_7', 'feature_8', 'feature_9', 'feature_10', 'feature_11', 'feature_12', 'feature_13', 'feature_14', 'feature_15', 'feature_16', 'feature_17', 'feature_18', 'feature_19', 'feature_20', 'feature_21', 'feature_22', 'feature_23', 'feature_24', 'feature_25', 'feature_26', 'feature_27', 'feature_28', 'feature_29', 'feature_30', 'feature_31', 'feature_32', 'feature_33', 'feature_34', 'feature_35', 'feature_36', 'feature_37', 'feature_38', 'feature_39', 'feature_40', 'feature_41', 'feature_42', 'feature_43', 'feature_44', 'feature_45', 'feature_46', 'feature_47', 'feature_48', 'feature_49', 'feature_50', 'feature_51', 'feature_52', 'feature_53', 'feature_54', 'feature_55', 'feature_56', 'feature_57', 'feature_58', 'feature_59', 'feature_60', 'feature_61', 'feature_62', 'feature_63', 'feature_64', 'feature_65', 'feature_66', 'feature_67', 'feature_68', 'feature_69', 'fe