# **Reading**

In [None]:
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


# RNN

In [None]:
# # WITHOUT FUNCTIONAL API
# import pandas as pd
# from sklearn.preprocessing import StandardScaler
# from tensorflow.keras.models import Sequential
# from tensorflow.keras.layers import SimpleRNN, Dense, Dropout
# from sklearn.metrics import classification_report, accuracy_score

# # Load training and testing data from CSV files
# train_data = pd.read_csv('/content/drive/MyDrive/Depression/Android Dataset/Reading_train.csv')
# test_data = pd.read_csv('/content/drive/MyDrive/Depression/Android Dataset/Reading_test.csv')

# # Prepare features (X) and labels (y) for training and testing sets
# X_train = train_data.drop(columns=['Class', 'name']).values
# y_train = train_data['Class'].values
# X_test = test_data.drop(columns=['Class', 'name']).values
# y_test = test_data['Class'].values

# # Standardize the features
# scaler = StandardScaler()
# X_train = scaler.fit_transform(X_train)
# X_test = scaler.transform(X_test)

# # Reshape the data for RNN input
# # RNNs require 3D input of shape [samples, timesteps, features]. Here, we'll treat each feature as a timestep.
# # This might not be the optimal approach for every problem and depends on how your data is structured.
# X_train_rnn = X_train.reshape((X_train.shape[0], X_train.shape[1], 1))
# X_test_rnn = X_test.reshape((X_test.shape[0], X_test.shape[1], 1))

# # Initialize the RNN model
# model_rnn = Sequential()
# model_rnn.add(SimpleRNN(units=64, activation='relu', input_shape=(X_train_rnn.shape[1], X_train_rnn.shape[2]), return_sequences=False))
# model_rnn.add(Dense(50, activation='relu'))
# model_rnn.add(Dropout(0.5))
# model_rnn.add(Dense(1, activation='sigmoid'))

# # Compile and train the RNN model
# model_rnn.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])
# history = model_rnn.fit(X_train_rnn, y_train, epochs=10, batch_size=32, validation_data=(X_test_rnn, y_test))

# # Make predictions using the RNN for the testing set
# y_test_pred_rnn = (model_rnn.predict(X_test_rnn) > 0.5).astype(int)

# # Calculate and print the classification report and accuracy for the RNN model
# print("RNN Model Classification Report for Testing Set:")
# print(classification_report(y_test, y_test_pred_rnn))
# accuracy_test_rnn = accuracy_score(y_test, y_test_pred_rnn)
# print(f"\nAccuracy for RNN Testing Set: {accuracy_test_rnn:.4f}")

In [None]:
# //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////

In [None]:
import pandas as pd
from sklearn.preprocessing import StandardScaler
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, SimpleRNN, Dense, Dropout
from sklearn.metrics import classification_report, accuracy_score
from tensorflow.keras.optimizers import Adam

# Load training and testing data from CSV files
train_data = pd.read_csv('/content/drive/MyDrive/Depression/Android Dataset/Reading_train.csv')
test_data = pd.read_csv('/content/drive/MyDrive/Depression/Android Dataset/Reading_test.csv')

# Function to preprocess the dataset
def preprocess_data(train_data, test_data):
    # Prepare features and labels
    X_train = train_data.drop(columns=['Class', 'name']).values
    y_train = train_data['Class'].values
    X_test = test_data.drop(columns=['Class', 'name']).values
    y_test = test_data['Class'].values

    # Standardize the features
    scaler = StandardScaler()
    X_train_scaled = scaler.fit_transform(X_train)
    X_test_scaled = scaler.transform(X_test)

    # Reshape data for RNN: (samples, timesteps, features)
    X_train_rnn = X_train_scaled.reshape((X_train_scaled.shape[0], X_train_scaled.shape[1], 1))
    X_test_rnn = X_test_scaled.reshape((X_test_scaled.shape[0], X_test_scaled.shape[1], 1))

    return X_train_rnn, y_train, X_test_rnn, y_test

# Preprocess the data
X_train_rnn, y_train, X_test_rnn, y_test = preprocess_data(train_data, test_data)

# Function to create RNN model using the Functional API
def create_functional_model(X_train_shape, rnn_units=64, dense_units=50, dropout_rate=0.5):
    inputs = Input(shape=(X_train_shape[1], X_train_shape[2]))
    x = SimpleRNN(units=rnn_units, activation='relu', return_sequences=False)(inputs)
    x = Dense(dense_units, activation='relu')(x)
    x = Dropout(dropout_rate)(x)
    outputs = Dense(1, activation='sigmoid')(x)

    model = Model(inputs=inputs, outputs=outputs)
    model.compile(optimizer=Adam(), loss='binary_crossentropy', metrics=['accuracy'])

    return model

# Create the model with the training data shape, RNN units, dense units, and dropout rate
model = create_functional_model(X_train_rnn.shape, rnn_units=64, dense_units=50, dropout_rate=0.5)

# Print model summary to verify architecture
model.summary()

# Train the model
history = model.fit(X_train_rnn, y_train, epochs=10, batch_size=32, validation_data=(X_test_rnn, y_test))

# Evaluate the model on the test set
y_test_pred = (model.predict(X_test_rnn) > 0.5).astype(int)
print("Functional API Model Classification Report for Testing Set:")
print(classification_report(y_test, y_test_pred))
accuracy_test = accuracy_score(y_test, y_test_pred)
print(f"\nAccuracy for Functional API Model Testing Set: {accuracy_test:.4f}")

Model: "model"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_1 (InputLayer)        [(None, 512, 1)]          0         
                                                                 
 simple_rnn (SimpleRNN)      (None, 64)                4224      
                                                                 
 dense (Dense)               (None, 50)                3250      
                                                                 
 dropout (Dropout)           (None, 50)                0         
                                                                 
 dense_1 (Dense)             (None, 1)                 51        
                                                                 
Total params: 7525 (29.39 KB)
Trainable params: 7525 (29.39 KB)
Non-trainable params: 0 (0.00 Byte)
_________________________________________________________________
Epoch 1/10
Epoch 2/10
Epoch

In [None]:
# //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////

In [None]:
# After training
model.save('/content/drive/MyDrive/Depression/Android Dataset/Reading_RNN.h5')  # Save the model

  saving_api.save_model(


In [None]:
# testing the saved model
# Import necessary libraries
import pandas as pd
from sklearn.preprocessing import StandardScaler
from tensorflow.keras.models import load_model
from sklearn.metrics import classification_report, accuracy_score

# Load the saved RNN model from your drive
model_load_path = '/content/drive/MyDrive/Depression/Android Dataset/Reading_RNN.h5'  # Ensure this matches the path where you saved the model
loaded_model = load_model(model_load_path)
print("Model loaded successfully.")

# Load new test data
new_test_data_path = '/content/drive/MyDrive/Depression/Android Dataset/Reading_test.csv'  # Update this path
new_test_data = pd.read_csv(new_test_data_path)

# Assuming your test CSV has specific columns to drop (like 'name') and a 'Class' column for labels
X_new_test = new_test_data.drop(columns=['Class', 'name']).values
y_new_test = new_test_data['Class'].values

# Standardize the features using the same approach as was used for the training data
scaler = StandardScaler()
X_new_test_scaled = scaler.fit_transform(X_new_test)  # Note: In practice, use the same scaler as for the training data

# Reshape the data for RNN input, assuming each feature is treated as a separate timestep
X_new_test_scaled_rnn = X_new_test_scaled.reshape((X_new_test_scaled.shape[0], X_new_test_scaled.shape[1], 1))

# Make predictions using the loaded RNN model for the new testing set
y_new_test_pred_rnn = (loaded_model.predict(X_new_test_scaled_rnn) > 0.5).astype(int)

# Calculate and print the classification report and accuracy for the RNN model on the new test set
print("RNN Model Classification Report for New Testing Set:")
print(classification_report(y_new_test, y_new_test_pred_rnn))
accuracy_new_test_rnn = accuracy_score(y_new_test, y_new_test_pred_rnn)
print(f"\nAccuracy for RNN on New Testing Set: {accuracy_new_test_rnn:.4f}")


Model loaded successfully.
RNN Model Classification Report for New Testing Set:
              precision    recall  f1-score   support

           0       0.63      0.56      0.59       102
           1       0.72      0.77      0.75       150

    accuracy                           0.69       252
   macro avg       0.67      0.67      0.67       252
weighted avg       0.68      0.69      0.68       252


Accuracy for RNN on New Testing Set: 0.6865


# CNN

In [None]:
# # WITHOUT FUNCTIONAL API
# import pandas as pd
# from sklearn.preprocessing import StandardScaler
# from tensorflow.keras.models import Sequential
# from tensorflow.keras.layers import Conv1D, MaxPooling1D, Flatten, Dense, Dropout
# from sklearn.metrics import classification_report, accuracy_score

# # Load training and testing data from CSV files
# train_data = pd.read_csv('/content/drive/MyDrive/Depression/Android Dataset/Reading_test.csv')
# test_data = pd.read_csv('/content/drive/MyDrive/Depression/Android Dataset/Reading_test.csv')

# # Prepare features (X) and labels (y) for training and testing sets
# X_train = train_data.drop(columns=['Class', 'name']).values
# y_train = train_data['Class'].values
# X_test = test_data.drop(columns=['Class', 'name']).values
# y_test = test_data['Class'].values

# # Standardize the features
# scaler = StandardScaler()
# X_train = scaler.fit_transform(X_train)
# X_test = scaler.transform(X_test)

# # Reshape the data for CNN input
# # Adjust the shape based on your actual feature extraction
# X_train_cnn = X_train.reshape((X_train.shape[0], X_train.shape[1], 1))
# X_test_cnn = X_test.reshape((X_test.shape[0], X_test.shape[1], 1))

# # Initialize the CNN model
# model_cnn = Sequential()
# model_cnn.add(Conv1D(filters=64, kernel_size=3, activation='relu', input_shape=(X_train_cnn.shape[1], 1)))
# model_cnn.add(MaxPooling1D(pool_size=2))
# model_cnn.add(Flatten())
# model_cnn.add(Dense(50, activation='relu'))
# model_cnn.add(Dropout(0.5))
# model_cnn.add(Dense(1, activation='sigmoid'))

# # Compile and train the CNN model
# model_cnn.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])
# model_cnn.fit(X_train_cnn, y_train, epochs=10, batch_size=32, validation_data=(X_test_cnn, y_test))

# # Make predictions using the CNN for the testing set
# y_test_pred_cnn = (model_cnn.predict(X_test_cnn) > 0.5).astype(int)

# # Calculate and print the classification report and accuracy for the CNN model
# print("CNN Model Classification Report for Testing Set:")
# print(classification_report(y_test, y_test_pred_cnn))
# accuracy_test_cnn = accuracy_score(y_test, y_test_pred_cnn)
# print(f"\nAccuracy for CNN Testing Set: {accuracy_test_cnn:.4f}")


In [None]:
# //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////

In [None]:
import pandas as pd
from sklearn.preprocessing import StandardScaler
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, Conv1D, MaxPooling1D, Flatten, Dense, Dropout
from sklearn.metrics import classification_report, accuracy_score
from tensorflow.keras.optimizers import Adam

# Load training and testing data from CSV files
train_data = pd.read_csv('/content/drive/MyDrive/Depression/Android Dataset/Reading_train.csv')
test_data = pd.read_csv('/content/drive/MyDrive/Depression/Android Dataset/Reading_test.csv')

# Prepare features (X) and labels (y) for training and testing sets
X_train = train_data.drop(columns=['Class', 'name']).values
y_train = train_data['Class'].values
X_test = test_data.drop(columns=['Class', 'name']).values
y_test = test_data['Class'].values

# Standardize the features
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

# Reshape the data for CNN input
X_train_cnn = X_train.reshape((X_train.shape[0], X_train.shape[1], 1))
X_test_cnn = X_test.reshape((X_test.shape[0], X_test.shape[1], 1))

def create_functional_cnn_model(input_shape):
    """
    Creates a CNN model for a binary classification task using the Functional API.

    Parameters:
    - input_shape: The shape of the input data.

    Returns:
    - model: The CNN model defined using the Functional API.
    """
    inputs = Input(shape=input_shape)
    x = Conv1D(filters=64, kernel_size=3, activation='relu')(inputs)
    x = MaxPooling1D(pool_size=2)(x)
    x = Flatten()(x)
    x = Dense(50, activation='relu')(x)
    x = Dropout(0.5)(x)
    outputs = Dense(1, activation='sigmoid')(x)

    model = Model(inputs=inputs, outputs=outputs)
    model.compile(optimizer=Adam(), loss='binary_crossentropy', metrics=['accuracy'])

    return model

# Create the CNN model with the training data shape
model_cnn = create_functional_cnn_model(X_train_cnn.shape[1:])

# Print model summary to verify architecture
model_cnn.summary()

# Train the model
history = model_cnn.fit(X_train_cnn, y_train, epochs=10, batch_size=32, validation_data=(X_test_cnn, y_test))

# Evaluate the model on the test set
y_test_pred_cnn = (model_cnn.predict(X_test_cnn) > 0.5).astype(int)
print("Functional API CNN Model Classification Report for Testing Set:")
print(classification_report(y_test, y_test_pred_cnn))
accuracy_test_cnn = accuracy_score(y_test, y_test_pred_cnn)
print(f"\nAccuracy for CNN Testing Set: {accuracy_test_cnn:.4f}")


Model: "model_1"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_2 (InputLayer)        [(None, 512, 1)]          0         
                                                                 
 conv1d (Conv1D)             (None, 510, 64)           256       
                                                                 
 max_pooling1d (MaxPooling1  (None, 255, 64)           0         
 D)                                                              
                                                                 
 flatten (Flatten)           (None, 16320)             0         
                                                                 
 dense_2 (Dense)             (None, 50)                816050    
                                                                 
 dropout_1 (Dropout)         (None, 50)                0         
                                                           

In [None]:
# After training
model.save('/content/drive/MyDrive/Depression/Model/my_rnn_model.h5')  # Save the model

  saving_api.save_model(


In [None]:
# //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////

In [None]:
# MODEL TESTING AFTER LOADING
# Import necessary libraries
import pandas as pd
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import classification_report, accuracy_score
from tensorflow.keras.models import load_model

# Load the saved model from Google Drive
model_save_path = '/content/drive/MyDrive/Depression/Android Dataset/Reading_CNN.h5'  # Update with your model's path
loaded_model = load_model(model_save_path)
print("Model loaded successfully.")

# Load and prepare the test data
test_data_path = '/content/drive/MyDrive/Depression/Android Dataset/Reading_test.csv'  # Update with your test data's path
test_data = pd.read_csv(test_data_path)

# Assuming your CSV has specific columns to drop (like 'name') and a 'Class' column for labels
X_test = test_data.drop(columns=['Class', 'name']).values
y_test = test_data['Class'].values

# Scale the features (assuming this was done before training as well)
scaler = StandardScaler()
X_test_scaled = scaler.fit_transform(X_test)

# Reshape the data if your model expects a specific input shape (example for CNN)
X_test_scaled = X_test_scaled.reshape((X_test_scaled.shape[0], X_test_scaled.shape[1], 1))

# Make predictions on the test data
y_pred = (loaded_model.predict(X_test_scaled) > 0.5).astype(int)

# Calculate and print the classification report and accuracy
print("CNN Model Classification Report for Testing Set:")
print(classification_report(y_test, y_pred))
accuracy_test = accuracy_score(y_test, y_pred)
print(f"\nAccuracy for CNN Testing Set: {accuracy_test:.4f}")


Model loaded successfully.
CNN Model Classification Report for Testing Set:
              precision    recall  f1-score   support

           0       0.83      0.90      0.86       102
           1       0.93      0.87      0.90       150

    accuracy                           0.88       252
   macro avg       0.88      0.89      0.88       252
weighted avg       0.89      0.88      0.89       252


Accuracy for CNN Testing Set: 0.8849


In [None]:
# Import necessary libraries
import pandas as pd
from sklearn.preprocessing import StandardScaler
from tensorflow.keras.models import load_model

# Load the saved model from Google Drive
model_save_path = '/content/drive/MyDrive/Depression/Android Dataset/Reading_CNN.h5'  # Update with your model's path
loaded_model = load_model(model_save_path)
print("Model loaded successfully.")

# Load and prepare the test data
test_data_path = '/content/drive/MyDrive/Depression/Android Dataset/Reading_test.csv'  # Update with your test data's path
test_data = pd.read_csv(test_data_path)

# Assuming your CSV has specific columns to drop (like 'name') and a 'Class' column for labels
X_test = test_data.drop(columns=['Class', 'name']).values
y_test = test_data['Class'].values

# Scale the features (assuming this was done before training as well)
scaler = StandardScaler()
X_test_scaled = scaler.fit_transform(X_test)

# Reshape the data if your model expects a specific input shape (example for CNN)
X_test_scaled = X_test_scaled.reshape((X_test_scaled.shape[0], X_test_scaled.shape[1], 1))

# Evaluate the model on the test data
loss, accuracy = loaded_model.evaluate(X_test_scaled, y_test, verbose=1)

# Print the results
print(f"Test Loss: {loss:.4f}")
print(f"Test Accuracy: {accuracy:.4f}")


Model loaded successfully.
Test Loss: 0.2511
Test Accuracy: 0.8849


# **Interview**

# CNN

In [None]:
# # WITHOUT FUNCTIONAL API
# import pandas as pd
# from sklearn.preprocessing import StandardScaler
# from tensorflow.keras.models import Sequential
# from tensorflow.keras.layers import Conv1D, MaxPooling1D, Flatten, Dense, Dropout
# from sklearn.metrics import classification_report, accuracy_score

# # Load training and testing data from CSV files
# train_data = pd.read_csv('/content/drive/MyDrive/Interview_test.csv')
# test_data = pd.read_csv('/content/drive/MyDrive/Interview_train.csv')

# # Prepare features (X) and labels (y) for training and testing sets
# X_train = train_data.drop(columns=['Class', 'name']).values
# y_train = train_data['Class'].values
# X_test = test_data.drop(columns=['Class', 'name']).values
# y_test = test_data['Class'].values

# # Standardize the features
# scaler = StandardScaler()
# X_train = scaler.fit_transform(X_train)
# X_test = scaler.transform(X_test)

# # Reshape the data for CNN input
# # Adjust the shape based on your actual feature extraction
# X_train_cnn = X_train.reshape((X_train.shape[0], X_train.shape[1], 1))
# X_test_cnn = X_test.reshape((X_test.shape[0], X_test.shape[1], 1))

# # Initialize the CNN model
# model_cnn = Sequential()
# model_cnn.add(Conv1D(filters=64, kernel_size=3, activation='relu', input_shape=(X_train_cnn.shape[1], 1)))
# model_cnn.add(MaxPooling1D(pool_size=2))
# model_cnn.add(Flatten())
# model_cnn.add(Dense(50, activation='relu'))
# model_cnn.add(Dropout(0.5))
# model_cnn.add(Dense(1, activation='sigmoid'))

# # Compile and train the CNN model
# model_cnn.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])
# model_cnn.fit(X_train_cnn, y_train, epochs=10, batch_size=32, validation_data=(X_test_cnn, y_test))

# # Make predictions using the CNN for the testing set
# y_test_pred_cnn = (model_cnn.predict(X_test_cnn) > 0.5).astype(int)

# # Calculate and print the classification report and accuracy for the CNN model
# print("CNN Model Classification Report for Testing Set:")
# print(classification_report(y_test, y_test_pred_cnn))
# accuracy_test_cnn = accuracy_score(y_test, y_test_pred_cnn)
# print(f"\nAccuracy for CNN Testing Set: {accuracy_test_cnn:.4f}")


In [None]:
# //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////

In [None]:
import pandas as pd
from sklearn.preprocessing import StandardScaler
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, Conv1D, MaxPooling1D, Flatten, Dense, Dropout
from sklearn.metrics import classification_report, accuracy_score
from tensorflow.keras.optimizers import Adam

# Load training and testing data from CSV files
train_data = pd.read_csv('/content/drive/MyDrive/Depression/Android Dataset/Interview_train.csv')
test_data = pd.read_csv('/content/drive/MyDrive/Depression/Android Dataset/Interview_test.csv')
# Prepare features (X) and labels (y) for training and testing sets
X_train = train_data.drop(columns=['Class', 'name']).values
y_train = train_data['Class'].values
X_test = test_data.drop(columns=['Class', 'name']).values
y_test = test_data['Class'].values

# Standardize the features
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

# Reshape the data for CNN input
X_train_cnn = X_train.reshape((X_train.shape[0], X_train.shape[1], 1))
X_test_cnn = X_test.reshape((X_test.shape[0], X_test.shape[1], 1))

def create_functional_cnn_model(input_shape):
    """
    Creates a CNN model for a binary classification task using the Functional API.

    Parameters:
    - input_shape: The shape of the input data.

    Returns:
    - model: The CNN model defined using the Functional API.
    """
    inputs = Input(shape=input_shape)
    x = Conv1D(filters=64, kernel_size=3, activation='relu')(inputs)
    x = MaxPooling1D(pool_size=2)(x)
    x = Flatten()(x)
    x = Dense(50, activation='relu')(x)
    x = Dropout(0.5)(x)
    outputs = Dense(1, activation='sigmoid')(x)

    model = Model(inputs=inputs, outputs=outputs)
    model.compile(optimizer=Adam(), loss='binary_crossentropy', metrics=['accuracy'])

    return model

# Create the CNN model with the training data shape
model_cnn = create_functional_cnn_model(X_train_cnn.shape[1:])

# Print model summary to verify architecture
model_cnn.summary()

# Train the model
history = model_cnn.fit(X_train_cnn, y_train, epochs=10, batch_size=32, validation_data=(X_test_cnn, y_test))

# Evaluate the model on the test set
y_test_pred_cnn = (model_cnn.predict(X_test_cnn) > 0.5).astype(int)
print("Functional API CNN Model Classification Report for Testing Set:")
print(classification_report(y_test, y_test_pred_cnn))
accuracy_test_cnn = accuracy_score(y_test, y_test_pred_cnn)
print(f"\nAccuracy for CNN Testing Set: {accuracy_test_cnn:.4f}")


Model: "model"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_1 (InputLayer)        [(None, 512, 1)]          0         
                                                                 
 conv1d (Conv1D)             (None, 510, 64)           256       
                                                                 
 max_pooling1d (MaxPooling1  (None, 255, 64)           0         
 D)                                                              
                                                                 
 flatten (Flatten)           (None, 16320)             0         
                                                                 
 dense (Dense)               (None, 50)                816050    
                                                                 
 dropout (Dropout)           (None, 50)                0         
                                                             

In [None]:
# //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////

In [None]:
# After training
model.save('/content/drive/MyDrive/Depression/Android Dataset/Interview_CNN.h5')  # Save the model

  saving_api.save_model(


In [None]:
# MODEL TESTING AFTER LOADING
# Import necessary libraries
import pandas as pd
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import classification_report, accuracy_score
from tensorflow.keras.models import load_model

# Load the saved model from Google Drive
model_save_path = '/content/drive/MyDrive/Depression/Android Dataset/Interview_CNN.h5'  # Update with your model's path
loaded_model = load_model(model_save_path)
print("Model loaded successfully.")

# Load and prepare the test data
test_data_path = '/content/drive/MyDrive/Depression/Android Dataset/Interview_test.csv'  # Update with your test data's path
test_data = pd.read_csv(test_data_path)

# Assuming your CSV has specific columns to drop (like 'name') and a 'Class' column for labels
X_test = test_data.drop(columns=['Class', 'name']).values
y_test = test_data['Class'].values

# Scale the features (assuming this was done before training as well)
scaler = StandardScaler()
X_test_scaled = scaler.fit_transform(X_test)

# Reshape the data if your model expects a specific input shape (example for CNN)
X_test_scaled = X_test_scaled.reshape((X_test_scaled.shape[0], X_test_scaled.shape[1], 1))

# Make predictions on the test data
y_pred = (loaded_model.predict(X_test_scaled) > 0.5).astype(int)

# Calculate and print the classification report and accuracy
print("CNN Model Classification Report for Testing Set:")
print(classification_report(y_test, y_pred))
accuracy_test = accuracy_score(y_test, y_pred)
print(f"\nAccuracy for CNN Testing Set: {accuracy_test:.4f}")


Model loaded successfully.
CNN Model Classification Report for Testing Set:
              precision    recall  f1-score   support

           0       0.63      0.60      0.61       624
           1       0.72      0.75      0.73       875

    accuracy                           0.69      1499
   macro avg       0.68      0.67      0.67      1499
weighted avg       0.68      0.69      0.68      1499


Accuracy for CNN Testing Set: 0.6858


# RNN

In [None]:
# # WITHOUT FUNCTIONAL API
# import pandas as pd
# from sklearn.preprocessing import StandardScaler
# from tensorflow.keras.models import Sequential
# from tensorflow.keras.layers import SimpleRNN, Dense, Dropout
# from sklearn.metrics import classification_report, accuracy_score

# # Load training and testing data from CSV files
# train_data = pd.read_csv('/content/drive/MyDrive/Interview_train.csv')
# test_data = pd.read_csv('/content/drive/MyDrive/Interview_test.csv')

# # Prepare features (X) and labels (y) for training and testing sets
# X_train = train_data.drop(columns=['Class', 'name']).values
# y_train = train_data['Class'].values
# X_test = test_data.drop(columns=['Class', 'name']).values
# y_test = test_data['Class'].values

# # Standardize the features
# scaler = StandardScaler()
# X_train = scaler.fit_transform(X_train)
# X_test = scaler.transform(X_test)

# # Reshape the data for RNN input
# # RNNs require 3D input of shape [samples, timesteps, features]. Here, we'll treat each feature as a timestep.
# # This might not be the optimal approach for every problem and depends on how your data is structured.
# X_train_rnn = X_train.reshape((X_train.shape[0], X_train.shape[1], 1))
# X_test_rnn = X_test.reshape((X_test.shape[0], X_test.shape[1], 1))

# # Initialize the RNN model
# model_rnn = Sequential()
# model_rnn.add(SimpleRNN(units=64, activation='relu', input_shape=(X_train_rnn.shape[1], X_train_rnn.shape[2]), return_sequences=False))
# model_rnn.add(Dense(50, activation='relu'))
# model_rnn.add(Dropout(0.5))
# model_rnn.add(Dense(1, activation='sigmoid'))

# # Compile and train the RNN model
# model_rnn.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])
# history = model_rnn.fit(X_train_rnn, y_train, epochs=10, batch_size=32, validation_data=(X_test_rnn, y_test))

# # Make predictions using the RNN for the testing set
# y_test_pred_rnn = (model_rnn.predict(X_test_rnn) > 0.5).astype(int)

# # Calculate and print the classification report and accuracy for the RNN model
# print("RNN Model Classification Report for Testing Set:")
# print(classification_report(y_test, y_test_pred_rnn))
# accuracy_test_rnn = accuracy_score(y_test, y_test_pred_rnn)
# print(f"\nAccuracy for RNN Testing Set: {accuracy_test_rnn:.4f}")


In [None]:
# //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////

In [None]:
import pandas as pd
from sklearn.preprocessing import StandardScaler
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, SimpleRNN, Dense, Dropout
from sklearn.metrics import classification_report, accuracy_score
from tensorflow.keras.optimizers import Adam

# Load training and testing data from CSV files
train_data = pd.read_csv('/content/drive/MyDrive/Depression/Android Dataset/Interview_train.csv')
test_data = pd.read_csv('/content/drive/MyDrive/Depression/Android Dataset/Interview_test.csv')
# Function to preprocess the dataset
def preprocess_data(train_data, test_data):
    # Prepare features and labels
    X_train = train_data.drop(columns=['Class', 'name']).values
    y_train = train_data['Class'].values
    X_test = test_data.drop(columns=['Class', 'name']).values
    y_test = test_data['Class'].values

    # Standardize the features
    scaler = StandardScaler()
    X_train_scaled = scaler.fit_transform(X_train)
    X_test_scaled = scaler.transform(X_test)

    # Reshape data for RNN: (samples, timesteps, features)
    X_train_rnn = X_train_scaled.reshape((X_train_scaled.shape[0], X_train_scaled.shape[1], 1))
    X_test_rnn = X_test_scaled.reshape((X_test_scaled.shape[0], X_test_scaled.shape[1], 1))

    return X_train_rnn, y_train, X_test_rnn, y_test

# Preprocess the data
X_train_rnn, y_train, X_test_rnn, y_test = preprocess_data(train_data, test_data)

# Function to create RNN model using the Functional API
def create_functional_model(X_train_shape, rnn_units=64, dense_units=50, dropout_rate=0.5):
    inputs = Input(shape=(X_train_shape[1], X_train_shape[2]))
    x = SimpleRNN(units=rnn_units, activation='relu', return_sequences=False)(inputs)
    x = Dense(dense_units, activation='relu')(x)
    x = Dropout(dropout_rate)(x)
    outputs = Dense(1, activation='sigmoid')(x)

    model = Model(inputs=inputs, outputs=outputs)
    model.compile(optimizer=Adam(), loss='binary_crossentropy', metrics=['accuracy'])

    return model

# Create the model with the training data shape, RNN units, dense units, and dropout rate
model = create_functional_model(X_train_rnn.shape, rnn_units=64, dense_units=50, dropout_rate=0.5)

# Print model summary to verify architecture
model.summary()

# Train the model
history = model.fit(X_train_rnn, y_train, epochs=10, batch_size=32, validation_data=(X_test_rnn, y_test))

# Evaluate the model on the test set
y_test_pred = (model.predict(X_test_rnn) > 0.5).astype(int)
print("Functional API Model Classification Report for Testing Set:")
print(classification_report(y_test, y_test_pred))
accuracy_test = accuracy_score(y_test, y_test_pred)
print(f"\nAccuracy for Functional API Model Testing Set: {accuracy_test:.4f}")


Model: "model_3"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_4 (InputLayer)        [(None, 512, 1)]          0         
                                                                 
 simple_rnn_1 (SimpleRNN)    (None, 64)                4224      
                                                                 
 dense_6 (Dense)             (None, 50)                3250      
                                                                 
 dropout_3 (Dropout)         (None, 50)                0         
                                                                 
 dense_7 (Dense)             (None, 1)                 51        
                                                                 
Total params: 7525 (29.39 KB)
Trainable params: 7525 (29.39 KB)
Non-trainable params: 0 (0.00 Byte)
_________________________________________________________________
Epoch 1/10
Epoch 2/10
Epo

In [None]:
# /////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////

In [None]:
# After training
model.save('/content/drive/MyDrive/Depression/Android Dataset/Interview_RNN.h5')  # Save the model

  saving_api.save_model(


In [None]:
# testing the saved model
# Import necessary libraries
import pandas as pd
from sklearn.preprocessing import StandardScaler
from tensorflow.keras.models import load_model
from sklearn.metrics import classification_report, accuracy_score

# Load the saved RNN model from your drive
model_load_path = '/content/drive/MyDrive/Depression/Android Dataset/Interview_RNN.h5'  # Ensure this matches the path where you saved the model
loaded_model = load_model(model_load_path)
print("Model loaded successfully.")

# Load new test data
new_test_data_path = '/content/drive/MyDrive/Depression/Android Dataset/Interview_test.csv'  # Update this path
new_test_data = pd.read_csv(new_test_data_path)

# Assuming your test CSV has specific columns to drop (like 'name') and a 'Class' column for labels
X_new_test = new_test_data.drop(columns=['Class', 'name']).values
y_new_test = new_test_data['Class'].values

# Standardize the features using the same approach as was used for the training data
scaler = StandardScaler()
X_new_test_scaled = scaler.fit_transform(X_new_test)  # Note: In practice, use the same scaler as for the training data

# Reshape the data for RNN input, assuming each feature is treated as a separate timestep
X_new_test_scaled_rnn = X_new_test_scaled.reshape((X_new_test_scaled.shape[0], X_new_test_scaled.shape[1], 1))

# Make predictions using the loaded RNN model for the new testing set
y_new_test_pred_rnn = (loaded_model.predict(X_new_test_scaled_rnn) > 0.5).astype(int)

# Calculate and print the classification report and accuracy for the RNN model on the new test set
print("RNN Model Classification Report for New Testing Set:")
print(classification_report(y_new_test, y_new_test_pred_rnn))
accuracy_new_test_rnn = accuracy_score(y_new_test, y_new_test_pred_rnn)
print(f"\nAccuracy for RNN on New Testing Set: {accuracy_new_test_rnn:.4f}")

Model loaded successfully.
RNN Model Classification Report for New Testing Set:
              precision    recall  f1-score   support

           0       0.54      0.90      0.68       624
           1       0.87      0.46      0.60       875

    accuracy                           0.65      1499
   macro avg       0.71      0.68      0.64      1499
weighted avg       0.73      0.65      0.63      1499


Accuracy for RNN on New Testing Set: 0.6451


# **EDAIC**

# CNN

In [None]:

# !pip install pandas numpy scikit-learn tensorflow
# # 75%
# import pandas as pd
# import numpy as np
# from sklearn.preprocessing import StandardScaler
# from tensorflow.keras.models import Sequential
# from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, Dropout
# from sklearn.metrics import classification_report, accuracy_score

# # Function to pad features to the next perfect square
# def pad_features_to_square(features):
#     num_features = features.shape[1]
#     next_square = np.ceil(np.sqrt(num_features))**2
#     padding_size = int(next_square - num_features)
#     return np.pad(features, ((0, 0), (0, padding_size)), 'constant')

# # Function to prepare data for CNN
# def prepare_data_for_cnn(features, size):
#     padded_features = pad_features_to_square(features)
#     reshaped_features = padded_features.reshape((features.shape[0], size, size, 1))
#     return reshaped_features


# # Load data from the CSV files
# train_data = pd.read_csv('/content/drive/MyDrive/Depression/EDAIC Dataset/training_labels.csv')
# validation_data = pd.read_csv('/content/drive/MyDrive/Depression/EDAIC Dataset/validation_labels.csv')
# test_data = pd.read_csv('/content/drive/MyDrive/Depression/EDAIC Dataset/testing_labels.csv')


# # Drop rows with NaN values from all datasets
# train_data.dropna(inplace=True)
# validation_data.dropna(inplace=True)
# test_data.dropna(inplace=True)

# # Prepare features and labels
# X_train = train_data.drop(columns=['Class', 'name']).values
# y_train = train_data['Class'].values
# X_validation = validation_data.drop(columns=['Class', 'name']).values
# y_validation = validation_data['Class'].values
# X_test = test_data.drop(columns=['Class', 'name']).values
# y_test = test_data['Class'].values

# # Standardize the features
# scaler = StandardScaler().fit(np.vstack((X_train, X_validation, X_test)))
# X_train_scaled = scaler.transform(X_train)
# X_validation_scaled = scaler.transform(X_validation)
# X_test_scaled = scaler.transform(X_test)

# # Calculate size for reshaping based on training data
# size = int(np.sqrt(pad_features_to_square(X_train_scaled).shape[1]))

# # Prepare the data for CNN
# X_train_cnn = prepare_data_for_cnn(X_train_scaled, size)
# X_validation_cnn = prepare_data_for_cnn(X_validation_scaled, size)
# X_test_cnn = prepare_data_for_cnn(X_test_scaled, size)

# # CNN model definition with dropout to reduce overfitting
# def create_cnn_model(input_shape):
#     model = Sequential([
#         Conv2D(32, kernel_size=(3, 3), activation='relu', input_shape=input_shape),
#         MaxPooling2D(pool_size=(2, 2)),
#         Dropout(0.25),
#         Conv2D(64, (3, 3), activation='relu'),
#         MaxPooling2D(pool_size=(2, 2)),
#         Dropout(0.25),
#         Flatten(),
#         Dense(128, activation='relu'),
#         Dropout(0.5),
#         Dense(1, activation='sigmoid')
#     ])
#     model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])
#     return model

# # Initialize and train the CNN model
# model = create_cnn_model((size, size, 1))
# model.fit(X_train_cnn, y_train, epochs=10, batch_size=64, validation_data=(X_validation_cnn, y_validation))

# # Evaluate the model on the test data
# y_pred = (model.predict(X_test_cnn) > 0.5).astype(int)
# print("Classification Report for Testing Set:")
# print(classification_report(y_test, y_pred))
# accuracy = accuracy_score(y_test, y_pred)
# print(f"Accuracy for Testing Set: {accuracy:.4f}")


In [None]:
# //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////

In [None]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import StandardScaler
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, Conv1D, MaxPooling1D, Flatten, Dense, Dropout
from sklearn.metrics import classification_report, accuracy_score
from tensorflow.keras.optimizers import Adam

# Load training, validation, and testing data from CSV files
train_data = pd.read_csv('/content/drive/MyDrive/Depression/EDAIC Dataset/training_labels.csv')  # Adjusted for training data
validation_data = pd.read_csv('/content/drive/MyDrive/Depression/EDAIC Dataset/validation_labels.csv')  # New validation data
test_data = pd.read_csv('/content/drive/MyDrive/Depression/EDAIC Dataset/testing_labels.csv')

# Prepare features (X) and labels (y) for training, validation, and testing sets
X_train = train_data.drop(columns=['Class', 'name']).values
y_train = train_data['Class'].values

X_validation = validation_data.drop(columns=['Class', 'name']).values
y_validation = validation_data['Class'].values

X_test = test_data.drop(columns=['Class', 'name']).values
y_test = test_data['Class'].values

# Standardize the features
scaler = StandardScaler()
scaler.fit(np.vstack((X_train, X_validation, X_test)))  # Fit on all available data
X_train_scaled = scaler.transform(X_train)
X_validation_scaled = scaler.transform(X_validation)
X_test_scaled = scaler.transform(X_test)

# Reshape the data for CNN input
X_train_cnn = X_train_scaled.reshape((X_train_scaled.shape[0], X_train_scaled.shape[1], 1))
X_validation_cnn = X_validation_scaled.reshape((X_validation_scaled.shape[0], X_validation_scaled.shape[1], 1))
X_test_cnn = X_test_scaled.reshape((X_test_scaled.shape[0], X_test_scaled.shape[1], 1))

def create_functional_cnn_model(input_shape):
    """
    Creates a CNN model for a binary classification task using the Functional API.

    Parameters:
    - input_shape: The shape of the input data.

    Returns:
    - model: The CNN model defined using the Functional API.
    """
    inputs = Input(shape=input_shape)
    x = Conv1D(filters=64, kernel_size=3, activation='relu')(inputs)
    x = MaxPooling1D(pool_size=2)(x)
    x = Flatten()(x)
    x = Dense(50, activation='relu')(x)
    x = Dropout(0.5)(x)
    outputs = Dense(1, activation='sigmoid')(x)

    model = Model(inputs=inputs, outputs=outputs)
    model.compile(optimizer=Adam(), loss='binary_crossentropy', metrics=['accuracy'])

    return model

# Create the CNN model with the training data shape
model_cnn = create_functional_cnn_model(X_train_cnn.shape[1:])

# Print model summary to verify architecture
model_cnn.summary()

# Train the model with validation data
history = model_cnn.fit(X_train_cnn, y_train, epochs=10, batch_size=32, validation_data=(X_validation_cnn, y_validation))

# Evaluate the model on the test set
y_test_pred_cnn = (model_cnn.predict(X_test_cnn) > 0.5).astype(int)
print("CNN Model Classification Report for Testing Set:")
print(classification_report(y_test, y_test_pred_cnn))
accuracy_test_cnn = accuracy_score(y_test, y_test_pred_cnn)
print(f"\nAccuracy for CNN Testing Set: {accuracy_test_cnn:.4f}")


Model: "model_2"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_3 (InputLayer)        [(None, 512, 1)]          0         
                                                                 
 conv1d_2 (Conv1D)           (None, 510, 64)           256       
                                                                 
 max_pooling1d_2 (MaxPoolin  (None, 255, 64)           0         
 g1D)                                                            
                                                                 
 flatten_2 (Flatten)         (None, 16320)             0         
                                                                 
 dense_4 (Dense)             (None, 50)                816050    
                                                                 
 dropout_2 (Dropout)         (None, 50)                0         
                                                           

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


In [None]:
111# //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////

111

In [None]:
# After training
model_cnn.save('/content/drive/MyDrive/Depression/EDAIC Dataset/MODEL.h5')  # Save the model


  saving_api.save_model(


In [None]:
# MODEL TESTING AFTER LOADING
# Import necessary libraries
import pandas as pd
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import classification_report, accuracy_score
from tensorflow.keras.models import load_model

# Load the saved model from Google Drive
model_save_path = '/content/drive/MyDrive/Depression/EDAIC Dataset/MODEL.h5'  # Update with your model's path
loaded_model = load_model(model_save_path)
print("Model loaded successfully.")

# Load and prepare the test data
test_data_path = '/content/drive/MyDrive/Depression/EDAIC Dataset/testing_labels.csv'  # Update with your test data's path
test_data = pd.read_csv(test_data_path)

# Assuming your CSV has specific columns to drop (like 'name') and a 'Class' column for labels
X_test = test_data.drop(columns=['Class', 'name']).values
y_test = test_data['Class'].values

# Scale the features (assuming this was done before training as well)
scaler = StandardScaler()
X_test_scaled = scaler.fit_transform(X_test)

# Reshape the data if your model expects a specific input shape (example for CNN)
X_test_scaled = X_test_scaled.reshape((X_test_scaled.shape[0], X_test_scaled.shape[1], 1))

# Make predictions on the test data
y_pred = (loaded_model.predict(X_test_scaled) > 0.5).astype(int)

# Calculate and print the classification report and accuracy
print("CNN Model Classification Report for Testing Set:")
print(classification_report(y_test, y_pred))
accuracy_test = accuracy_score(y_test, y_pred)
print(f"\nAccuracy for CNN Testing Set: {accuracy_test:.4f}")


Model loaded successfully.
CNN Model Classification Report for Testing Set:
              precision    recall  f1-score   support

           0       0.78      1.00      0.87      1897
           1       0.00      0.00      0.00       549

    accuracy                           0.78      2446
   macro avg       0.39      0.50      0.44      2446
weighted avg       0.60      0.78      0.68      2446


Accuracy for CNN Testing Set: 0.7756


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


# RNN

In [None]:
# import pandas as pd
# import numpy as np
# from sklearn.preprocessing import StandardScaler
# from sklearn.model_selection import KFold
# from tensorflow.keras.models import Sequential
# from tensorflow.keras.layers import LSTM, Dense, Dropout
# from sklearn.metrics import classification_report, accuracy_score

# # Load data from the CSV files
# train_data = pd.read_csv('/content/drive/MyDrive/Depression/EDAIC Dataset/training_labels.csv')
# validation_data = pd.read_csv('/content/drive/MyDrive/Depression/EDAIC Dataset/validation_labels.csv')
# test_data = pd.read_csv('/content/drive/MyDrive/Depression/EDAIC Dataset/testing_labels.csv')

# # Drop rows with NaN values from all datasets
# train_data.dropna(inplace=True)
# validation_data.dropna(inplace=True)
# test_data.dropna(inplace=True)

# # Prepare features and labels
# X_train = train_data.drop(columns=['Class', 'name']).values
# y_train = train_data['Class'].values
# X_validation = validation_data.drop(columns=['Class', 'name']).values
# y_validation = validation_data['Class'].values
# X_test = test_data.drop(columns=['Class', 'name']).values
# y_test = test_data['Class'].values

# # Standardize the features
# scaler = StandardScaler().fit(np.vstack((X_train, X_validation, X_test)))
# X_train_scaled = scaler.transform(X_train)
# X_validation_scaled = scaler.transform(X_validation)
# X_test_scaled = scaler.transform(X_test)

# # No need to pad features to a perfect square for RNNs
# # Reshape data for RNN input
# num_features = X_train_scaled.shape[1]  # Number of features for each sample
# X_train_rnn = X_train_scaled.reshape((X_train_scaled.shape[0], num_features, 1))
# X_validation_rnn = X_validation_scaled.reshape((X_validation_scaled.shape[0], num_features, 1))
# X_test_rnn = X_test_scaled.reshape((X_test_scaled.shape[0], num_features, 1))

# # Function to create an RNN (LSTM) model
# def create_rnn_model(input_shape):
#     model = Sequential([
#         LSTM(64, return_sequences=True, input_shape=input_shape),
#         Dropout(0.25),
#         LSTM(32),
#         Dropout(0.25),
#         Dense(128, activation='relu'),
#         Dropout(0.5),
#         Dense(1, activation='sigmoid')
#     ])
#     model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])
#     return model

# # 5-fold cross-validation configuration
# kf = KFold(n_splits=5, shuffle=True, random_state=42)

# # Perform 5-fold cross-validation
# fold_no = 1
# for train_index, val_index in kf.split(X_train_rnn, y_train):
#     # Split data into training and validation for the current fold
#     X_train_fold, X_val_fold = X_train_rnn[train_index], X_train_rnn[val_index]
#     y_train_fold, y_val_fold = y_train[train_index], y_train[val_index]

#     # Create a new RNN model for the current fold
#     model = create_rnn_model((num_features, 1))

#     # Train the model
#     print(f'Training on fold {fold_no}...')
#     model.fit(X_train_fold, y_train_fold, epochs=10, batch_size=64, validation_data=(X_val_fold, y_val_fold))

#     fold_no += 1

# # Optionally, retrain the model on the full training dataset or select the best model from folds
# # This example proceeds with the model from the last fold for simplicity

# # Evaluate the model on the test data
# y_pred = (model.predict(X_test_rnn) > 0.5).astype(int)
# print("Classification Report for Testing Set:")
# print(classification_report(y_test, y_pred))
# accuracy = accuracy_score(y_test, y_pred)
# print(f"Accuracy for Testing Set: {accuracy:.4f}")


In [None]:
# //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////

In [None]:
import pandas as pd
from sklearn.preprocessing import StandardScaler
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, SimpleRNN, Dense, Dropout
from sklearn.metrics import classification_report, accuracy_score
from tensorflow.keras.optimizers import Adam

# Load training, validation, and testing data from CSV files
train_data = pd.read_csv('/content/drive/MyDrive/Depression/Android Dataset/Reading_train.csv')
validation_data = pd.read_csv('/content/drive/MyDrive/Depression/EDAIC Dataset/validation_labels.csv')  # Adjusted for validation data
test_data = pd.read_csv('/content/drive/MyDrive/Depression/EDAIC Dataset/testing_labels.csv')

# Function to preprocess the dataset
def preprocess_data(train_data, validation_data, test_data):
    # Prepare features and labels for training data
    X_train = train_data.drop(columns=['Class', 'name']).values
    y_train = train_data['Class'].values

    # Prepare features and labels for validation data
    X_validation = validation_data.drop(columns=['Class', 'name']).values
    y_validation = validation_data['Class'].values

    # Prepare features and labels for testing data
    X_test = test_data.drop(columns=['Class', 'name']).values
    y_test = test_data['Class'].values

    # Standardize the features
    scaler = StandardScaler()
    scaler.fit(np.vstack((X_train, X_validation, X_test)))  # Fit on all available data
    X_train_scaled = scaler.transform(X_train)
    X_validation_scaled = scaler.transform(X_validation)
    X_test_scaled = scaler.transform(X_test)

    # Reshape data for RNN: (samples, timesteps, features)
    X_train_rnn = X_train_scaled.reshape((X_train_scaled.shape[0], X_train_scaled.shape[1], 1))
    X_validation_rnn = X_validation_scaled.reshape((X_validation_scaled.shape[0], X_validation_scaled.shape[1], 1))
    X_test_rnn = X_test_scaled.reshape((X_test_scaled.shape[0], X_test_scaled.shape[1], 1))

    return X_train_rnn, y_train, X_validation_rnn, y_validation, X_test_rnn, y_test

# Preprocess the data including validation set
X_train_rnn, y_train, X_validation_rnn, y_validation, X_test_rnn, y_test = preprocess_data(train_data, validation_data, test_data)

# Function to create RNN model using the Functional API
def create_functional_model(X_train_shape, rnn_units=64, dense_units=50, dropout_rate=0.5):
    inputs = Input(shape=(X_train_shape[1], X_train_shape[2]))
    x = SimpleRNN(units=rnn_units, activation='relu', return_sequences=False)(inputs)
    x = Dense(dense_units, activation='relu')(x)
    x = Dropout(dropout_rate)(x)
    outputs = Dense(1, activation='sigmoid')(x)

    model = Model(inputs=inputs, outputs=outputs)
    model.compile(optimizer=Adam(), loss='binary_crossentropy', metrics=['accuracy'])

    return model

# Create the model with the training data shape, RNN units, dense units, and dropout rate
model = create_functional_model(X_train_rnn.shape, rnn_units=64, dense_units=50, dropout_rate=0.5)

# Print model summary to verify architecture
model.summary()

# Train the model with validation data
history = model.fit(X_train_rnn, y_train, epochs=10, batch_size=32, validation_data=(X_validation_rnn, y_validation))

# Evaluate the model on the test set
y_test_pred = (model.predict(X_test_rnn) > 0.5).astype(int)
print("Functional API Model Classification Report for Testing Set:")
print(classification_report(y_test, y_test_pred))
accuracy_test = accuracy_score(y_test, y_test_pred)
print(f"\nAccuracy for Functional API Model Testing Set: {accuracy_test:.4f}")


Model: "model_5"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_6 (InputLayer)        [(None, 512, 1)]          0         
                                                                 
 simple_rnn_2 (SimpleRNN)    (None, 64)                4224      
                                                                 
 dense_10 (Dense)            (None, 50)                3250      
                                                                 
 dropout_5 (Dropout)         (None, 50)                0         
                                                                 
 dense_11 (Dense)            (None, 1)                 51        
                                                                 
Total params: 7525 (29.39 KB)
Trainable params: 7525 (29.39 KB)
Non-trainable params: 0 (0.00 Byte)
_________________________________________________________________
Epoch 1/10
Epoch 2/10
Epo

In [None]:
# //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////

In [None]:
model.save('/content/drive/MyDrive/Depression/EDAIC Dataset/EDIAC_RNN.h5')


  saving_api.save_model(


In [None]:
# testing the saved model
# Import necessary libraries
import pandas as pd
from sklearn.preprocessing import StandardScaler
from tensorflow.keras.models import load_model
from sklearn.metrics import classification_report, accuracy_score

# Load the saved RNN model from your drive
model_load_path = '/content/drive/MyDrive/Depression/EDAIC Dataset/EDIAC_RNN.h5'  # Ensure this matches the path where you saved the model
loaded_model = load_model(model_load_path)
print("Model loaded successfully.")

# Load new test data
new_test_data_path = '/content/drive/MyDrive/Depression/EDAIC Dataset/testing_labels.csv'  # Update this path
new_test_data = pd.read_csv(new_test_data_path)

# Assuming your test CSV has specific columns to drop (like 'name') and a 'Class' column for labels
X_new_test = new_test_data.drop(columns=['Class', 'name']).values
y_new_test = new_test_data['Class'].values

# Standardize the features using the same approach as was used for the training data
scaler = StandardScaler()
X_new_test_scaled = scaler.fit_transform(X_new_test)  # Note: In practice, use the same scaler as for the training data

# Reshape the data for RNN input, assuming each feature is treated as a separate timestep
X_new_test_scaled_rnn = X_new_test_scaled.reshape((X_new_test_scaled.shape[0], X_new_test_scaled.shape[1], 1))

# Make predictions using the loaded RNN model for the new testing set
y_new_test_pred_rnn = (loaded_model.predict(X_new_test_scaled_rnn) > 0.5).astype(int)

# Calculate and print the classification report and accuracy for the RNN model on the new test set
print("RNN Model Classification Report for New Testing Set:")
print(classification_report(y_new_test, y_new_test_pred_rnn))
accuracy_new_test_rnn = accuracy_score(y_new_test, y_new_test_pred_rnn)
print(f"\nAccuracy for RNN on New Testing Set: {accuracy_new_test_rnn:.4f}")


Model loaded successfully.
RNN Model Classification Report for New Testing Set:
              precision    recall  f1-score   support

           0       0.87      0.55      0.67      1897
           1       0.31      0.71      0.44       549

    accuracy                           0.59      2446
   macro avg       0.59      0.63      0.56      2446
weighted avg       0.74      0.59      0.62      2446


Accuracy for RNN on New Testing Set: 0.5871


# **FINE-TUNING INTER DATASETS**

# taking the weights/trained model on EDAIC-CNN and fine tune on Android interview train set

In [None]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from tensorflow.keras.models import load_model
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import EarlyStopping

# Load the dataset
interview_df = pd.read_csv('/content/drive/MyDrive/Depression/Android Dataset/Interview_test.csv')

# Prepare the dataset by dropping the 'name' column and separating features and labels
X = interview_df.drop(columns=['name', 'Class']).values
y = interview_df['Class'].values

# Split the dataset into training and validation sets
X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42)

# Scale the features
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_val_scaled = scaler.transform(X_val)

# Load the pre-trained model
model = load_model('/content/drive/MyDrive/Depression/EDAIC Dataset/EDAIC_CNN.h5')

# Optionally, you can make some of the layers non-trainable if you wish
# for layer in model.layers[:-n]: # Replace n with the number of layers you want to fine-tune
#     layer.trainable = False

# Compile the model with a smaller learning rate
model.compile(optimizer=Adam(learning_rate=1e-4), loss='binary_crossentropy', metrics=['accuracy'])

# Use early stopping to prevent overfitting
early_stopping = EarlyStopping(monitor='val_loss', patience=10, restore_best_weights=True)

# Fine-tune the model on the new dataset
model.fit(X_train_scaled, y_train, validation_data=(X_val_scaled, y_val), epochs=10, callbacks=[early_stopping])

# Save the fine-tuned model
model.save('/content/drive/MyDrive/Depression/EDAIC Dataset/Fine_tuned_EDAIC_CNN.h5')


Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


  saving_api.save_model(


# **alllll**

In [None]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from tensorflow.keras.models import load_model
from tensorflow.keras.optimizers import Adam
from sklearn.metrics import accuracy_score
from tensorflow.keras.callbacks import EarlyStopping

# Load the training and validation dataset
interview_df = pd.read_csv('/content/drive/MyDrive/Depression/Android Dataset/Interview_test.csv')

# Prepare the dataset by dropping the 'name' column and separating features and labels
X = interview_df.drop(columns=['name', 'Class']).values
y = interview_df['Class'].values

# Split the dataset into training and validation sets
X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42)

# Scale the features
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_val_scaled = scaler.transform(X_val)

# Load the pre-trained model
model = load_model('/content/drive/MyDrive/Depression/EDAIC Dataset/EDAIC_CNN.h5')

# Compile the model with a smaller learning rate
model.compile(optimizer=Adam(learning_rate=1e-4), loss='binary_crossentropy', metrics=['accuracy'])

# Use early stopping to prevent overfitting
early_stopping = EarlyStopping(monitor='val_loss', patience=10, restore_best_weights=True)

# Fine-tune the model on the new dataset
model.fit(X_train_scaled, y_train, validation_data=(X_val_scaled, y_val), epochs=10, callbacks=[early_stopping])

# Save the fine-tuned model
model.save('/content/drive/MyDrive/Depression/EDAIC Dataset/testFine_tuned_EDAIC_CNN.h5')

# Load and prepare the test dataset (Assuming you have a separate test set)
# Replace 'Interview_test.csv' with your actual test dataset file if different
test_df = pd.read_csv('/content/drive/MyDrive/Depression/Android Dataset/Interview_test.csv')  # Update path if needed
X_test = test_df.drop(columns=['name', 'Class']).values
y_test = test_df['Class'].values

# Scale the features of the test set using the same scaler as for training
X_test_scaled = scaler.transform(X_test)

# Load the fine-tuned model
fine_tuned_model = load_model('/content/drive/MyDrive/Depression/EDAIC Dataset/testFine_tuned_EDAIC_CNN.h5')

# Make predictions on the test set
predictions = fine_tuned_model.predict(X_test_scaled)
# Convert probabilities to class labels based on a threshold
predicted_classes = (predictions > 0.5).astype(int)

# Calculate and print the accuracy on the test set
test_accuracy = accuracy_score(y_test, predicted_classes)
print(f'Test Accuracy: {test_accuracy}')

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


  saving_api.save_model(


Test Accuracy: 0.7798532354903269


# With the android fine-tuned test it again on EDAIC test set

In [None]:
import pandas as pd
from sklearn.preprocessing import StandardScaler
from tensorflow.keras.models import load_model

# Load the fine-tuned model
model = load_model('/content/drive/MyDrive/Depression/EDAIC Dataset/testFine_tuned_EDAIC_CNN.h5')

# Load the EDAIC test dataset
edaic_test_df = pd.read_csv('/content/drive/MyDrive/Depression/EDAIC Dataset/testing_labels.csv')

# Assuming the structure is similar to the interview dataset
X_test = edaic_test_df.drop(columns=['name', 'Class']).values
y_test = edaic_test_df['Class'].values

# Scale the features (use the same scaler parameters as used for the training data)
# IMPORTANT: Fit the scaler on training data and then transform test data with it
# For demonstration, we're assuming you have saved the scaler or its parameters
scaler = StandardScaler().fit(X_train)  # X_train should be replaced with your training data or load the scaler
X_test_scaled = scaler.transform(X_test)

# Evaluate the model on the test data
test_loss, test_accuracy = model.evaluate(X_test_scaled, y_test)

print(f"Test Loss: {test_loss}")
print(f"Test Accuracy: {test_accuracy}")


# <60%


Test Loss: 0.7440702319145203
Test Accuracy: 0.7260833978652954


# TENSOR FLOW AVAILABLE

In [None]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from tensorflow.keras.models import load_model
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import EarlyStopping

# Load the dataset from your drive
interview_df = pd.read_csv('/content/drive/MyDrive/Depression/Android Dataset/Interview_test.csv')

# Prepare the dataset by dropping the 'name' column and separating features (X) and labels (y)
X = interview_df.drop(columns=['name', 'Class']).values
y = interview_df['Class'].values

# Split the dataset into training and validation sets
X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42)

# Scale the features with StandardScaler
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_val_scaled = scaler.transform(X_val)

# Load the pre-trained CNN model
model = load_model('/content/drive/MyDrive/Depression/EDAIC Dataset/EDAIC_CNN.h5')

# If desired, set some layers to non-trainable (e.g., for feature extraction only)
# Example: Freeze all but the last n layers
# for layer in model.layers[:-n]:
#     layer.trainable = False

# Compile the model with a smaller learning rate for fine-tuning
model.compile(optimizer=Adam(learning_rate=1e-4), loss='binary_crossentropy', metrics=['accuracy'])

# Use EarlyStopping to halt training when validation loss stops improving
early_stopping = EarlyStopping(monitor='val_loss', patience=10, restore_best_weights=True)

# Fine-tune the model on the new dataset
model.fit(
    X_train_scaled, y_train,
    validation_data=(X_val_scaled, y_val),
    epochs=100,
    callbacks=[early_stopping]
)

# Save the fine-tuned model to your drive
model.save('/content/drive/MyDrive/Depression/EDAIC Dataset/Fine_tuned_EDAIC_CNN.h5')


In [None]:
import pandas as pd
from sklearn.preprocessing import StandardScaler
from tensorflow.keras.models import load_model

# Assuming you've uploaded the fine-tuned model and the dataset to your Colab workspace
model_path = '/content/drive/MyDrive/Depression/EDAIC Dataset/Fine_tuned_EDAIC_CNN.h5'
test_data_path = '/content/drive/MyDrive/Depression/EDAIC Dataset/testing_labels.csv'

# Load the fine-tuned model
model = load_model(model_path)

# Load the EDAIC test dataset
edaic_test_df = pd.read_csv(test_data_path)

# Prepare the dataset: extract features and labels, and scale the features
X_test = edaic_test_df.drop(columns=['name', 'Class']).values
y_test = edaic_test_df['Class'].values

# Scale the features using the same scaler parameters as used for the training data
# IMPORTANT: This assumes the scaler was fitted on the training data
scaler = StandardScaler().fit(X_test)  # Normally, you should use the scaler fitted on the training data
X_test_scaled = scaler.transform(X_test)

# Evaluate the model on the test data
test_loss, test_accuracy = model.evaluate(X_test_scaled, y_test)

print(f"Test Loss: {test_loss}")
print(f"Test Accuracy: {test_accuracy}")


Test Loss: 1.0930414199829102
Test Accuracy: 0.5024529695510864


# Without fine-tuning- taking the weights/trained model on EDAIC-CNN and test on Android interview train set


In [None]:
import pandas as pd
import numpy as np
from tensorflow.keras.models import load_model

# Load the trained model
model_path = '/content/drive/MyDrive/Depression/EDAIC Dataset/EDAIC_CNN.h5'
model = load_model(model_path)

# Load the Android dataset CSV that already contains the extracted features
android_data_path = '/content/drive/MyDrive/Depression/Android Dataset/Interview_test.csv'
android_df = pd.read_csv(android_data_path)

# Assuming your CSV has the features in columns named 'feature1', 'feature2', ..., 'featureN'
# Adjust the column names according to your CSV file structure
# If your features are not in separate columns but rather concatenated in one, you'll need to split them accordingly
features_columns = [col for col in android_df.columns if col.startswith('feature')]
X_test = android_df[features_columns].values

# If you have labels in your CSV and they need to be tested against the model's predictions
# Assuming the label is in a column named 'label'
y_test = android_df['Class'].values

# Reshape X_test if your model expects a specific input shape (e.g., adding a channel dimension for CNNs)
# This reshape depends on your model's architecture
# Example for adding a single channel dimension if needed
X_test = X_test.reshape((X_test.shape[0], X_test.shape[1], 1))

# Evaluate the model on the Android dataset
loss, accuracy = model.evaluate(X_test, y_test, verbose=1)
print(f'Test Loss: {loss}')
print(f'Test Accuracy: {accuracy}')


Test Loss: 12.844561576843262
Test Accuracy: 0.41827884316444397


# Testing EDAIC trained model on Android dataset

In [None]:
import pandas as pd
from tensorflow.keras.models import load_model
from sklearn.metrics import accuracy_score
from sklearn.preprocessing import StandardScaler

# Adjusted function to preprocess your data
def preprocess_data(df):
    # Selecting features (all columns except the first two) and labels (second column)
    X = df.iloc[:, 2:].values  # Adjust if your features start from a different column
    y = df.iloc[:, 1].values  # Labels are in the second column

    # Standardizing features
    scaler = StandardScaler()
    X_scaled = scaler.fit_transform(X)

    return X_scaled, y

# Load test data
csv_file_path = '/content/drive/MyDrive/Depression/Android Dataset/Interview_test.csv'

test_data = pd.read_csv(csv_file_path)

# Preprocess the data
X_test, y_true = preprocess_data(test_data)

# Load the CNN model
model_file_path = '/content/drive/MyDrive/Depression/EDAIC Dataset/MODEL.h5'  # Make sure to update this path to your model's location

model = load_model(model_file_path)

# Make predictions with the model
predictions = model.predict(X_test)

# # Assuming a binary classification task and predictions being probabilities,
# # convert probabilities to binary outcomes based on a threshold (e.g., 0.5)
# predictions_binary = (predictions > 0.5).astype("int32").flatten()

# Calculate and print the accuracy
accuracy = accuracy_score(y_true, predictions_binary)
print(f"Model accuracy on the test set: {accuracy * 100:.2f}%")


Model accuracy on the test set: 41.63%


# now again training the model.h5 by android cnn

In [None]:
import pandas as pd
from sklearn.preprocessing import StandardScaler
from tensorflow.keras.models import load_model
from sklearn.metrics import classification_report, accuracy_score
from tensorflow.keras.optimizers import Adam

# Function to preprocess data (new data in this case)
def preprocess_data(file_path):
    data = pd.read_csv(file_path)
    X = data.drop(columns=['Class', 'name']).values
    y = data['Class'].values

    # Standardize the features
    scaler = StandardScaler()
    X_scaled = scaler.fit_transform(X)

    # Reshape the data for CNN input
    X_cnn = X_scaled.reshape((X_scaled.shape[0], X_scaled.shape[1], 1))

    return X_cnn, y

# Load new training and testing data
new_train_data_path = '/content/drive/MyDrive/Depression/Android Dataset/Interview_train.csv'  # Update this path
new_test_data_path = '/content/drive/MyDrive/Depression/Android Dataset/Interview_test.csv'    # Update this path

X_train_new, y_train_new = preprocess_data(new_train_data_path)
X_test_new, y_test_new = preprocess_data(new_test_data_path)

# Load the existing model
model = load_model('/content/drive/MyDrive/Depression/EDAIC Dataset/MODEL.h5')

# Optionally, if you want to adjust the model's learning rate or other parameters:
model.compile(optimizer=Adam(learning_rate=0.001), loss='binary_crossentropy', metrics=['accuracy'])

# Further train the model on your new data
model.fit(X_train_new, y_train_new, epochs=10, batch_size=32, validation_data=(X_test_new, y_test_new))

# Evaluate the updated model on the new test set
y_test_pred = (model.predict(X_test_new) > 0.5).astype(int)
print("Updated Model Classification Report for New Testing Set:")
print(classification_report(y_test_new, y_test_pred))
accuracy_test = accuracy_score(y_test_new, y_test_pred)
print(f"\nUpdated Model Accuracy for New Testing Set: {accuracy_test:.4f}")

# Save the updated model
model.save('/content/drive/MyDrive/Depression/EDAIC Dataset/update_MODEL.h5')


Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Updated Model Classification Report for New Testing Set:
              precision    recall  f1-score   support

           0       0.42      1.00      0.59       624
           1       0.00      0.00      0.00       875

    accuracy                           0.42      1499
   macro avg       0.21      0.50      0.29      1499
weighted avg       0.17      0.42      0.24      1499


Updated Model Accuracy for New Testing Set: 0.4163


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  saving_api.save_model(


In [None]:

# testing the new updated model on ANDROID TEST
import pandas as pd
from tensorflow.keras.models import load_model
from sklearn.metrics import accuracy_score
from sklearn.preprocessing import StandardScaler

# Adjusted function to preprocess your data
def preprocess_data(df):
    # Selecting features (all columns except the first two) and labels (second column)
    X = df.iloc[:, 2:].values  # Adjust if your features start from a different column
    y = df.iloc[:, 1].values  # Labels are in the second column

    # Standardizing features
    scaler = StandardScaler()
    X_scaled = scaler.fit_transform(X)

    return X_scaled, y

# Load test data
csv_file_path = '/content/drive/MyDrive/Depression/Android Dataset/Interview_test.csv'

test_data = pd.read_csv(csv_file_path)

# Preprocess the data
X_test, y_true = preprocess_data(test_data)

# Load the CNN model
model_file_path = '/content/drive/MyDrive/Depression/EDAIC Dataset/update_MODEL.h5'  # Make sure to update this path to your model's location

model = load_model(model_file_path)

# Make predictions with the model
predictions = model.predict(X_test)

# Assuming a binary classification task and predictions being probabilities,
# convert probabilities to binary outcomes based on a threshold (e.g., 0.5)
predictions_binary = (predictions > 0.5).astype("int32").flatten()

# Calculate and print the accuracy
accuracy = accuracy_score(y_true, predictions_binary)
print(f"Model accuracy on the test set: {accuracy * 100:.2f}%")


Model accuracy on the test set: 41.63%


In [None]:
import pandas as pd
from tensorflow.keras.models import load_model
from sklearn.metrics import accuracy_score
from sklearn.preprocessing import StandardScaler

# Adjusted function to preprocess your data
def preprocess_data(df):
    # Selecting features (all columns except the first two) and labels (second column)
    X = df.iloc[:, 2:].values  # Adjust if your features start from a different column
    y = df.iloc[:, 1].values  # Labels are in the second column

    # Standardizing features
    scaler = StandardScaler()
    X_scaled = scaler.fit_transform(X)

    return X_scaled, y

# Load test data
csv_file_path = '/content/drive/MyDrive/Depression/EDAIC Dataset/testing_labels.csv'
test_data = pd.read_csv(csv_file_path)

# Preprocess the data
X_test, y_true = preprocess_data(test_data)

# Load the CNN model
model_file_path = '/content/drive/MyDrive/Depression/EDAIC Dataset/update_MODEL.h5'  # Make sure to update this path to your model's location
model = load_model(model_file_path)

# Make predictions with the model
predictions = model.predict(X_test)

# Convert probabilities to binary outcomes based on a threshold (e.g., 0.5)
predictions_binary = (predictions > 0.5).astype("int32").flatten()

# Calculate and print the accuracy
accuracy = accuracy_score(y_true, predictions_binary)
print(f"Model accuracy on the test set: {accuracy * 100:.2f}%")


Model accuracy on the test set: 77.56%


In [None]:
# finetuned

import pandas as pd
from sklearn.preprocessing import StandardScaler
from tensorflow.keras.models import load_model
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.losses import BinaryCrossentropy
from tensorflow.keras.metrics import Accuracy

# Function to preprocess the Android-specific data
def preprocess_data(file_path):
    data = pd.read_csv(file_path)
    X = data.drop(columns=['Class', 'name']).values  # Adjust these column names based on your dataset
    y = data['Class'].values

    # Standardize the features
    scaler = StandardScaler()
    X_scaled = scaler.fit_transform(X)

    # Reshape the data for CNN input if necessary
    X_reshaped = X_scaled.reshape((X_scaled.shape[0], X_scaled.shape[1], 1))

    return X_reshaped, y

# Load the Android-specific training and testing data
train_data_path = '/content/drive/MyDrive/Depression/Android Dataset/Interview_train.csv'  # Update this path
test_data_path = '/content/drive/MyDrive/Depression/Android Dataset/Interview_test.csv'    # Update this path
X_train_android, y_train_android = preprocess_data(train_data_path)
X_test_android, y_test_android = preprocess_data(test_data_path)

# Load the pre-trained model
model = load_model('/content/drive/MyDrive/Depression/EDAIC Dataset/update_MODEL.h5')

# Decide which layers to unfreeze for the fine-tuning process
# Example: Unfreezing the last 3 layers. Adjust this based on your model's architecture.
for layer in model.layers[-3:]:
    layer.trainable = True

# Compile the model with a smaller learning rate for fine-tuning
model.compile(optimizer=Adam(learning_rate=0.0001), loss=BinaryCrossentropy(), metrics=[Accuracy()])

# Fine-tune the model on the new Android-specific data
model.fit(X_train_android, y_train_android, epochs=5, batch_size=32, validation_split=0.2)

# Evaluate the fine-tuned model on the Android-specific test data
loss, accuracy = model.evaluate(X_test_android, y_test_android)
print(f"Fine-tuned Model Loss: {loss}, Fine-tuned Model Accuracy: {accuracy}")

# Save the fine-tuned model
model.save('/content/drive/MyDrive/Depression/EDAIC Dataset/finetuned.h5')


Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
Fine-tuned Model Loss: nan, Fine-tuned Model Accuracy: 0.0


  saving_api.save_model(


In [None]:
import pandas as pd
from sklearn.preprocessing import StandardScaler
from tensorflow.keras.models import load_model
from sklearn.metrics import accuracy_score

# Function to preprocess the test data
def preprocess_data(file_path):
    data = pd.read_csv(file_path)
    X = data.drop(columns=['Class', 'name']).values  # Adjust these column names based on your dataset
    y = data['Class'].values

    # Standardize the features
    scaler = StandardScaler()
    X_scaled = scaler.fit_transform(X)

    # Reshape the data for CNN input if necessary
    X_reshaped = X_scaled.reshape((X_scaled.shape[0], X_scaled.shape[1], 1))

    return X_reshaped, y

# Load the fine-tuned model
model = load_model('/content/drive/MyDrive/Depression/EDAIC Dataset/finetuned.h5')

# Load and preprocess the test data
test_data_path = '/content/drive/MyDrive/Depression/EDAIC Dataset/testing_labels.csv'  # Update this path to your actual test.csv file path
X_test, y_test = preprocess_data(test_data_path)

# Make predictions with the fine-tuned model
predictions = model.predict(X_test)
predictions_binary = (predictions > 0.5).astype("int32").flatten()  # Adjust threshold and method based on your problem

# Calculate the accuracy of the model
accuracy = accuracy_score(y_test, predictions_binary)
print(f"Accuracy of the fine-tuned model on test.csv: {accuracy:.4f}")


Accuracy of the fine-tuned model on test.csv: 0.7756


In [None]:
# without finetuning
import pandas as pd
import numpy as np
from tensorflow.keras.models import load_model

# Load the trained model
model_path = '/content/drive/MyDrive/Depression/EDAIC Dataset/update_MODEL.h5'
model = load_model(model_path)

# Load the Android dataset CSV that already contains the extracted features
android_data_path = '/content/drive/MyDrive/Depression/Android Dataset/Interview_test.csv'
android_df = pd.read_csv(android_data_path)

# Assuming your CSV has the features in columns named 'feature1', 'feature2', ..., 'featureN'
# Adjust the column names according to your CSV file structure
# If your features are not in separate columns but rather concatenated in one, you'll need to split them accordingly
features_columns = [col for col in android_df.columns if col.startswith('feature')]
X_test = android_df[features_columns].values

# If you have labels in your CSV and they need to be tested against the model's predictions
# Assuming the label is in a column named 'label'
y_test = android_df['Class'].values

# Reshape X_test if your model expects a specific input shape (e.g., adding a channel dimension for CNNs)
# This reshape depends on your model's architecture
# Example for adding a single channel dimension if needed
X_test = X_test.reshape((X_test.shape[0], X_test.shape[1], 1))

# Evaluate the model on the Android dataset
loss, accuracy = model.evaluate(X_test, y_test, verbose=1)
print(f'Test Loss: {loss}')
print(f'Test Accuracy: {accuracy}')


Test Loss: nan
Test Accuracy: 0.41627752780914307


In [None]:
import pandas as pd
from sklearn.preprocessing import StandardScaler
from tensorflow.keras.models import load_model
from sklearn.metrics import classification_report, accuracy_score
from tensorflow.keras.optimizers import Adam

# Function to preprocess data (new data in this case)
def preprocess_data(file_path):
    data = pd.read_csv(file_path)
    X = data.drop(columns=['Class', 'name']).values  # Adjust 'name' column if not applicable
    y = data['Class'].values

    # Standardize the features
    scaler = StandardScaler()
    X_scaled = scaler.fit_transform(X)

    # Reshape the data for CNN input
    X_cnn = X_scaled.reshape((X_scaled.shape[0], X_scaled.shape[1], 1))

    return X_cnn, y

# Paths to your new training and testing data
new_train_data_path = '/content/drive/MyDrive/Depression/Android Dataset/Interview_train.csv'  # Update this path
new_test_data_path = '/content/drive/MyDrive/Depression/Android Dataset/Interview_test.csv'    # Update this path

# Load new training and testing data
X_train_new, y_train_new = preprocess_data(new_train_data_path)
X_test_new, y_test_new = preprocess_data(new_test_data_path)

# Load the existing model
model_path = '/content/drive/MyDrive/Depression/EDAIC Dataset/MODEL.h5'  # Update this path
model = load_model(model_path)

# Optionally, adjust the model's learning rate or other parameters
model.compile(optimizer=Adam(learning_rate=0.001), loss='binary_crossentropy', metrics=['accuracy'])

# Further train the model on your new data
model.fit(X_train_new, y_train_new, epochs=10, batch_size=32, validation_data=(X_test_new, y_test_new))

# Evaluate the updated model on the new test set
y_test_pred = (model.predict(X_test_new) > 0.5).astype(int)
print("Updated Model Classification Report for New Testing Set:")
print(classification_report(y_test_new, y_test_pred))
accuracy_test = accuracy_score(y_test_new, y_test_pred)
print(f"\nUpdated Model Accuracy for New Testing Set: {accuracy_test:.4f}")

# Save the updated model
updated_model_path = '/content/drive/MyDrive/Depression/EDAIC Dataset/update_MODEL.h5'  # Update this path
model.save(updated_model_path)


Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Updated Model Classification Report for New Testing Set:
              precision    recall  f1-score   support

           0       0.42      1.00      0.59       624
           1       0.00      0.00      0.00       875

    accuracy                           0.42      1499
   macro avg       0.21      0.50      0.29      1499
weighted avg       0.17      0.42      0.24      1499


Updated Model Accuracy for New Testing Set: 0.4163


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  saving_api.save_model(


In [None]:
import pandas as pd
from sklearn.preprocessing import StandardScaler
from tensorflow.keras.models import load_model
from sklearn.metrics import classification_report, accuracy_score

# Assuming the preprocess_data function is already defined as above

# Path to the test data and the updated model
test_data_path = '/content/drive/MyDrive/Depression/Android Dataset/Interview_test.csv'  # Update this path with your test.csv location
updated_model_path = '/content/drive/MyDrive/Depression/EDAIC Dataset/update_MODEL.h5'  # Ensure this is where your updated model is saved

# Preprocess the test data
X_test, y_test = preprocess_data(test_data_path)

# Load the updated model
model = load_model(updated_model_path)

# Make predictions on the test data
y_test_pred = (model.predict(X_test) > 0.5).astype(int)

# Evaluate the model's performance on the test data
print("Classification Report for Test Data:")
print(classification_report(y_test, y_test_pred))
accuracy_test = accuracy_score(y_test, y_test_pred)
print(f"Model Accuracy on Test Data: {accuracy_test:.4f}")


Classification Report for Test Data:
              precision    recall  f1-score   support

           0       0.42      1.00      0.59       624
           1       0.00      0.00      0.00       875

    accuracy                           0.42      1499
   macro avg       0.21      0.50      0.29      1499
weighted avg       0.17      0.42      0.24      1499

Model Accuracy on Test Data: 0.4163


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


In [None]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from tensorflow.keras.models import load_model, Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import ModelCheckpoint

# Load your dataset
csv_file_path = '/content/drive/MyDrive/Depression/Android Dataset/Interview_train.csv'
dataset = pd.read_csv(csv_file_path)

# Extract features and labels
X = dataset.iloc[:, 2:].values  # Extract features from the 3rd column onwards
y = dataset.iloc[:, 1].values  # Extract class labels from the 2nd column

# Split the dataset into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Standardize features
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Load pre-trained model
pretrained_model_path = '/content/drive/MyDrive/Depression/EDAIC Dataset/EDAIC_CNN.h5'  # Update with the path to your pre-trained model
pretrained_model = load_model(pretrained_model_path)

# Freeze the layers of the pre-trained model
for layer in pretrained_model.layers:
    layer.trainable = False

# Add new layers on top of the pre-trained model
model = Sequential()
model.add(pretrained_model)
model.add(Dense(64, activation='relu'))
model.add(Dense(1, activation='sigmoid'))

# Compile the model
model.compile(optimizer=Adam(lr=0.001), loss='binary_crossentropy', metrics=['accuracy'])

# Define callbacks, e.g., to save the best model during training
checkpoint = ModelCheckpoint('best_model.h5', monitor='val_accuracy', mode='max', save_best_only=True, verbose=1)

# Train the model
history = model.fit(
    X_train_scaled,
    y_train,
    validation_data=(X_test_scaled, y_test),
    epochs=10,  # Adjust as needed
    batch_size=32,
    callbacks=[checkpoint]
)

# Evaluate the model on the test set
test_loss, test_acc = model.evaluate(X_test_scaled, y_test)
print(f'Test accuracy: {test_acc}')

# Optionally, save the trained model
model.save('/content/drive/MyDrive/Depression/EDAIC Dataset/fine-tuned.h5')




Epoch 1/10
Epoch 1: val_accuracy improved from -inf to 0.86350, saving model to best_model.h5
Epoch 2/10
 1/85 [..............................] - ETA: 10s - loss: 0.6212 - accuracy: 0.7812

  saving_api.save_model(


Epoch 2: val_accuracy did not improve from 0.86350
Epoch 3/10
Epoch 3: val_accuracy improved from 0.86350 to 0.86647, saving model to best_model.h5
Epoch 4/10
Epoch 4: val_accuracy did not improve from 0.86647
Epoch 5/10
Epoch 5: val_accuracy did not improve from 0.86647
Epoch 6/10
Epoch 6: val_accuracy did not improve from 0.86647
Epoch 7/10
Epoch 7: val_accuracy did not improve from 0.86647
Epoch 8/10
Epoch 8: val_accuracy improved from 0.86647 to 0.86795, saving model to best_model.h5
Epoch 9/10
Epoch 9: val_accuracy did not improve from 0.86795
Epoch 10/10
Epoch 10: val_accuracy did not improve from 0.86795
Test accuracy: 0.859050452709198


In [None]:
import pandas as pd
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score
from tensorflow.keras.models import load_model

# Load your dataset
csv_file_path = '/content/drive/MyDrive/Depression/EDAIC Dataset/testing_labels.csv'
dataset = pd.read_csv(csv_file_path)

# Extract features and labels
X = dataset.iloc[:, 2:].values  # Extract features from the 3rd column onwards
y_true = dataset.iloc[:, 1].values  # Extract class labels from the 2nd column

# Standardize features if needed
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

# Load your saved CNN model
saved_model_path = '/content/drive/MyDrive/Depression/EDAIC Dataset/fine-tuned.h5'  # Update with the path to your saved model
model = load_model(saved_model_path)

# Make predictions on the dataset
predictions = model.predict(X_scaled)
predictions_binary = (predictions > 0.5).astype('int32').flatten()  # Assuming binary classification

# Calculate accuracy
accuracy = accuracy_score(y_true, predictions_binary)
print(f"Model accuracy on the dataset: {accuracy * 100:.2f}%")


Model accuracy on the dataset: 66.64%
