In [4]:
import os
import librosa
import numpy as np
import pandas as pd

# Path to the CREMA-D dataset
cremad_dir = "D:/depression_detect/datasets/cremad"

# Function to extract audio features
def extract_features(file_path):
    try:
        y, sr = librosa.load(file_path, sr=None)  # Load audio
        mfccs = np.mean(librosa.feature.mfcc(y=y, sr=sr, n_mfcc=13).T, axis=0)
        chroma = np.mean(librosa.feature.chroma_stft(y=y, sr=sr).T, axis=0)
        mel = np.mean(librosa.feature.melspectrogram(y=y, sr=sr).T, axis=0)
        return np.hstack([mfccs, chroma, mel])  # Combine all features
    except Exception as e:
        print(f"Error processing {file_path}: {e}")
        return None

# Process all audio files in the dataset
features, labels, file_names = [], [], []
for file in os.listdir(cremad_dir):
    if file.endswith(".wav"):
        label = 1 if "SAD" in file or "DIS" in file else 0  # Depressed = 1, Not Depressed = 0
        file_path = os.path.join(cremad_dir, file)
        feature_vector = extract_features(file_path)
        if feature_vector is not None:
            features.append(feature_vector)
            labels.append(label)
            file_names.append(file)  # Save file names for reference

# Convert extracted features into a DataFrame
feature_df = pd.DataFrame(features)
feature_df['label'] = labels
feature_df['file_name'] = file_names  # Add file names for tracking

# Save extracted features to a CSV file
save_path = "D:/depression_detection/preprocessed_data/cremad_features.csv"
feature_df.to_csv(save_path, index=False)
print(f"Feature extraction complete! Saved to {save_path}")


  return pitch_tuning(


Feature extraction complete! Saved to D:/depression_detection/preprocessed_data/cremad_features.csv


In [5]:
import os

file_path = "D:/depression_detect/preprocessed_data/cremad_features.csv"

if os.path.exists(file_path):
    print("✅ cremad_features.csv exists!")
else:
    print("❌ cremad_features.csv is missing!")


✅ cremad_features.csv exists!


In [6]:
import os
import librosa
import pandas as pd
import numpy as np

# Define dataset path
dataset_path = "D:/depression_detect/datasets/cremad/"

# Get all audio files
audio_files = [f for f in os.listdir(dataset_path) if f.endswith('.wav')]

# Define emotion-to-label mapping
depressed_labels = ["SAD", "ANG"]  # Depressed emotions → Label 1
non_depressed_labels = ["HAP", "NEU"]  # Non-depressed emotions → Label 0

# Initialize list for features
data = []

# Extract features and labels
for file in audio_files:
    file_path = os.path.join(dataset_path, file)

    # Extract emotion from filename (Example: "1001_IEO_SAD_HI.wav" → "SAD")
    emotion = file.split("_")[2]

    # Assign label (1 for depressed, 0 for non-depressed)
    label = 1 if emotion in depressed_labels else 0

    # Load audio
    y, sr = librosa.load(file_path, sr=None)

    # Extract MFCCs
    mfccs = librosa.feature.mfcc(y=y, sr=sr, n_mfcc=13)
    mfccs_mean = np.mean(mfccs, axis=1)  # Take mean over time

    # Append to list
    data.append([file_path] + mfccs_mean.tolist() + [label])

# Create DataFrame
columns = ['file_path'] + [f'mfcc_{i+1}' for i in range(13)] + ['label']
df = pd.DataFrame(data, columns=columns)

# Save to CSV
save_path = "D:/depression_detect/preprocessed_data/cremad_features.csv"
os.makedirs(os.path.dirname(save_path), exist_ok=True)  # Create directory if missing
df.to_csv(save_path, index=False)

print("✅ `cremad_features.csv` successfully created with labels!")


✅ `cremad_features.csv` successfully created with labels!


In [7]:
import pandas as pd

data_path = "D:/depression_detection/preprocessed_data/cremad_features.csv"
df = pd.read_csv(data_path)

print(df.head())  # Show first few rows
print(df.info())  # Check column data types


           0           1          2          3          4          5  \
0 -306.02740   92.670235   8.491313  23.965403   7.477993  -5.759456   
1 -346.39963   95.839120  10.516283  31.619215  15.872088  -6.845447   
2 -321.42026   94.760910   8.155397  23.323242  11.719156  -7.116331   
3 -303.30374   92.528890   4.231231  27.970133  10.869824 -11.878345   
4 -335.49590  100.393310   9.384935  30.160906  11.466775  -3.333670   

           6         7         8          9  ...       145       146  \
0 -11.883088 -9.676736 -3.996747 -13.352563  ...  0.002250  0.003137   
1  -6.629935 -4.978727 -5.310654 -10.283518  ...  0.000678  0.000941   
2  -8.534803 -4.996966 -4.994400 -13.706510  ...  0.004759  0.011868   
3 -10.095112 -7.149731 -7.651760 -17.085900  ...  0.008727  0.012495   
4  -8.350987 -9.757345 -6.079327 -12.109532  ...  0.000724  0.000856   

        147       148       149       150       151       152  label  \
0  0.003351  0.002825  0.002283  0.002875  0.003217  0.002537 

In [8]:
import pandas as pd
import numpy as np
import joblib
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report

# Load extracted features
data_path = "D:/depression_detect/preprocessed_data/cremad_features.csv"
df = pd.read_csv(data_path)

# Print column names to check if 'file_path' exists
print("Column Names in CSV:", df.columns)

# If 'file_path' exists, extract labels, else notify user
if 'file_path' in df.columns:
    import os
    
    def extract_label(file_path):
        filename = os.path.basename(file_path)
        emotion = filename.split('_')[2]  # Example: "1001_DFA_**ANG**_XX.wav"
        return 1 if emotion == "SAD" else 0  # Depressed = 1, Not Depressed = 0
    
    df['label'] = df['file_path'].apply(extract_label)
    df = df.drop(columns=['file_path'])  # Drop file_path after extracting labels
else:
    print("❌ Warning: 'file_path' column is missing! Add labels manually.")
    exit()  # Stop execution because labels are required

# Feature and label split
X = df.drop(columns=['label'])
y = df['label']

# Train-test split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42, stratify=y)

# Initialize and train the model
rf_model = RandomForestClassifier(n_estimators=100, random_state=42)
rf_model.fit(X_train, y_train)

# Predictions
y_pred = rf_model.predict(X_test)

# Evaluate model performance
accuracy = accuracy_score(y_test, y_pred)
print(f"✅ Model Accuracy: {accuracy:.4f}")

# Confusion matrix & classification report
print("\nConfusion Matrix:\n", confusion_matrix(y_test, y_pred))
print("\nClassification Report:\n", classification_report(y_test, y_pred))

# Save the trained model
model_path = "D:/depression_detect/models/random_forest_cremad.pkl"
joblib.dump(rf_model, model_path)
print(f"✅ Model saved to: {model_path}")


Column Names in CSV: Index(['file_path', 'mfcc_1', 'mfcc_2', 'mfcc_3', 'mfcc_4', 'mfcc_5', 'mfcc_6',
       'mfcc_7', 'mfcc_8', 'mfcc_9', 'mfcc_10', 'mfcc_11', 'mfcc_12',
       'mfcc_13', 'label'],
      dtype='object')
✅ Model Accuracy: 0.8522

Confusion Matrix:
 [[1188   47]
 [ 173   81]]

Classification Report:
               precision    recall  f1-score   support

           0       0.87      0.96      0.92      1235
           1       0.63      0.32      0.42       254

    accuracy                           0.85      1489
   macro avg       0.75      0.64      0.67      1489
weighted avg       0.83      0.85      0.83      1489

✅ Model saved to: D:/depression_detect/models/random_forest_cremad.pkl


In [9]:
from sklearn.model_selection import GridSearchCV

param_grid = {
    'n_estimators': [100, 200, 300],
    'max_depth': [10, 20, 30, None],
    'min_samples_split': [2, 5, 10]
}

grid_search = GridSearchCV(RandomForestClassifier(random_state=42), param_grid, cv=3, scoring='accuracy')
grid_search.fit(X_train, y_train)

print("Best Parameters:", grid_search.best_params_)


Best Parameters: {'max_depth': 10, 'min_samples_split': 2, 'n_estimators': 200}


In [10]:
print("Columns in DataFrame:", df.columns)


Columns in DataFrame: Index(['mfcc_1', 'mfcc_2', 'mfcc_3', 'mfcc_4', 'mfcc_5', 'mfcc_6', 'mfcc_7',
       'mfcc_8', 'mfcc_9', 'mfcc_10', 'mfcc_11', 'mfcc_12', 'mfcc_13',
       'label'],
      dtype='object')


In [11]:
print(df.columns)


Index(['mfcc_1', 'mfcc_2', 'mfcc_3', 'mfcc_4', 'mfcc_5', 'mfcc_6', 'mfcc_7',
       'mfcc_8', 'mfcc_9', 'mfcc_10', 'mfcc_11', 'mfcc_12', 'mfcc_13',
       'label'],
      dtype='object')


In [12]:
print("Unique values in 'y':", y.unique())
print("Class distribution:\n", y.value_counts())


Unique values in 'y': [0 1]
Class distribution:
 label
0    6171
1    1271
Name: count, dtype: int64


In [13]:
print(df.head())

       mfcc_1      mfcc_2     mfcc_3     mfcc_4     mfcc_5     mfcc_6  \
0 -306.027405   92.670235   8.491313  23.965403   7.477993  -5.759456   
1 -346.399628   95.839119  10.516283  31.619215  15.872088  -6.845447   
2 -321.420258   94.760910   8.155397  23.323242  11.719156  -7.116331   
3 -303.303741   92.528893   4.231231  27.970133  10.869824 -11.878345   
4 -335.495911  100.393311   9.384935  30.160906  11.466775  -3.333670   

      mfcc_7    mfcc_8    mfcc_9    mfcc_10   mfcc_11   mfcc_12   mfcc_13  \
0 -11.883088 -9.676736 -3.996747 -13.352563  0.408197 -9.709486 -6.127124   
1  -6.629935 -4.978727 -5.310654 -10.283518 -2.534367 -7.255390 -6.153906   
2  -8.534803 -4.996966 -4.994400 -13.706510 -3.357414 -8.454173 -6.561941   
3 -10.095112 -7.149731 -7.651760 -17.085899 -0.201025 -8.867324 -9.357766   
4  -8.350987 -9.757345 -6.079327 -12.109532  1.537681 -9.795646 -3.472060   

   label  
0      0  
1      0  
2      0  
3      0  
4      0  


In [14]:
import os
import pandas as pd
import numpy as np
from imblearn.over_sampling import SMOTE
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
import joblib

# Load Dataset
df = pd.read_csv("D:/depression_detect/preprocessed_data/cremad_features.csv")

# Step 1: Extract Labels from File Paths
def extract_label(file_path):
    """Extracts label from file name based on emotion code in filename."""
    filename = os.path.basename(file_path)  # Extracts '1001_DFA_ANG.wav'
    emotion = filename.split('_')[-2]  # Extracts 'ANG' (corrected position)

    # Map emotions to binary labels (Modify based on dataset specifics)
    depressed_emotions = ["ANG", "SAD", "FEA", "DIS"]  # Depressed (1)
    return 1 if emotion in depressed_emotions else 0

# Step 2: Generate Labels if Missing
if 'label' not in df.columns:
    if 'file_path' in df.columns:
        df['label'] = df['file_path'].apply(extract_label)
        
    else:
        raise KeyError("❌ ERROR: 'file_path' column is missing, can't extract labels!")

# **Drop `file_path` column to prevent string-to-float error**
if 'file_path' in df.columns:
    df.drop(columns=['file_path'], inplace=True)

# Step 3: Split Features & Labels
X = df.drop(columns=['label'])  # Features
y = df['label']  # Labels

# Step 4: Apply SMOTE
smote = SMOTE(random_state=42)
X_resampled, y_resampled = smote.fit_resample(X, y)

print("✅ SMOTE applied! New class distribution:")
print(y_resampled.value_counts())

# Step 5: Train-Test Split
X_train, X_test, y_train, y_test = train_test_split(X_resampled, y_resampled, test_size=0.2, random_state=42)

# Step 6: Train Random Forest Model
model = RandomForestClassifier(n_estimators=200, max_depth=10, min_samples_split=2, random_state=42)
model.fit(X_train, y_train)

# Step 7: Evaluate Model
y_pred = model.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)

print(f"\n✅ Model Accuracy after SMOTE: {accuracy:.4f}\n")
print("Confusion Matrix:\n", confusion_matrix(y_test, y_pred))
print("\nClassification Report:\n", classification_report(y_test, y_pred))

# Step 8: Save Model
model_path = "D:/depression_detect/models/random_forest_cremad_smote.pkl"
joblib.dump(model, model_path)
print(f"\n✅ Model saved to: {model_path}")


✅ SMOTE applied! New class distribution:
label
1    4900
0    4900
Name: count, dtype: int64

✅ Model Accuracy after SMOTE: 0.7367

Confusion Matrix:
 [[764 192]
 [324 680]]

Classification Report:
               precision    recall  f1-score   support

           0       0.70      0.80      0.75       956
           1       0.78      0.68      0.72      1004

    accuracy                           0.74      1960
   macro avg       0.74      0.74      0.74      1960
weighted avg       0.74      0.74      0.74      1960


✅ Model saved to: D:/depression_detect/models/random_forest_cremad_smote.pkl


In [15]:
print("Shape of X:", X.shape)  # Should be (samples, features)


Shape of X: (7442, 13)


In [16]:
print(df.columns)  # This will show all columns in your dataset


Index(['mfcc_1', 'mfcc_2', 'mfcc_3', 'mfcc_4', 'mfcc_5', 'mfcc_6', 'mfcc_7',
       'mfcc_8', 'mfcc_9', 'mfcc_10', 'mfcc_11', 'mfcc_12', 'mfcc_13',
       'label'],
      dtype='object')


In [17]:
df = df.drop(columns=['file_path'], errors='ignore')  # Ignore if not present


In [18]:
X = df.drop(columns=['label']).values
print("Shape of X after fixing:", X.shape)  # Should now be (samples, 13)


Shape of X after fixing: (7442, 13)


In [19]:
X = X.reshape(X.shape[0], 13, 1)  # Now it should work!


In [22]:
import os
import librosa
import numpy as np
import pandas as pd
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Conv1D, MaxPooling1D, Flatten, Dense, Dropout, BatchNormalization
from tensorflow.keras.utils import to_categorical
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

# Load Dataset
df = pd.read_csv("D:/depression_detect/preprocessed_data/cremad_features.csv")

# Extract Labels from File Paths
def extract_label(file_path):
    """Extracts label from file name based on emotion code in filename."""
    filename = os.path.basename(file_path)  
    emotion = filename.split('_')[-1].split('.')[0]  

    # Depression-related emotions
    depressed_emotions = ["ANG", "SAD", "FEA", "DIS"]  
    return 1 if emotion in depressed_emotions else 0

# Ensure labels exist
if 'label' not in df.columns:
    df['label'] = df['file_path'].apply(extract_label)

# Drop non-numeric columns
df.drop(columns=['file_path'], inplace=True, errors='ignore')

# Extract Features and Labels
X = df.drop(columns=['label']).values
y = df['label'].values

# Standardize Features (before reshaping)
scaler = StandardScaler()
X = scaler.fit_transform(X)

# Reshape for LSTM: (samples, timesteps, features)
timesteps = X.shape[1]  # Each feature column becomes a timestep
X = X.reshape(X.shape[0], timesteps, 1)  

# Convert labels to categorical (binary classification)
y = to_categorical(y, num_classes=2)

# Train-Test Split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

print(f"✅ Final Shapes: X_train={X_train.shape}, X_test={X_test.shape}, y_train={y_train.shape}, y_test={y_test.shape}")


✅ Final Shapes: X_train=(5953, 13, 1), X_test=(1489, 13, 1), y_train=(5953, 2), y_test=(1489, 2)


In [21]:
# Define the LSTM + CNN Model
model = Sequential([
    # Convolutional Layer for feature extraction
    Conv1D(filters=64, kernel_size=3, activation='relu', input_shape=(13, 1)),
    BatchNormalization(),
    MaxPooling1D(pool_size=2),
    
    # LSTM Layer for sequence learning
    LSTM(64, return_sequences=True),
    LSTM(32),

    # Fully Connected Layers
    Dense(32, activation='relu'),
    Dropout(0.5),
    Dense(2, activation='softmax')  # Binary classification (depressed/not depressed)
])

# Compile the model
model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

# Model Summary
model.summary()


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


In [26]:
# Train the model
history = model.fit(X_train, y_train, 
                    validation_data=(X_test, y_test), 
                    epochs=30, batch_size=32)

# Save the trained model
model.save("D:/depression_detect/models/lstm_cnn_model.h5")


Epoch 1/30
[1m187/187[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 11ms/step - accuracy: 0.6447 - loss: 0.6461 - val_accuracy: 0.6964 - val_loss: 0.6022
Epoch 2/30
[1m187/187[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 9ms/step - accuracy: 0.6934 - loss: 0.6003 - val_accuracy: 0.7199 - val_loss: 0.5614
Epoch 3/30
[1m187/187[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 10ms/step - accuracy: 0.7099 - loss: 0.5831 - val_accuracy: 0.7334 - val_loss: 0.5528
Epoch 4/30
[1m187/187[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 10ms/step - accuracy: 0.7164 - loss: 0.5845 - val_accuracy: 0.7173 - val_loss: 0.5544
Epoch 5/30
[1m187/187[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 9ms/step - accuracy: 0.7294 - loss: 0.5580 - val_accuracy: 0.7005 - val_loss: 0.5688
Epoch 6/30
[1m187/187[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 11ms/step - accuracy: 0.7153 - loss: 0.5760 - val_accuracy: 0.7267 - val_loss: 0.5629
Epoch 7/30
[1m187/187[



In [27]:
from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau

# Early Stopping: Stop training if validation accuracy stops improving
early_stopping = EarlyStopping(monitor='val_loss', patience=5, restore_best_weights=True)

# Reduce Learning Rate if validation loss doesn't improve
lr_scheduler = ReduceLROnPlateau(monitor='val_loss', factor=0.5, patience=3, verbose=1)

# Train again with callbacks
history = model.fit(X_train, y_train, 
                    validation_data=(X_test, y_test), 
                    epochs=50, batch_size=32, 
                    callbacks=[early_stopping, lr_scheduler])

# Save the improved model
model.save("D:/depression_detect/models/lstm_cnn_model.keras")


Epoch 1/50
[1m187/187[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 9ms/step - accuracy: 0.8916 - loss: 0.2415 - val_accuracy: 0.6823 - val_loss: 0.9815 - learning_rate: 0.0010
Epoch 2/50
[1m187/187[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 8ms/step - accuracy: 0.8840 - loss: 0.2421 - val_accuracy: 0.6870 - val_loss: 1.0403 - learning_rate: 0.0010
Epoch 3/50
[1m187/187[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 11ms/step - accuracy: 0.8537 - loss: 0.3529 - val_accuracy: 0.6991 - val_loss: 0.9513 - learning_rate: 0.0010
Epoch 4/50
[1m187/187[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 9ms/step - accuracy: 0.8814 - loss: 0.2703 - val_accuracy: 0.6877 - val_loss: 1.0100 - learning_rate: 0.0010
Epoch 5/50
[1m187/187[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 11ms/step - accuracy: 0.9079 - loss: 0.2142 - val_accuracy: 0.6749 - val_loss: 1.1472 - learning_rate: 0.0010
Epoch 6/50
[1m181/187[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [

In [28]:
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Dropout, BatchNormalization, Bidirectional
from tensorflow.keras.regularizers import l2
from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau

# Define Optimized LSTM Model
model = Sequential([
    Bidirectional(LSTM(128, return_sequences=True, kernel_regularizer=l2(0.01)), input_shape=(13, 1)),
    Dropout(0.4),
    BatchNormalization(),

    Bidirectional(LSTM(64, return_sequences=False, kernel_regularizer=l2(0.01))),
    Dropout(0.4),
    BatchNormalization(),

    Dense(128, activation='relu', kernel_regularizer=l2(0.01)),
    Dropout(0.4),
    BatchNormalization(),

    Dense(2, activation='softmax')  # Output layer
])

# Compile Model
optimizer = tf.keras.optimizers.Adam(learning_rate=0.001, clipnorm=1.0)  # Gradient Clipping
model.compile(
    optimizer=optimizer,
    loss='categorical_crossentropy',
    metrics=['accuracy']
)

# Callbacks
early_stopping = EarlyStopping(monitor='val_loss', patience=3, restore_best_weights=True, mode='min')

reduce_lr = ReduceLROnPlateau(monitor='val_loss', factor=0.5, patience=2, verbose=1, min_lr=1e-6)

# Train Model
history = model.fit(
    X_train, y_train,
    validation_data=(X_test, y_test),
    epochs=50, batch_size=64,  # Increased batch size
    callbacks=[early_stopping, reduce_lr]
)

# Save the Model
model.save('D:/depression_detection/models/optimized_lstm_model.keras')


Epoch 1/50


  super().__init__(**kwargs)


[1m94/94[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m12s[0m 35ms/step - accuracy: 0.5784 - loss: 5.7587 - val_accuracy: 0.6662 - val_loss: 2.4861 - learning_rate: 0.0010
Epoch 2/50
[1m94/94[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 25ms/step - accuracy: 0.6364 - loss: 2.1675 - val_accuracy: 0.6662 - val_loss: 1.3923 - learning_rate: 0.0010
Epoch 3/50
[1m94/94[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 35ms/step - accuracy: 0.6617 - loss: 1.2928 - val_accuracy: 0.6662 - val_loss: 1.0304 - learning_rate: 0.0010
Epoch 4/50
[1m94/94[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 35ms/step - accuracy: 0.6666 - loss: 0.9667 - val_accuracy: 0.6662 - val_loss: 0.8532 - learning_rate: 0.0010
Epoch 5/50
[1m94/94[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 37ms/step - accuracy: 0.6834 - loss: 0.8152 - val_accuracy: 0.6662 - val_loss: 0.7650 - learning_rate: 0.0010
Epoch 6/50
[1m94/94[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 36ms/step 

In [29]:
from sklearn.model_selection import train_test_split

# Assuming X and y contain your features and labels
X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42, stratify=y)


In [30]:
import numpy as np

print("Unique labels in y:", np.unique(y))  # This should print 6 unique values


Unique labels in y: [0. 1.]


In [32]:
from imblearn.over_sampling import SMOTE
from sklearn.model_selection import train_test_split
import numpy as np

# Reshape `X` to 2D for SMOTE
X_flat = X.reshape(X.shape[0], -1)  # Convert (samples, timesteps, features) → (samples, features)
y_flat = np.argmax(y, axis=1)  # Convert one-hot encoded `y` back to categorical labels

# Apply SMOTE to balance the dataset
smote = SMOTE(sampling_strategy='auto', random_state=42)
X_resampled, y_resampled = smote.fit_resample(X_flat, y_flat)

# Convert `y_resampled` back to one-hot encoding
y_resampled = to_categorical(y_resampled, num_classes=num_classes)

# Split into train/val sets
X_train, X_val, y_train, y_val = train_test_split(
    X_resampled, y_resampled, test_size=0.2, random_state=42, stratify=y_resampled
)

# Reshape `X_train` & `X_val` back to 3D (for CNN + LSTM)
X_train = X_train.reshape(-1, X.shape[1], X.shape[2])
X_val = X_val.reshape(-1, X.shape[1], X.shape[2])

# Print Shapes to Verify
print("X_train shape:", X_train.shape)  # (samples, timesteps, features)
print("y_train shape:", y_train.shape)  # (samples, num_classes)
print("X_val shape:", X_val.shape)
print("y_val shape:", y_val.shape)


NameError: name 'num_classes' is not defined

In [33]:
# Define Model: CNN + BiLSTM
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv1D, MaxPooling1D, BatchNormalization, Bidirectional, LSTM, Dense, Dropout
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.regularizers import l2

model = Sequential([
    # Convolutional Layers (Feature Extraction)
    Conv1D(filters=64, kernel_size=3, activation='relu', kernel_regularizer=l2(0.001), input_shape=(X_train.shape[1], X_train.shape[2])),
    BatchNormalization(),
    MaxPooling1D(pool_size=2),

    Conv1D(filters=128, kernel_size=3, activation='relu', kernel_regularizer=l2(0.001)),
    BatchNormalization(),
    MaxPooling1D(pool_size=2),

    # LSTM Layer
    Bidirectional(LSTM(128, return_sequences=True, dropout=0.3, recurrent_dropout=0.2)),
    Bidirectional(LSTM(64, return_sequences=False, dropout=0.3, recurrent_dropout=0.2)),

    # Fully Connected Layers
    Dense(128, activation='relu'),
    Dropout(0.4),
    Dense(64, activation='relu'),
    Dropout(0.4),
    Dense(num_classes, activation='softmax')  # Make sure num_classes = 6
])

# Compile Model
optimizer = Adam(learning_rate=5e-4)
model.compile(optimizer=optimizer, loss='categorical_crossentropy', metrics=['accuracy'])

# Print Model Summary
model.summary()


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


NameError: name 'num_classes' is not defined

In [34]:
# Learning Rate Scheduler
lr_scheduler = tf.keras.callbacks.ReduceLROnPlateau(
    monitor='val_loss',
    factor=0.5,
    patience=5,
    verbose=1
)

# Early Stopping
early_stopping = tf.keras.callbacks.EarlyStopping(
    monitor='val_loss',
    patience=8,
    restore_best_weights=True,
    verbose=1
)

# Train Model
history = model.fit(
    X_train, y_train,
    validation_data=(X_val, y_val),
    epochs=50,
    batch_size=32,
    callbacks=[lr_scheduler, early_stopping]
)

# Save the trained model to your desired path
model.save("D:/depression_detect/models/audio_emotion_model.keras")


Epoch 1/50
[1m187/187[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 23ms/step - accuracy: 0.7135 - loss: 0.5854 - val_accuracy: 0.7025 - val_loss: 0.6006 - learning_rate: 1.2500e-04
Epoch 2/50
[1m187/187[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 28ms/step - accuracy: 0.7141 - loss: 0.5923 - val_accuracy: 0.7085 - val_loss: 0.6002 - learning_rate: 1.2500e-04
Epoch 3/50
[1m187/187[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 21ms/step - accuracy: 0.7154 - loss: 0.5889 - val_accuracy: 0.6944 - val_loss: 0.6044 - learning_rate: 1.2500e-04
Epoch 4/50
[1m187/187[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 27ms/step - accuracy: 0.7209 - loss: 0.5803 - val_accuracy: 0.7186 - val_loss: 0.5955 - learning_rate: 1.2500e-04
Epoch 5/50
[1m187/187[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 29ms/step - accuracy: 0.7163 - loss: 0.5926 - val_accuracy: 0.7045 - val_loss: 0.5963 - learning_rate: 1.2500e-04
Epoch 6/50
[1m187/187[0m [32m━━━━━━━━━━━━━

In [43]:
test_loss, test_accuracy = model.evaluate(X_val, y_val)
print(f"Test Accuracy: {test_accuracy * 100:.2f}%")
print(f"Test Loss: {test_loss:.4f}")


[1m47/47[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 8ms/step - accuracy: 0.7199 - loss: 0.5778
Test Accuracy: 71.32%
Test Loss: 0.5886


In [44]:
import numpy as np

# Get predictions
predictions = model.predict(X_val)

# Convert softmax output to label index
predicted_labels = np.argmax(predictions, axis=1)
true_labels = np.argmax(y_val, axis=1)

# Print some predictions
for i in range(5):
    print(f"True: {true_labels[i]}, Predicted: {predicted_labels[i]}")


[1m47/47[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 7ms/step
True: 0, Predicted: 0
True: 0, Predicted: 0
True: 0, Predicted: 0
True: 0, Predicted: 0
True: 0, Predicted: 0


In [45]:
from pydub import AudioSegment
import librosa
import numpy as np
import requests
import os

# ✅ Set FFmpeg path manually
os.environ["PATH"] += os.pathsep + r"C:\ffmpeg\bin"

# Paths
VIDEO_PATH = "D:/depression_detect/videos/videoplayback.mp4"
AUDIO_PATH = "D:/depression_detect/videos/audio.wav"

# Convert Video to Audio
def extract_audio(video_path, audio_path):
    audio = AudioSegment.from_file(video_path, format="mp4")
    audio.export(audio_path, format="wav")
    print("✅ Audio extracted successfully:", audio_path)

# Extract MFCC features
def extract_mfcc(audio_path, sr=22050, n_mfcc=13):
    y, sr = librosa.load(audio_path, sr=sr, mono=True)
    if len(y) == 0:
        raise ValueError("❌ Audio file is empty or corrupted.")
    mfccs = librosa.feature.mfcc(y=y, sr=sr, n_mfcc=n_mfcc)
    return np.mean(mfccs, axis=1).tolist()

# Send MFCC to voice model
def predict_voice_emotion(mfcc_features):
    url = "http://127.0.0.1:5000/predict"
    data = {"features": mfcc_features}

    try:
        response = requests.post(url, json=data, timeout=5)
        response.raise_for_status()
        result = response.json()
        return result.get("prediction", [None])[0]

    except requests.exceptions.RequestException as e:
        print("❌ API request failed:", e)
        return None

# Main
def main():
    extract_audio(VIDEO_PATH, AUDIO_PATH)
    mfcc_features = extract_mfcc(AUDIO_PATH)
    
    if mfcc_features:
        voice_prediction = predict_voice_emotion(mfcc_features)
        if voice_prediction is not None:
            voice_emotion = "Negative" if voice_prediction == 0 else "Positive"
            print(f"🎙️ **Voice Emotion Prediction:** {voice_emotion}")
        else:
            print("❌ Failed to predict emotion.")
    else:
        print("❌ Failed to extract MFCC features.")

if __name__ == "__main__":
    main()


✅ Audio extracted successfully: D:/depression_detect/videos/audio.wav
❌ API request failed: HTTPConnectionPool(host='127.0.0.1', port=5000): Max retries exceeded with url: /predict (Caused by NewConnectionError('<urllib3.connection.HTTPConnection object at 0x0000029C5A91C250>: Failed to establish a new connection: [WinError 10061] No connection could be made because the target machine actively refused it'))
❌ Failed to predict emotion.


In [40]:
from pydub import AudioSegment
AudioSegment.ffmpeg = "C:/ffmpeg/bin/ffmpeg.exe"  # Set FFmpeg path manually


In [41]:
import os
print(os.path.exists("D:/depression_detection/models/audio_emotion_model.keras"))


True


In [42]:
model.save("D:/depression_detection/models/audio_emotion_model.keras")


In [86]:
model.save("D:/depression_detection/models/audio_emotion_model.h5")




In [87]:
from tensorflow.keras.models import load_model
model = load_model("D:/depression_detection/models/audio_emotion_model.h5")




In [46]:
import pandas as pd
import tensorflow as tf
import os

# Define paths
history_csv_path = "D:/depression_detect/results/audio_training_history.csv"
epoch_log_path = "D:/depression_detect/results/audio_epoch_log.csv"
model_save_path = "D:/depression_detect/models/audio_emotion_model.keras"

# Custom Callback for logging per epoch
class EpochLogger(tf.keras.callbacks.Callback):
    def on_epoch_end(self, epoch, logs=None):
        logs = logs or {}
        df = pd.DataFrame([logs])
        df["epoch"] = epoch + 1
        if not os.path.exists(epoch_log_path):
            df.to_csv(epoch_log_path, index=False)
        else:
            df.to_csv(epoch_log_path, mode='a', header=False, index=False)

# Learning Rate Scheduler
lr_scheduler = tf.keras.callbacks.ReduceLROnPlateau(
    monitor='val_loss',
    factor=0.5,
    patience=5,
    verbose=1
)

# Early Stopping
early_stopping = tf.keras.callbacks.EarlyStopping(
    monitor='val_loss',
    patience=8,
    restore_best_weights=True,
    verbose=1
)

# Train Model
history = model.fit(
    X_train, y_train,
    validation_data=(X_val, y_val),
    epochs=50,
    batch_size=32,
    callbacks=[lr_scheduler, early_stopping, EpochLogger()]
)

# Save the model
model.save(model_save_path)

# Save training history to CSV
pd.DataFrame(history.history).to_csv(history_csv_path, index=False)


Epoch 1/50
[1m187/187[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 23ms/step - accuracy: 0.7163 - loss: 0.5848 - val_accuracy: 0.7126 - val_loss: 0.5900 - learning_rate: 3.1250e-05
Epoch 2/50
[1m187/187[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 27ms/step - accuracy: 0.7245 - loss: 0.5784 - val_accuracy: 0.7126 - val_loss: 0.5879 - learning_rate: 3.1250e-05
Epoch 3/50
[1m187/187[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 27ms/step - accuracy: 0.7199 - loss: 0.5722 - val_accuracy: 0.7085 - val_loss: 0.5906 - learning_rate: 3.1250e-05
Epoch 4/50
[1m187/187[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 27ms/step - accuracy: 0.7213 - loss: 0.5738 - val_accuracy: 0.7132 - val_loss: 0.5886 - learning_rate: 3.1250e-05
Epoch 5/50
[1m187/187[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 28ms/step - accuracy: 0.7225 - loss: 0.5774 - val_accuracy: 0.7105 - val_loss: 0.5895 - learning_rate: 3.1250e-05
Epoch 6/50
[1m187/187[0m [32m━━━━━━━━━━━━━