In [39]:
import os
import numpy as np
import librosa
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, classification_report
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, LSTM, Dense, Flatten, MaxPooling2D
from tensorflow.keras.optimizers import Adam
from tqdm import tqdm

# Define directories
base_dir = "/Users/siddharthtanwar/Documents/York/momenta/for-original"
train_fake_dir = os.path.join(base_dir, "training/fake")
train_real_dir = os.path.join(base_dir, "training/real")
test_fake_dir = os.path.join(base_dir, "testing/fake")
test_real_dir = os.path.join(base_dir, "testing/real")
val_fake_dir = os.path.join(base_dir, "validation/fake")
val_real_dir = os.path.join(base_dir, "validation/real")

# Function to extract features
def extract_features(file_path, model_type):
    y, sr = librosa.load(file_path)
    if model_type == 1:
        mfcc = librosa.feature.mfcc(y=y, sr=sr, n_mfcc=13)
        pitch, _ = librosa.piptrack(y=y, sr=sr)
        return np.hstack((mfcc.mean(axis=1), pitch.mean(axis=1)))
    elif model_type == 2:
        rms = librosa.feature.rms(y=y)
        return [rms.mean()]  # Return a single value
    elif model_type == 3:
        zcr = librosa.feature.zero_crossing_rate(y)
        return [zcr.mean()]  # Return a single value


# Function to load data
def load_data(fake_dir, real_dir, model_type):
    features = []
    labels = []
    
    # Add tqdm for fake files
    for file in tqdm(os.listdir(fake_dir), desc=f"Loading fake data for Model {model_type}"):
        if file.endswith('.wav'):
            features.append(extract_features(os.path.join(fake_dir, file), model_type))
            labels.append(0)
    
    # Add tqdm for real files
    for file in tqdm(os.listdir(real_dir), desc=f"Loading real data for Model {model_type}"):
        if file.endswith('.wav'):
            features.append(extract_features(os.path.join(real_dir, file), model_type))
            labels.append(1)
    
    return np.array(features), np.array(labels)




In [114]:
# Load data for each model
print("Loading data for Model 1...")
X_train_1, y_train_1 = load_data(train_fake_dir, train_real_dir, 1)
X_test_1, y_test_1 = load_data(test_fake_dir, test_real_dir, 1)
X_val_1, y_val_1 = load_data(val_fake_dir, val_real_dir, 1)


Loading data for Model 1...


Loading fake data for Model 1: 100%|████████| 26941/26941 [00:17<00:00, 1517.18it/s]
Loading real data for Model 1: 100%|██████████| 26941/26941 [05:23<00:00, 83.19it/s]
Loading fake data for Model 1: 100%|███████████| 2370/2370 [00:16<00:00, 147.90it/s]
Loading real data for Model 1: 100%|███████████| 2264/2264 [00:18<00:00, 124.26it/s]
Loading fake data for Model 1: 100%|██████████| 5400/5400 [00:04<00:00, 1229.76it/s]
Loading real data for Model 1: 100%|████████████| 5400/5400 [01:04<00:00, 83.80it/s]


In [41]:
print("Loading data for Model 2...")
X_train_2, y_train_2 = load_data(train_fake_dir, train_real_dir, 2)
X_test_2, y_test_2 = load_data(test_fake_dir, test_real_dir, 2)
X_val_2, y_val_2 = load_data(val_fake_dir, val_real_dir, 2)


Loading data for Model 2...


Loading fake data for Model 2: 100%|████████| 26941/26941 [00:04<00:00, 6728.91it/s]
Loading real data for Model 2: 100%|█████████| 26941/26941 [01:08<00:00, 393.65it/s]
Loading fake data for Model 2: 100%|███████████| 2370/2370 [00:03<00:00, 723.15it/s]
Loading real data for Model 2: 100%|███████████| 2264/2264 [00:03<00:00, 633.87it/s]
Loading fake data for Model 2: 100%|██████████| 5400/5400 [00:00<00:00, 5920.77it/s]
Loading real data for Model 2: 100%|███████████| 5400/5400 [00:14<00:00, 384.29it/s]


In [57]:
print("Loading data for Model 3...")
X_train_3, y_train_3 = load_data(train_fake_dir, train_real_dir, 3)
X_test_3, y_test_3 = load_data(test_fake_dir, test_real_dir, 3)
X_val_3, y_val_3 = load_data(val_fake_dir, val_real_dir, 3)


Loading data for Model 3...


Loading fake data for Model 3: 100%|████████| 26941/26941 [00:05<00:00, 4848.46it/s]
Loading real data for Model 3: 100%|█████████| 26941/26941 [01:24<00:00, 317.89it/s]
Loading fake data for Model 3: 100%|███████████| 2370/2370 [00:04<00:00, 530.59it/s]
Loading real data for Model 3: 100%|███████████| 2264/2264 [00:04<00:00, 455.87it/s]
Loading fake data for Model 3: 100%|██████████| 5400/5400 [00:01<00:00, 4761.20it/s]
Loading real data for Model 3: 100%|███████████| 5400/5400 [00:16<00:00, 323.46it/s]


In [142]:
from tensorflow.keras.layers import GlobalAveragePooling2D  # Add this import
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, BatchNormalization, Dropout, Dense
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.regularizers import l2
from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau
import tensorflow as tf
import numpy as np
from sklearn.metrics import accuracy_score, f1_score, precision_score, recall_score, classification_report, confusion_matrix


from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Input, Conv2D, MaxPooling2D, Reshape, Bidirectional, LSTM, Dense
from tensorflow.keras.optimizers import Adam

def create_model_1(input_shape):
    model = Sequential([
        Input(shape=input_shape),  # Explicit input layer
        
        # Modified CNN Layers - using (3,1) kernels for your (1038,1,1) input
        Conv2D(32, (3,1), activation='relu', padding='same'),
        MaxPooling2D((2,1)),  # Only pooling along time dimension
        
        Conv2D(64, (3,1), activation='relu', padding='same'),
        MaxPooling2D((2,1)),
        
        Conv2D(64, (3,1), activation='relu', padding='same'),
        
        # Prepare for LSTM
        Reshape((-1, 64)),  # Flatten spatial dimensions
        
        # BiLSTM Layers
        Bidirectional(LSTM(32, return_sequences=True)),
        Bidirectional(LSTM(32)),
        
        # Output
        Dense(1, activation='sigmoid')
    ])
    
    model.compile(optimizer=Adam(learning_rate=0.001),
                  loss='binary_crossentropy',
                  metrics=['accuracy'])
    return model

# Data preparation with validation set included
X_train_1 = X_train_1.reshape(X_train_1.shape[0], X_train_1.shape[1], 1, 1)
X_test_1 = X_test_1.reshape(X_test_1.shape[0], X_test_1.shape[1], 1, 1)
X_val_1 = X_val_1.reshape(X_val_1.shape[0], X_val_1.shape[1], 1, 1)

# SMOTE application (only on training data)
smote = SMOTE(random_state=42, sampling_strategy=0.7)
X_train_flat = X_train_1.reshape(X_train_1.shape[0], -1)
X_train_res, y_train_res = smote.fit_resample(X_train_flat, y_train_1)
X_train_res = X_train_res.reshape(-1, X_train_1.shape[1], 1, 1)

# Class weights calculation
class_weights = compute_class_weight('balanced', classes=np.array([0, 1]), y=y_train_1)
class_weights = dict(enumerate(class_weights))

# Model training with proper validation
model_1 = create_model_1((1038, 1, 1))
history_1 = model_1.fit(
    X_train_res, y_train_res,
    epochs=20,
    batch_size=128,
    validation_data=(X_val_1, y_val_1),  # Use explicit validation data
    class_weight=class_weights,
    callbacks=[
        EarlyStopping(patience=2, restore_best_weights=True),
        tf.keras.callbacks.ReduceLROnPlateau(factor=0.5, patience=1)
    ],
    verbose=1
)

print("\n=== Test Set Evaluation ===")

# 1. First verify threshold on validation set
y_val_proba = model_1.predict(X_val_1, verbose=1)
y_val_pred = (y_val_proba > best_threshold).astype(int)
print("\nValidation Set Performance with Threshold %.3f:" % best_threshold)
print(classification_report(y_val_1, y_val_pred))
print("Validation Confusion Matrix:\n", confusion_matrix(y_val_1, y_val_pred))

# 2. Then evaluate on test set
y_test_proba = model_1.predict(X_test_1, verbose=1)
y_test_pred = (y_test_proba > best_threshold).astype(int)

# 3. Calculate test metrics
test_accuracy = accuracy_score(y_test_1, y_test_pred)
test_f1 = f1_score(y_test_1, y_test_pred)
test_precision = precision_score(y_test_1, y_test_pred)
test_recall = recall_score(y_test_1, y_test_pred)



# 4. Optionally find test-optimal threshold if needed
test_thresholds = np.linspace(0.1, 0.5, 20)
test_opt_threshold = max(test_thresholds, 
                        key=lambda t: f1_score(y_test_1, (y_test_proba > t).astype(int)))
print("\nTest-Optimal Threshold: %.3f" % test_opt_threshold)

Epoch 1/20
[1m358/358[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m88s[0m 242ms/step - accuracy: 0.7789 - loss: 0.3855 - val_accuracy: 0.9880 - val_loss: 0.0459 - learning_rate: 0.0010
Epoch 2/20
[1m358/358[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m115s[0m 322ms/step - accuracy: 0.9787 - loss: 0.0546 - val_accuracy: 0.9967 - val_loss: 0.0131 - learning_rate: 0.0010
Epoch 3/20
[1m358/358[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m110s[0m 307ms/step - accuracy: 0.9946 - loss: 0.0147 - val_accuracy: 0.9960 - val_loss: 0.0118 - learning_rate: 0.0010
Epoch 4/20
[1m358/358[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m102s[0m 286ms/step - accuracy: 0.9719 - loss: 0.0692 - val_accuracy: 0.9842 - val_loss: 0.0501 - learning_rate: 0.0010
Epoch 5/20
[1m358/358[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m108s[0m 301ms/step - accuracy: 0.9919 - loss: 0.0232 - val_accuracy: 0.9974 - val_loss: 0.0102 - learning_rate: 5.0000e-04
Epoch 6/20
[1m358/358[0m [32m━━━━━━━━━━━━━━━

In [53]:
from sklearn.preprocessing import StandardScaler
from sklearn.impute import SimpleImputer
from sklearn.metrics import classification_report, confusion_matrix, precision_score, recall_score
from sklearn.linear_model import LogisticRegression
import numpy as np
import pandas as pd
from tqdm import tqdm
from imblearn.over_sampling import SMOTE

# 1. Feature Engineering with Spectral Features
def add_spectral_features(X):
    """Enhance feature set with spectral centroid-based features."""
    X_df = pd.DataFrame(X.copy())

    for i in tqdm(range(X_df.shape[0]), desc="Enhancing Features", unit="samples"):
        with np.errstate(invalid='ignore'):
            X_df.loc[i, 'feature_var'] = np.nanvar(X_df.iloc[i])
            X_df.loc[i, 'feature_range'] = np.nanmax(X_df.iloc[i]) - np.nanmin(X_df.iloc[i])
            X_df.loc[i, 'feature_entropy'] = -np.nansum(X_df.iloc[i] * np.log1p(X_df.iloc[i]))
            X_df.loc[i, 'feature_rolloff'] = np.nanpercentile(X_df.iloc[i], 85)

    X_df.columns = X_df.columns.astype(str)
    return X_df.values

# 2. Data Preparation
def prepare_data(X_train, X_val, X_test):
    X_train_enhanced = add_spectral_features(X_train)
    X_val_enhanced = add_spectral_features(X_val)
    X_test_enhanced = add_spectral_features(X_test)

    imputer = SimpleImputer(strategy='mean')
    X_train_imputed = imputer.fit_transform(X_train_enhanced)
    X_val_imputed = imputer.transform(X_val_enhanced)
    X_test_imputed = imputer.transform(X_test_enhanced)

    scaler = StandardScaler()
    X_train_scaled = scaler.fit_transform(X_train_imputed)
    X_val_scaled = scaler.transform(X_val_imputed)
    X_test_scaled = scaler.transform(X_test_imputed)

    return X_train_scaled, X_val_scaled, X_test_scaled

#Assuming X_train_2, y_train_2, X_val_2, y_val_2, X_test_2, y_test_2 are already loaded.
X_train_scaled, X_val_scaled, X_test_scaled = prepare_data(X_train_2, X_val_2, X_test_2)

smote = SMOTE(sampling_strategy=0.7, random_state=42)
X_train_resampled, y_train_resampled = smote.fit_resample(X_train_scaled, y_train_2) #important, scaled data

# 2. Logistic Regression
model_lr = LogisticRegression(class_weight={0: 1, 1: 1.6}, random_state=42, solver='liblinear')

# 3. Train Logistic Regression
print("\nTraining Logistic Regression...")
model_lr.fit(X_train_resampled, y_train_resampled)

# 4. Predict probabilities for threshold optimization
y_val_proba = model_lr.predict_proba(X_val_scaled)[:, 1] #predict probability of class 1

# 5. Threshold optimization
best_threshold = 0.5
best_score = 0

print("\nOptimizing Threshold...")
for t in tqdm(np.linspace(0.25, 0.4, 30), desc="Searching for Best Threshold"): #adjusted range
    score = (precision_score(y_val_2, (y_val_proba > t).astype(int), zero_division=0) *
             min(recall_score(y_val_2, (y_val_proba > t).astype(int), zero_division=0),
                 recall_score(1 - y_val_2, 1 - (y_val_proba > t).astype(int), zero_division=0)))
    if score > best_score:
        best_score = score
        best_threshold = t

# 6. Final predictions on the test set
y_test_pred = (model_lr.predict_proba(X_test_scaled)[:, 1] > best_threshold).astype(int)

# 7. Results
print("\n=== Logistic Regression ===")
print(f"Optimal Threshold: {best_threshold:.3f}")
print("Test Performance:")
print(classification_report(y_test_2, y_test_pred, zero_division=0))
print("Confusion Matrix:")
print(confusion_matrix(y_test_2, y_test_pred))

Enhancing Features: 100%|██████████████| 30204/30204 [00:22<00:00, 1342.32samples/s]
Enhancing Features: 100%|████████████████| 6075/6075 [00:03<00:00, 1948.47samples/s]
Enhancing Features: 100%|████████████████| 4634/4634 [00:02<00:00, 1943.65samples/s]



Training Logistic Regression...

Optimizing Threshold...


Searching for Best Threshold: 100%|████████████████| 30/30 [00:00<00:00, 167.35it/s]


=== Logistic Regression ===
Optimal Threshold: 0.400
Test Performance:
              precision    recall  f1-score   support

           0       0.67      0.81      0.73      2370
           1       0.75      0.59      0.66      2264

    accuracy                           0.70      4634
   macro avg       0.71      0.70      0.70      4634
weighted avg       0.71      0.70      0.70      4634

Confusion Matrix:
[[1919  451]
 [ 936 1328]]





In [169]:
import numpy as np
from sklearn.preprocessing import StandardScaler
from sklearn.impute import SimpleImputer
from sklearn.metrics import classification_report, confusion_matrix
from imblearn.over_sampling import SMOTE
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout, Input
from tensorflow.keras.optimizers import Adam

# 2. Data Preparation (No Feature Enhancement)
def prepare_data_lite(X_train, X_val, X_test):
    imputer = SimpleImputer(strategy='mean')
    X_train_imputed = imputer.fit_transform(X_train)
    X_val_imputed = imputer.transform(X_val)
    X_test_imputed = imputer.transform(X_test)

    scaler = StandardScaler()
    X_train_scaled = scaler.fit_transform(X_train_imputed)
    X_val_scaled = scaler.transform(X_val_imputed)
    X_test_scaled = scaler.transform(X_test_imputed)

    return X_train_scaled, X_val_scaled, X_test_scaled

# Load the data (assuming you have X_train_3, y_train_3, etc.)
X_train_scaled, X_val_scaled, X_test_scaled = prepare_data_lite(X_train_3, X_val_3, X_test_3)

# Apply SMOTE to the training set only
smote = SMOTE(sampling_strategy=0.7, random_state=42)  # Adjust sampling_strategy as needed
X_train_resampled, y_train_resampled = smote.fit_resample(X_train_scaled, np.array(y_train_3))

# Define Simple MLP Model
def create_simple_mlp(input_dim):
    model = Sequential()

    # Input Layer
    model.add(Input(shape=(input_dim,)))

    # First Hidden Layer
    model.add(Dense(64, activation='relu'))
    model.add(Dropout(0.2))  # Dropout for regularization

    # Second Hidden Layer
    model.add(Dense(32, activation='relu'))
    model.add(Dropout(0.2))

    # Output Layer
    model.add(Dense(1, activation='relu'))  # Binary classification

    # Compile the model
    model.compile(optimizer=Adam(learning_rate=0.005), loss='binary_crossentropy', metrics=['accuracy'])

    return model

# Create and train the MLP model
input_dim = X_train_scaled.shape[1]  # Number of features
mlp_model = create_simple_mlp(input_dim)

# Train the model
history = mlp_model.fit(
    X_train_resampled, y_train_resampled,
    validation_data=(X_val_scaled, y_val_3),
    epochs=10,  # Fewer epochs for faster training
    batch_size=32,
    verbose=1
)

# Predictions on validation set
y_val_pred_mlp = (mlp_model.predict(X_val_scaled) > 0.5).astype(int).flatten()

# Validation results
print("\nValidation Results (Simple MLP):")
print(classification_report(y_val_3, y_val_pred_mlp, zero_division=0))
print("Validation Confusion Matrix:")
print(confusion_matrix(y_val_3, y_val_pred_mlp))



Epoch 1/10
[1m1432/1432[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 409us/step - accuracy: 0.6437 - loss: 0.7318 - val_accuracy: 0.7756 - val_loss: 0.5238
Epoch 2/10
[1m1432/1432[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 370us/step - accuracy: 0.6508 - loss: 0.6116 - val_accuracy: 0.7310 - val_loss: 0.5429
Epoch 3/10
[1m1432/1432[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 371us/step - accuracy: 0.6581 - loss: 0.6029 - val_accuracy: 0.8695 - val_loss: 0.6271
Epoch 4/10
[1m1432/1432[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 372us/step - accuracy: 0.6685 - loss: 0.6215 - val_accuracy: 0.7903 - val_loss: 0.4652
Epoch 5/10
[1m1432/1432[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 376us/step - accuracy: 0.6721 - loss: 0.6057 - val_accuracy: 0.7715 - val_loss: 0.5203
Epoch 6/10
[1m1432/1432[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 381us/step - accuracy: 0.6616 - loss: 0.6005 - val_accuracy: 0.7621 - val_loss: 0.4747
Epoc