In [None]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split, KFold
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import accuracy_score
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv1D, MaxPooling1D, Flatten, Dense, Dropout, BatchNormalization, Bidirectional, LSTM
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.utils import to_categorical

In [None]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
# Load the dataset
csv_output_path = 'drive/MyDrive/KAGGLE/audios.csv'
dataset = pd.read_csv(csv_output_path)

# Encode the labels
label_encoder = LabelEncoder()
dataset['LABEL'] = label_encoder.fit_transform(dataset['LABEL'])

In [None]:
# Split the dataset into features (X) and labels (y)
X = dataset.iloc[:, :-2].values  # Exclude 'origin_sample' and 'LABEL'
y = dataset['LABEL'].values

# Split the dataset into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [None]:
# Scale the features
from sklearn.preprocessing import MinMaxScaler
scaler = MinMaxScaler()
X_train_scaled = scaler.fit_transform(X_train.reshape(X_train.shape[0], -1))
X_test_scaled = scaler.transform(X_test.reshape(X_test.shape[0], -1))
X_train_scaled = X_train_scaled.reshape(X_train.shape)
X_test_scaled = X_test_scaled.reshape(X_test.shape)

# Reshape the features to match the input shape expected by an RNN
X_train_scaled = X_train_scaled.reshape(X_train_scaled.shape[0], X_train_scaled.shape[1], 1)
X_test_scaled = X_test_scaled.reshape(X_test_scaled.shape[0], X_test_scaled.shape[1], 1)

#X_train_scaled = np.expand_dims(X_train_scaled,axis=-1)

In [None]:
X_train.shape

(79624, 26)

In [None]:
model = Sequential()
model.add(Conv1D(filters=256, kernel_size=10, input_shape=(X_train.shape[1], 1),strides=5))
model.add(BatchNormalization())  # Batch Normalization layer
model.add(MaxPooling1D(pool_size=2))
model.add(Conv1D(filters=256, kernel_size=8, strides=4,padding='same'))
model.add(BatchNormalization())  # Batch Normalization layer

model.add(Conv1D(filters=256, kernel_size=4, strides=2,padding='same'))

model.add(Conv1D(filters=256, kernel_size=4, strides=2, padding='same'))
model.add(Bidirectional(LSTM(units=512, return_sequences=True)))

# Add Flatten layer to flatten the output before dense layers
model.add(Flatten())
model.add(Dense(256, input_dim=512, activation='relu'))
model.add(Dense(1, input_dim=256, activation='sigmoid'))

In [None]:
model.summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 conv1d (Conv1D)             (None, 4, 256)            2816      
                                                                 
 batch_normalization (Batch  (None, 4, 256)            1024      
 Normalization)                                                  
                                                                 
 max_pooling1d (MaxPooling1  (None, 2, 256)            0         
 D)                                                              
                                                                 
 conv1d_1 (Conv1D)           (None, 1, 256)            524544    
                                                                 
 batch_normalization_1 (Bat  (None, 1, 256)            1024      
 chNormalization)                                                
                                                        

In [None]:
# Compile the model
model.compile(optimizer=Adam(learning_rate=1e-4), loss='binary_crossentropy', metrics=['accuracy'])

In [None]:
# Define k-fold cross-validation
kf = KFold(n_splits=2, shuffle=True, random_state=42)

In [None]:
# Store accuracies in a list
accuracies = []
# Perform k-fold cross-validation
for train_index, val_index in kf.split(X_train_scaled):
    X_train_fold, X_val_fold = X_train_scaled[train_index], X_train_scaled[val_index]
    y_train_fold, y_val_fold = y_train[train_index], y_train[val_index]

    # Train the model on the current fold
    model.fit(X_train_fold, y_train_fold, epochs=10, batch_size=32, validation_data=(X_val_fold, y_val_fold))

    # Evaluate the model on the test set
    y_pred = model.predict(X_test_scaled)
    y_pred_binary = np.round(y_pred)
    accuracy = accuracy_score(y_test, y_pred_binary)

    accuracies.append(accuracy)

    print(f'Accuracy on test set: {accuracy}')

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Accuracy on test set: 0.9305736963729528
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Accuracy on test set: 0.9405204460966543


In [None]:
# Calculate and print the average accuracy
average_accuracy = np.mean(accuracies)
print(f'Average Accuracy: {average_accuracy}')

Average Accuracy: 0.9429317793630061


In [None]:
# Load the dataset
csv_output_path = 'audioTest.csv'
dataset = pd.read_csv(csv_output_path)

# Encode the labels
label_encoder = LabelEncoder()
dataset['LABEL'] = label_encoder.fit_transform(dataset['LABEL'])

In [None]:
# Split the dataset into features (X) and labels (y)
X_test = dataset.iloc[:, :-2].values  # Exclude 'origin_sample' and 'LABEL'
y_test = dataset['LABEL'].values


In [None]:
X_test_scaled = scaler.transform(X_test.reshape(X_test.shape[0], -1))
X_test_scaled = X_test_scaled.reshape(X_test.shape)

# Reshape the features to match the input shape expected by an RNN
X_test_scaled = X_test_scaled.reshape(X_test_scaled.shape[0], X_test_scaled.shape[1], 1)


In [None]:
y_pred = model.predict(X_test_scaled)
y_pred_binary = np.round(y_pred)
accuracy = accuracy_score(y_test, y_pred_binary)

print(f'Accuracy on test set: {accuracy}')

Accuracy on test set: 0.5555555555555556


In [None]:
import pickle
pickle_out = open("deepFake.pkl","wb")
pickle.dump(model, pickle_out)
pickle_out.close()