In [8]:
!pip install librosa
import numpy as np
import pandas as pd
import os
import librosa
import matplotlib.pyplot as plt
import seaborn as sns
from tqdm.notebook import tqdm
import IPython
import json
from scipy.io import wavfile
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from imblearn.over_sampling import RandomOverSampler
import tensorflow as tf
from sklearn.compose import ColumnTransformer
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import f1_score, recall_score
from sklearn.preprocessing import RobustScaler, PowerTransformer, StandardScaler, MinMaxScaler, OneHotEncoder, FunctionTransformer, LabelBinarizer
from sklearn.metrics import accuracy_score, confusion_matrix
from tensorflow import keras
from keras import Model, layers, models, callbacks
from keras.callbacks.ModelCheckpoint import ModelCheckpoint
from keras.models import Sequential
from keras.layers import Dense, Reshape, MaxPooling2D, Dropout, Conv2D, MaxPool2D, Flatten, Input, LSTM
from keras.utils import to_categorical
from keras.callbacks import EarlyStopping
from keras import metrics

Collecting librosa
  Downloading librosa-0.11.0-py3-none-any.whl.metadata (8.7 kB)
Collecting audioread>=2.1.9 (from librosa)
  Downloading audioread-3.0.1-py3-none-any.whl.metadata (8.4 kB)
Collecting soundfile>=0.12.1 (from librosa)
  Downloading soundfile-0.13.1-py2.py3-none-win_amd64.whl.metadata (16 kB)
Collecting pooch>=1.1 (from librosa)
  Downloading pooch-1.8.2-py3-none-any.whl.metadata (10 kB)
Collecting soxr>=0.3.2 (from librosa)
  Downloading soxr-0.5.0.post1-cp312-abi3-win_amd64.whl.metadata (5.6 kB)
Downloading librosa-0.11.0-py3-none-any.whl (260 kB)
Downloading audioread-3.0.1-py3-none-any.whl (23 kB)
Downloading pooch-1.8.2-py3-none-any.whl (64 kB)
Downloading soundfile-0.13.1-py2.py3-none-win_amd64.whl (1.0 MB)
   ---------------------------------------- 0.0/1.0 MB ? eta -:--:--
   -------------------- ------------------- 0.5/1.0 MB 3.4 MB/s eta 0:00:01
   -------------------- ------------------- 0.5/1.0 MB 3.4 MB/s eta 0:00:01
   -------------------------------------

ModuleNotFoundError: No module named 'keras.callbacks.ModelCheckpoint'

In [None]:
real_path = "KAGGLE/AUDIO/REAL"
fake_path = "KAGGLE/AUDIO/FAKE"
json_path = "KAGGLE/deep-voice-deepfake-voice-recognition-metadata.json"
csv_path = "KAGGLE/DATASET-balanced.csv"

with open(json_path, 'r') as f:
    metadata = json.load(f)

df = pd.read_csv(csv_path)
df['LABEL'] = df['LABEL'].apply(lambda x: 1 if x == 'REAL' else 0)
df.head()

In [None]:
def extract_features(audio_path, sr, start_time, duration, label, id, time):
    # Load the audio file
    y, sr = librosa.load(audio_path, sr=sr, offset=start_time, duration=duration)
    
    # Extract features
    chroma_stft = np.mean(librosa.feature.chroma_stft(y=y, sr=sr), axis=1)
    rms = np.mean(librosa.feature.rms(y=y), axis=1)
    spectral_centroid = np.mean(librosa.feature.spectral_centroid(y=y, sr=sr), axis=1)
    spectral_bandwidth = np.mean(librosa.feature.spectral_bandwidth(y=y, sr=sr), axis=1)
    rolloff = np.mean(librosa.feature.spectral_rolloff(y=y, sr=sr), axis=1)
    zero_crossing_rate = np.mean(librosa.feature.zero_crossing_rate(y=y), axis=1)
    
    # Extract all 20 MFCCs
    mfcc = librosa.feature.mfcc(y=y, sr=sr, n_mfcc=20)
    mfcc_mean = np.mean(mfcc, axis=1)  # Mean of all MFCC coefficients

    # Combine all features into one vector (8 base features + 20 MFCCs)
    features = np.concatenate([
        [np.mean(chroma_stft)], [np.mean(rms)], [np.mean(spectral_centroid)],
        [np.mean(spectral_bandwidth)], [np.mean(rolloff)], 
        [np.mean(zero_crossing_rate)], mfcc_mean, [label], [id], [time]
    ])

    return features

In [None]:
num_seconds = 10 
num_features = 29
num_total_segments = 11000
features = np.zeros((1, num_features))

file_index = 0
id = 0

for filename in os.listdir(real_path):
    if filename.endswith(".wav"):
        wav_file_path = os.path.join(real_path, filename)  # Ensure correct path
        duration = librosa.get_duration(filename=wav_file_path)
        max_segments = int(duration // num_seconds)  # Max number of 10-second segments we can extract
        # iterate through each segment
        for i in range(max_segments - 1):
            start_time = i * num_seconds
            for time in range(num_seconds):
                new_features  = extract_features(wav_file_path, 22050, start_time + time, 1, 1, id, time)
                features = np.concatenate((features, new_features.reshape(1, 29)), axis=0)
                file_index += 1
        id += 1


for filename in os.listdir(fake_path):
    if filename.endswith(".wav"):
        wav_file_path = os.path.join(fake_path, filename)  # Ensure correct path
        duration = librosa.get_duration(filename=wav_file_path)
        max_segments = int(duration // num_seconds)  # Max number of 10-second segments we can extract
        # iterate through each segment
        for i in range(max_segments - 1):
            start_time = i * num_seconds
            for time in range(num_seconds):
                new_features  = extract_features(wav_file_path, 22050, start_time + time, 1, 0, id, time)
                features = np.concatenate((features, new_features.reshape(1, 29)), axis=0)
                file_index += 1
        id += 1

In [None]:
base_columns = ['chroma_stft', 'rms', 'spectral_centroid', 'spectral_bandwidth', 
                'rolloff', 'zero_crossing_rate']
mfcc_columns = [f'mfcc{i+1}' for i in range(20)]
columns = base_columns + mfcc_columns + ['label', 'id', 'time']

df = pd.DataFrame(features, columns=columns)
df.to_csv('10_seconds_full.csv', index=False)

In [None]:
plt.figure(figsize=(20, 8))
sns.heatmap(df.corr(), annot=True, cmap='coolwarm')
plt.show()

In [None]:
label_correlation = df.corr()['label'].drop('label').drop('id')
correlation = label_correlation.abs().to_frame().T
correlation = correlation.sort_values(by=correlation.index[0], axis=1, ascending=True)

plt.figure(figsize=(20, 1))
ax = sns.heatmap(correlation, annot=True, cmap='coolwarm', fmt=".2f")
plt.title("Absolute Correlation with label")
ax.set_xticklabels(ax.get_xticklabels(), rotation=45, ha='right')
plt.show()

In [None]:
for var in columns:
    if var == 'label' or var == 'id' or var == 'time':
        continue
    plt.figure(figsize=(8, 6))  # Adjust figure size if needed
    sns.histplot(data=df, x=var, hue='label', kde=True, stat='count', common_norm=False)
    plt.xlabel(var)
    plt.ylabel('Count')
    plt.title(f'Distribution of {var} for Real and Fake Labels')
    plt.legend(title='LABEL')
    plt.show()

In [None]:
df = pd.read_csv(csv_path)
df['LABEL'] = df['LABEL'].apply(lambda x: 1 if x == 'REAL' else 0)

y = df['LABEL']
X = df.drop(columns=['LABEL'])

seed = 2002513
xtr, xte, ytr, yte = train_test_split(X, y, test_size=0.2, random_state=seed)

In [None]:
model = LogisticRegression(max_iter=1000)
model.fit(xtr, ytr)

y_pred = model.predict(xte)
accuracy = accuracy_score(yte, y_pred)
print(f"Accuracy: {accuracy:.2f}")

In [None]:
scaler = StandardScaler()
xtr_scaled = scaler.fit_transform(xtr)
xte_scaled = scaler.transform(xte)

model = LogisticRegression(max_iter=1000)
model.fit(xtr_scaled, ytr)
y_pred = model.predict(xte_scaled)
accuracy = accuracy_score(yte, y_pred)
print(f"Accuracy: {accuracy:.2f}")

In [None]:
df = pd.read_csv('10_seconds_full.csv')

y = df['label']
X = df.drop(columns=['label','id', 'time'])

seed = 2002513
xtr, xte, ytr, yte = train_test_split(X, y, test_size=0.2, stratify=y, random_state=seed)

scaler = StandardScaler()
xtr_scaled = scaler.fit_transform(xtr)
xte_scaled = scaler.transform(xte)

model = LogisticRegression(max_iter=1000)
model.fit(xtr_scaled, ytr)
y_pred = model.predict(xte_scaled)
accuracy = accuracy_score(yte, y_pred)
print(f"Accuracy: {accuracy:.2f}")

In [None]:
def sigmoid(X):
    return 1 / (1+np.exp(-X))

def tanh(X):
    return (np.exp(X) - np.exp(-X)) / (np.exp(X) + np.exp(-X))

def softmax(X):
    exp_X = np.exp(X)
    exp_X_sum = np.sum(exp_X, axis=1).reshape(-1, 1)
    exp_X = exp_X / exp_X_sum
    return exp_X

def tanh_derivative(X):
    return 1-(X**2)

In [None]:
def init_params(input_units, hidden_units, output_units):
    mean = 0
    std = 0.01    
    
    forget_gate_weights = np.random.normal(mean,std,(input_units+hidden_units,hidden_units))
    input_gate_weights_percent  = np.random.normal(mean,std,(input_units+hidden_units,hidden_units))
    input_gate_weights_memory  = np.random.normal(mean,std,(input_units+hidden_units,hidden_units))
    output_gate_weights = np.random.normal(mean,std,(input_units+hidden_units,hidden_units))


    forget_gate_bias = np.random.normal(mean,std,(1, hidden_units))
    input_gate_percent_bias = np.random.normal(mean,std,(1, hidden_units))
    input_gate_memory_bias = np.random.normal(mean,std,(1, hidden_units))
    output_gate_bias = np.random.normal(mean,std,(1, hidden_units))
    
    hidden_output_weights = np.random.normal(mean,std,(hidden_units,output_units))
    
    parameters = dict()
    parameters['fgw'] = forget_gate_weights
    parameters['igpw'] = input_gate_weights_percent
    parameters['igmw'] = input_gate_weights_memory
    parameters['ogw'] = output_gate_weights

    parameters['fgb'] = forget_gate_bias
    parameters['igpb'] = input_gate_percent_bias
    parameters['igmb'] = input_gate_memory_bias
    parameters['ogb'] = output_gate_bias
    
    return parameters

In [None]:
def lstm_cell(parameters, short_term_matrix, long_term_matrix, input_matrix):
    fgw = parameters['fgw']
    igpw = parameters['igpw']
    igmw = parameters['igmw']
    ogw = parameters['ogw']
    
    
    fgb = parameters['fgb']
    igpb = parameters['igpb']
    igmb = parameters['igmb']
    ogb = parameters['ogb']

    # concatenate input and short term memory matrix
    concat_dataset = np.concatenate((short_term_matrix, input_matrix), axis=1)

    # forget gate
    fa = np.matmul(concat_dataset, fgw)
    fa = np.add(fa, fgb)
    forget_gate_output = sigmoid(fa)

    # input gate
    input_potential = np.matmul(concat_dataset, igpw)
    input_potential = np.add(input_potential, igpb)
    input_potential = sigmoid(input_potential)

    input_memory = np.matmul(concat_dataset, igmw)
    input_memory = np.add(input_memory, igmb)
    input_memory = tanh(input_memory)

    input_gate_output = input_potential * input_memory

    # update long term memory
    long_term_memory = np.multiply(long_term_matrix, forget_gate_output)
    long_term_memory = np.add(long_term_memory, input_gate_output)

    # output gate
    output_percent = np.matmul(concat_dataset, ogw)
    output_percent = np.add(output_percent, ogb)
    output_percent = sigmoid(output_percent)

    output_potential = tanh(long_term_memory)

    # update short term memory
    short_term_memory = np.multiply(output_percent, output_potential)

    # return long and short term memory
    return long_term_memory, short_term_memory

In [1]:
# Assuming the input size and hidden units for the LSTM
input_units = 26  # Number of features in the input
hidden_units = 128  # Number of LSTM hidden units
output_units = 1  # Output units (optional for full model)


# Initialize the parameters
parameters = init_params(input_units, hidden_units, output_units)

input_matrix = xtr
# Create a sample short-term memory (previous hidden state, [1, 128])
short_term_matrix = np.random.randn(20, hidden_units)  # Short-term memory from the previous time step

# Create a sample long-term memory (previous cell state, [1, 128])
long_term_matrix = np.random.randn(20, hidden_units)  # Long-term memory from the previous time step

# Run the LSTM cell
new_long_term_memory, new_short_term_memory = lstm_cell(parameters, short_term_matrix, long_term_matrix, input_matrix)

NameError: name 'init_params' is not defined