In [4]:
###-----------------
### Import Libraries
###-----------------

import os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

from collections.abc import Callable
from typing import Literal

from sklearn import datasets
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report, ConfusionMatrixDisplay
from sklearn.preprocessing import StandardScaler

# import torch
# import torch.nn as nn
# import torch.nn.functional as F
# from torch.autograd import Variable

%matplotlib inline

In [17]:
###----------------
### Some parameters
###----------------
# Notebook-wide configuration: data locations, random seed, training
# hyper-parameters and Matplotlib defaults.

# Folder holding the pre-built Kaggle CSV (read below as 'DATASET-balanced.csv').
inpDir = './DATA/KAGGLE'
# NOTE(review): looks like an absolute, user-specific path with a leading '.' —
# as written it resolves relative to the working directory; confirm the intended location.
outDir = './home/dai/Downloads/Sahil/Final Project/DeepFake Audio Analyzer/KAGGLE/output'
subDir = 'Dataset'
# Root of the raw audio tree; passed to create_dataset, which reads its
# 'FAKE' and 'REAL' sub-folders.
audDir = "./DATA/KAGGLE/AUDIO"

RANDOM_STATE = 24 # REMEMBER: to remove at the time of promotion to production
np.random.seed(RANDOM_STATE) # Seed NumPy's global RNG for reproducible results

# Training hyper-parameters (not referenced by the cells visible in this part of the notebook).
EPOCHS = 11 # number of epochs
ALPHA = 0.001 # learning rate
NUM_SAMPLES = 1280 # How many samples we want to generate
NOISE = 0.2 # Noise to be introduced in the data
TEST_SIZE = 0.2 # presumably the held-out fraction for train_test_split — TODO confirm

# Matplotlib defaults applied globally through plt.rcParams at the end of this cell
params = {'legend.fontsize': 'x-large',
          'figure.figsize': (15, 8),
          'axes.labelsize': 'x-large',
          'axes.titlesize':'x-large',
          'xtick.labelsize':'x-large',
          'ytick.labelsize':'x-large'
         }

CMAP = 'coolwarm' # colormap name; alternative considered: plt.cm.Spectral

plt.rcParams.update(params)

In [6]:
data_df = pd.read_csv(os.path.join(inpDir, 'DATASET-balanced.csv'))
data_df.head()

Unnamed: 0,chroma_stft,rms,spectral_centroid,spectral_bandwidth,rolloff,zero_crossing_rate,mfcc1,mfcc2,mfcc3,mfcc4,...,mfcc12,mfcc13,mfcc14,mfcc15,mfcc16,mfcc17,mfcc18,mfcc19,mfcc20,LABEL
0,0.338055,0.027948,2842.948867,4322.916759,6570.586186,0.04105,-462.169586,90.311272,19.073769,24.046888,...,-6.686564,0.902086,-7.251551,-1.198342,4.747403,-4.986279,0.953935,-5.013138,-6.77906,FAKE
1,0.443766,0.037838,2336.129597,3445.777044,3764.949874,0.04773,-409.413422,120.348808,-7.161531,5.114784,...,-2.131157,-6.876417,-1.359395,0.326401,-5.420016,-2.109968,-1.757634,-9.537907,-8.494421,FAKE
2,0.302528,0.056578,2692.988386,2861.13318,4716.610271,0.080342,-318.996033,120.490273,-24.625771,23.891073,...,-5.853725,-3.724773,-6.627182,-5.117002,-6.072106,-0.994653,-1.61712,-3.922354,-7.033001,FAKE
3,0.319933,0.031504,2241.665382,3503.766175,3798.641521,0.04718,-404.636749,136.320908,2.308172,-3.907071,...,-1.898315,-2.046493,-7.176277,-3.293508,4.209121,0.121835,-5.407063,-3.654926,-3.274857,FAKE
4,0.420055,0.016158,2526.069123,3102.659519,5025.077899,0.051905,-410.497925,152.7314,-18.266771,51.993462,...,-1.95234,0.810868,6.238493,6.555839,7.535542,2.849219,2.616843,-1.793357,-5.060998,FAKE


In [8]:
### Imports
import os
import librosa
import numpy as np
import pandas as pd
from tqdm.notebook import tqdm


In [9]:
# Load a single clip to inspect its length (y) and sample rate (sr) in the cells below.
# NOTE(review): absolute, machine-specific path — presumably the same tree that audDir
# points to; confirm and derive it from audDir so the notebook runs elsewhere.
y, sr = librosa.load("/mnt/5866F9FE66F9DCA6/Puneet CDAC-Practice/CDAC_Project/DATA/KAGGLE/AUDIO/FAKE/Biden/biden-to-linus.wav")


In [10]:
len(y) # Length in seconds * Samples per second

13230000

In [11]:
sr # Sample rate

22050

In [12]:
round(len(y)/sr) # Returns 10min or 600 secs

600

In [13]:
def extract_features(file_path, segment_length):
    """Cut an audio file into fixed-length segments and summarise each one.

    The file is loaded with ``librosa.load`` using its defaults and split into
    consecutive windows of ``segment_length`` seconds; the last window is
    truncated at the end of the signal. Each window becomes one feature row:
    the means of chroma STFT, RMS, spectral centroid, spectral bandwidth,
    spectral roll-off and zero-crossing rate, followed by the per-coefficient
    mean of the MFCCs (librosa's default number of coefficients).

    Parameters
    ----------
    file_path : str
        Path of the audio file to load.
    segment_length : int or float
        Window length in seconds. Fractional values are supported; the window
        size is rounded to a whole number of samples.

    Returns
    -------
    list[list] or None
        One feature row per segment (an empty list for an empty signal), or
        ``None`` when anything goes wrong. Errors are printed rather than
        raised so that one bad file does not abort a batch run.
    """
    try:
        # y: audio time series, sr: sampling rate of y (librosa default).
        # [For more details : https://librosa.org/doc/0.10.1/generated/librosa.load.html]
        y, sr = librosa.load(file_path)

        # Work in whole samples so slice bounds are always integers, even when
        # segment_length is fractional (float bounds make the slice raise).
        samples_per_segment = int(round(segment_length * sr))
        if samples_per_segment <= 0:
            raise ValueError(
                f"segment_length must cover at least one sample, got {segment_length!r}"
            )

        # Number of windows needed to cover the whole signal.
        num_segments = int(np.ceil(len(y) / samples_per_segment))

        features = []
        for i in range(num_segments):
            start_frame = i * samples_per_segment
            # Clamp so the last window never runs past the end of the signal.
            # NOTE(review): a very short trailing window is still processed;
            # presumably its features are less reliable than those of full
            # windows — confirm whether such tails should be dropped.
            end_frame = min(len(y), start_frame + samples_per_segment)
            y_segment = y[start_frame:end_frame]

            # Scalar summaries: each feature matrix is averaged over all entries.
            # For more details : https://conference.scipy.org/proceedings/scipy2015/pdfs/brian_mcfee.pdf
            chroma_stft = np.mean(librosa.feature.chroma_stft(y=y_segment, sr=sr))
            rms = np.mean(librosa.feature.rms(y=y_segment))
            spec_cent = np.mean(librosa.feature.spectral_centroid(y=y_segment, sr=sr))
            spec_bw = np.mean(librosa.feature.spectral_bandwidth(y=y_segment, sr=sr))
            rolloff = np.mean(librosa.feature.spectral_rolloff(y=y_segment, sr=sr))
            zcr = np.mean(librosa.feature.zero_crossing_rate(y_segment))

            # MFCCs are averaged over time only, keeping one value per coefficient.
            mfccs = librosa.feature.mfcc(y=y_segment, sr=sr)  # n_mfcc=20 by default
            mfccs_mean = np.mean(mfccs, axis=1)

            features.append([chroma_stft, rms, spec_cent, spec_bw, rolloff, zcr, *mfccs_mean])

        return features

    except Exception as e:
        # Deliberate best-effort: report the failure and let the caller skip this file.
        print(f"Error processing {file_path}: {e}")
        return None


In [14]:
def create_dataset(audio_dir, segment_length):
    """Build the per-segment feature table for every file under ``audio_dir``.

    ``audio_dir`` must contain the sub-folders ``FAKE`` and ``REAL``. Every
    entry in each folder is passed to ``extract_features``; entries for which
    it returns nothing (``None`` or an empty list) are skipped.

    Parameters
    ----------
    audio_dir : str
        Root folder holding the ``FAKE`` and ``REAL`` sub-folders.
    segment_length : int or float
        Segment length in seconds, forwarded to ``extract_features``.

    Returns
    -------
    pandas.DataFrame
        One row per audio segment: 26 feature columns followed by ``LABEL``
        (the name of the folder the file came from).
    """
    labels = ['FAKE', 'REAL']  # Label for y; also the sub-folder names
    feature_columns = [
        'chroma_stft', 'rms', 'spectral_centroid', 'spectral_bandwidth',
        'rolloff', 'zero_crossing_rate',
    ] + [f'mfcc{i}' for i in range(1, 21)]

    feature_list = []
    for label in labels:
        print(f'Processing {label} files...')
        label_dir = os.path.join(audio_dir, label)
        # os.listdir returns entries in arbitrary order; sort so the row order
        # of the resulting dataset is the same on every run and machine.
        for file in sorted(os.listdir(label_dir)):
            file_path = os.path.join(label_dir, file)
            file_features = extract_features(file_path, segment_length)
            if not file_features:
                continue  # unreadable file or no segments: nothing to add
            # One dataset row per segment, tagged with the folder label.
            for segment_features in file_features:
                feature_list.append(segment_features + [label])

    return pd.DataFrame(feature_list, columns=feature_columns + ['LABEL'])


In [18]:
# Create the dataset from the raw audio under audDir.
# The second argument is the segment length in seconds (extract_features
# multiplies it by the sample rate), so every row describes a 1-second window.
dataset = create_dataset(audDir, 1)

Processing FAKE files...
Processing REAL files...


In [27]:
# Save the dataset to a CSV file
csv_output_path = './DATA/csvs/Test_audio_features_1.csv'

# to_csv does not create missing parent folders, so make sure the target
# directory exists before writing (no-op when it is already there).
os.makedirs(os.path.dirname(csv_output_path), exist_ok=True)
dataset.to_csv(csv_output_path, index=False)

print(f'Dataset created and saved to {csv_output_path}')

Dataset created and saved to ./DATA/csvs/Test_audio_features_1.csv


In [28]:
df = pd.read_csv('./DATA/csvs/Test_audio_features_1.csv')

In [31]:
# df.drop('origin_sample', axis=1, inplace=True)

In [30]:
data_df

Unnamed: 0,chroma_stft,rms,spectral_centroid,spectral_bandwidth,rolloff,zero_crossing_rate,mfcc1,mfcc2,mfcc3,mfcc4,...,mfcc12,mfcc13,mfcc14,mfcc15,mfcc16,mfcc17,mfcc18,mfcc19,mfcc20,LABEL
0,0.338055,0.027948,2842.948867,4322.916759,6570.586186,0.041050,-462.169586,90.311272,19.073769,24.046888,...,-6.686564,0.902086,-7.251551,-1.198342,4.747403,-4.986279,0.953935,-5.013138,-6.779060,FAKE
1,0.443766,0.037838,2336.129597,3445.777044,3764.949874,0.047730,-409.413422,120.348808,-7.161531,5.114784,...,-2.131157,-6.876417,-1.359395,0.326401,-5.420016,-2.109968,-1.757634,-9.537907,-8.494421,FAKE
2,0.302528,0.056578,2692.988386,2861.133180,4716.610271,0.080342,-318.996033,120.490273,-24.625771,23.891073,...,-5.853725,-3.724773,-6.627182,-5.117002,-6.072106,-0.994653,-1.617120,-3.922354,-7.033001,FAKE
3,0.319933,0.031504,2241.665382,3503.766175,3798.641521,0.047180,-404.636749,136.320908,2.308172,-3.907071,...,-1.898315,-2.046493,-7.176277,-3.293508,4.209121,0.121835,-5.407063,-3.654926,-3.274857,FAKE
4,0.420055,0.016158,2526.069123,3102.659519,5025.077899,0.051905,-410.497925,152.731400,-18.266771,51.993462,...,-1.952340,0.810868,6.238493,6.555839,7.535542,2.849219,2.616843,-1.793357,-5.060998,FAKE
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
11773,0.435426,0.025303,2772.575031,2728.757601,4998.670213,0.074323,-342.309753,144.490418,-79.272942,8.890874,...,-17.982819,-7.831161,-1.127167,-7.669674,-0.653850,-8.037575,-2.671002,-4.483765,-3.355975,REAL
11774,0.454611,0.070578,1029.274601,1519.231563,1922.927486,0.026553,-332.230408,202.603012,-0.181929,-2.146542,...,-2.018668,-2.705635,-1.589172,-2.938737,-0.972690,-1.706672,-2.796168,2.171270,-1.660128,REAL
11775,0.374432,0.019063,4063.645317,3558.261357,7299.133512,0.110278,-372.149109,92.670235,-29.082432,59.736637,...,-6.628118,-3.827499,-7.287946,-2.899543,-11.508186,-1.296590,-14.325416,-4.405540,-15.869982,REAL
11776,0.410885,0.090499,1124.655596,1553.651133,2065.942806,0.031761,-328.062805,193.557526,6.779151,-1.304731,...,-5.437202,-4.252508,-1.258683,-2.107233,-1.018154,-2.716950,-3.681598,3.811063,3.948419,REAL


In [33]:
df.describe().T


Unnamed: 0,count,mean,std,min,25%,50%,75%,max
chroma_stft,29965.0,0.395463,0.090617,0.150902,0.330103,0.388381,0.451087,0.756753
rms,29965.0,0.047151,0.035505,0.000297,0.018399,0.04053,0.067505,0.321696
spectral_centroid,29965.0,2293.167409,694.870931,683.69242,1804.612796,2189.21745,2689.100367,5665.103886
spectral_bandwidth,29965.0,2139.073967,397.39519,755.011186,1873.436407,2138.131932,2413.649229,3608.06049
rolloff,29965.0,4227.765637,1304.156458,955.291193,3268.887052,4066.839045,5042.929355,9128.365257
zero_crossing_rate,29965.0,0.123934,0.057758,0.009277,0.083374,0.111339,0.151955,0.509976
mfcc1,29965.0,-309.819738,87.949068,-695.2553,-362.89105,-294.69186,-242.1667,-114.40583
mfcc2,29965.0,93.464118,33.92241,-44.36069,71.518486,94.98571,115.10151,232.08414
mfcc3,29965.0,-12.380376,26.935078,-128.54823,-25.353941,-7.077191,6.280376,72.27785
mfcc4,29965.0,12.332665,18.265384,-70.541046,1.264601,14.055121,24.80489,91.1892


In [34]:
data_df.describe().T

Unnamed: 0,count,mean,std,min,25%,50%,75%,max
chroma_stft,11778.0,0.421925,0.069392,0.200353,0.37233,0.417708,0.467741,0.707202
rms,11778.0,0.037894,0.028071,4.3e-05,0.015098,0.03187,0.054046,0.168958
spectral_centroid,11778.0,2719.201373,1066.754739,756.163229,2062.875836,2579.963841,3283.857559,17685.00669
spectral_bandwidth,11778.0,3050.299988,872.258653,1096.903152,2569.289971,3055.863462,3581.271953,7836.844404
rolloff,11778.0,4977.617722,2170.158448,1063.964279,3448.144178,4683.958018,6211.301553,21130.54467
zero_crossing_rate,11778.0,0.070821,0.039292,0.015504,0.045749,0.060237,0.085149,0.81248
mfcc1,11778.0,-382.562312,79.593413,-1055.002197,-432.928848,-365.756241,-321.772781,-193.430145
mfcc2,11778.0,145.056311,36.188506,-83.816765,120.522711,145.970162,168.321331,284.727997
mfcc3,11778.0,-24.699932,27.728976,-132.491104,-35.550402,-19.164229,-6.235028,67.475792
mfcc4,11778.0,21.311292,22.480432,-47.77,3.635681,22.218458,37.017731,86.585747


In [35]:
data_df

Unnamed: 0,chroma_stft,rms,spectral_centroid,spectral_bandwidth,rolloff,zero_crossing_rate,mfcc1,mfcc2,mfcc3,mfcc4,...,mfcc12,mfcc13,mfcc14,mfcc15,mfcc16,mfcc17,mfcc18,mfcc19,mfcc20,LABEL
0,0.338055,0.027948,2842.948867,4322.916759,6570.586186,0.041050,-462.169586,90.311272,19.073769,24.046888,...,-6.686564,0.902086,-7.251551,-1.198342,4.747403,-4.986279,0.953935,-5.013138,-6.779060,FAKE
1,0.443766,0.037838,2336.129597,3445.777044,3764.949874,0.047730,-409.413422,120.348808,-7.161531,5.114784,...,-2.131157,-6.876417,-1.359395,0.326401,-5.420016,-2.109968,-1.757634,-9.537907,-8.494421,FAKE
2,0.302528,0.056578,2692.988386,2861.133180,4716.610271,0.080342,-318.996033,120.490273,-24.625771,23.891073,...,-5.853725,-3.724773,-6.627182,-5.117002,-6.072106,-0.994653,-1.617120,-3.922354,-7.033001,FAKE
3,0.319933,0.031504,2241.665382,3503.766175,3798.641521,0.047180,-404.636749,136.320908,2.308172,-3.907071,...,-1.898315,-2.046493,-7.176277,-3.293508,4.209121,0.121835,-5.407063,-3.654926,-3.274857,FAKE
4,0.420055,0.016158,2526.069123,3102.659519,5025.077899,0.051905,-410.497925,152.731400,-18.266771,51.993462,...,-1.952340,0.810868,6.238493,6.555839,7.535542,2.849219,2.616843,-1.793357,-5.060998,FAKE
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
11773,0.435426,0.025303,2772.575031,2728.757601,4998.670213,0.074323,-342.309753,144.490418,-79.272942,8.890874,...,-17.982819,-7.831161,-1.127167,-7.669674,-0.653850,-8.037575,-2.671002,-4.483765,-3.355975,REAL
11774,0.454611,0.070578,1029.274601,1519.231563,1922.927486,0.026553,-332.230408,202.603012,-0.181929,-2.146542,...,-2.018668,-2.705635,-1.589172,-2.938737,-0.972690,-1.706672,-2.796168,2.171270,-1.660128,REAL
11775,0.374432,0.019063,4063.645317,3558.261357,7299.133512,0.110278,-372.149109,92.670235,-29.082432,59.736637,...,-6.628118,-3.827499,-7.287946,-2.899543,-11.508186,-1.296590,-14.325416,-4.405540,-15.869982,REAL
11776,0.410885,0.090499,1124.655596,1553.651133,2065.942806,0.031761,-328.062805,193.557526,6.779151,-1.304731,...,-5.437202,-4.252508,-1.258683,-2.107233,-1.018154,-2.716950,-3.681598,3.811063,3.948419,REAL


In [36]:
dataset

Unnamed: 0,chroma_stft,rms,spectral_centroid,spectral_bandwidth,rolloff,zero_crossing_rate,mfcc1,mfcc2,mfcc3,mfcc4,...,mfcc12,mfcc13,mfcc14,mfcc15,mfcc16,mfcc17,mfcc18,mfcc19,mfcc20,LABEL
0,0.291005,0.100972,1503.819020,2003.900473,2640.264338,0.058461,-227.283997,117.363869,-21.856697,-5.956208,...,0.231222,-4.286140,-0.658060,-17.671778,0.492580,-8.939298,-10.583409,-9.856175,0.110423,FAKE
1,0.488654,0.005333,2757.999207,2607.776140,5640.965132,0.129239,-376.874908,82.629425,-4.113033,13.557878,...,-1.619721,-5.480615,-1.591572,-9.422898,-4.021746,-7.764452,-3.283262,-6.914111,-3.745474,FAKE
2,0.320947,0.059529,1876.721927,2279.032510,3907.786976,0.085582,-284.791443,104.583748,1.505778,11.288380,...,-4.458688,-9.660798,-4.906862,-12.124472,-6.083214,-8.779160,-4.301162,-9.839105,-2.189336,FAKE
3,0.317934,0.053302,2061.158776,2186.794174,3676.794434,0.099088,-258.284088,94.734474,1.924067,23.870354,...,-7.312897,-9.438956,-7.222197,-11.617744,-1.504134,-6.931713,-4.455357,-3.408775,-0.722356,FAKE
4,0.392725,0.048176,2213.382506,2198.764449,4162.759677,0.132568,-307.629211,100.989967,-14.262594,8.875005,...,-2.009577,-4.004642,-5.763664,-7.626153,-1.623855,-5.220128,-1.803224,-4.053137,2.000329,FAKE
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
29960,0.380453,0.032703,2666.514179,2112.847586,4668.300559,0.163064,-289.700653,79.301811,-34.634933,-2.610724,...,1.182776,-15.351231,-2.042419,-9.185847,-2.628170,-11.762793,-5.393685,-8.502969,-2.474539,REAL
29961,0.317955,0.036571,2310.894483,2100.396249,4078.339733,0.125388,-264.990753,87.606865,-33.935280,10.865050,...,-5.449326,-13.275034,5.052628,-9.755420,-6.369123,-11.137696,-3.693965,-7.176721,-5.510276,REAL
29962,0.417653,0.041490,1943.153338,1824.786318,3402.001398,0.105247,-205.100784,105.617233,-65.244415,-11.780135,...,-1.176161,-11.554142,-4.132590,-11.246902,0.278845,-11.095166,-3.507075,-4.943163,3.291279,REAL
29963,0.401598,0.037857,2042.405086,1803.789277,3407.874090,0.118852,-200.521667,85.288734,-90.497856,-33.224701,...,4.300089,-3.402192,-4.930651,-13.453515,-5.114316,-7.686623,1.444214,-2.637488,2.451039,REAL
