In [1]:
import torch
import torch.nn as nn
import librosa.display
import torchvision.transforms as transforms
import matplotlib.pyplot as plt
import os
import numpy as np
from IPython import display
import pandas as pd
import torch
import torchaudio
import torchaudio.functional as F
import torchaudio.transforms as T
import sklearn as sk
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from sklearn.model_selection import KFold
from sklearn.metrics import classification_report
from sklearn.metrics import confusion_matrix 

  from .autonotebook import tqdm as notebook_tqdm


In [8]:
#dataset initialization
dfInput=pd.read_csv('/code/dataset/FakeAVCeleb/MergedMetadataLabeled.csv')
dfCut=dfInput[['wavLocation','Audio_Label','source']]
dfFake=dfCut[dfCut['Audio_Label']==1].reset_index(drop=True)
dfReal=dfCut[dfCut['Audio_Label']==0].reset_index(drop=True)
dfTraining=pd.concat([dfReal[0:8000],dfFake[0:8000]]).reset_index(drop=True)
dfValidation=pd.concat([dfReal[8000:],dfFake[8000:]]).reset_index(drop=True)
labelsTrain=dfTraining['Audio_Label']
labelsVal=dfValidation['Audio_Label']

In [2]:
##generate MFCCs for Real and Fake Audiofiles
r_mfccs=[]
r_spec_data=[]
r_sampling_rate=[]
for i in range(0,len(r_spec_data)):
    data,sampling_rate=librosa.load(dfReal['wavLocation'][i])
    r_spec_data+=[data]
    r_sampling_rate+=[sampling_rate]
    mfccs = librosa.feature.mfcc(data, sr=sampling_rate, n_mfcc=20,dtype=np.float64)
    r_mfccs.append(mfccs)

f_mfccs=[] 
f_spec_data=[]
f_sampling_rate=[]
for i in range(0,len(f_spec_data)):
    data,sampling_rate=librosa.load(dfFake['wavLocation'][i])
    f_spec_data=[data]
    f_sampling_rate+=[sampling_rate]
    mfccs = librosa.feature.mfcc(data, sr=sampling_rate, n_mfcc=20,dtype=np.float64)
    f_mfccs.append(mfccs)

In [5]:
dfCut[dfCut['Audio_Label']==1]

Unnamed: 0,wavLocation,Audio_Label,source
500,/code/dataset/FakeAVCeleb/RealVideo-FakeAudio/...,1,id00076
501,/code/dataset/FakeAVCeleb/RealVideo-FakeAudio/...,1,id00166
502,/code/dataset/FakeAVCeleb/RealVideo-FakeAudio/...,1,id00173
503,/code/dataset/FakeAVCeleb/RealVideo-FakeAudio/...,1,id00366
504,/code/dataset/FakeAVCeleb/RealVideo-FakeAudio/...,1,id00391
...,...,...,...
21561,/code/dataset/FakeAVCeleb/FakeVideo-FakeAudio/...,1,id09181
21562,/code/dataset/FakeAVCeleb/FakeVideo-FakeAudio/...,1,id09181
21563,/code/dataset/FakeAVCeleb/FakeVideo-FakeAudio/...,1,id09181
21564,/code/dataset/FakeAVCeleb/FakeVideo-FakeAudio/...,1,id09181


In [3]:
import audioread

In [47]:
len(dfCut[dfCut['Audio_Label']==0])

10209

In [9]:
dfCut_Sample=dfCut.sample(n=500, replace=True,random_state=42).reset_index()

In [10]:
type(dfCut_Sample)

pandas.core.frame.DataFrame

In [70]:
temp_list=dfCut_Sample['wavLocation']

In [71]:
temp_list[0]

'/code/dataset/FakeAVCeleb/FakeVideo-FakeAudio/Caucasian (European)/men/id01154/00118_id01052_wavtolip.mp4.wav'

In [11]:
def load_data(sr=16000):
    temp_list = dfCut_Sample['wavLocation']
    r_audio = []
    f_audio = []
    for i in range(0,len(dfCut_Sample)):
        if dfCut_Sample['Audio_Label'][i]==0:
            r_audio.append(librosa.load(temp_list[i],sr=sr/2))
            #with audioread.audio_open(temp_list[i]) as input_file:
                #channel_r.append(input_file.channels)
        else:
            f_audio.append(librosa.load(temp_list[i],sr=sr/2))
            #with audioread.audio_open(temp_list[i]) as input_file:
                #channel_f.append(input_file.channels)
        
            
    print('Real audio files loaded: ' + str(len(r_audio)) + ' samples')
    print('Fake audio files loaded: ' + str(len(f_audio)) + ' samples')
    return r_audio,f_audio

In [12]:
#Loading of the Data / Counting of Channels 
r_audio,f_audio = load_data()
n_r = len(r_audio)
n_f = len(f_audio)

Real audio files loaded: 239 samples
Fake audio files loaded: 261 samples


In [57]:
len(r[0])

2

In [52]:
len(r[0][0])

161600

In [13]:
def compute_mel_frequencies(r_audio,f_audio,sr=16000):
    r_mel_frequencies = []
    f_mel_frequencies = []
    
    for i in range(len(r_audio)):
        r_mel_frequencies.append(librosa.feature.melspectrogram(y=np.array(r_audio[i][0]),sr=sr/2))
    for i in range(len(f_audio)):
        f_mel_frequencies.append(librosa.feature.melspectrogram(y=np.array(f_audio[i][0]),sr=sr/2))
        
    return r_mel_frequencies,f_mel_frequencies

In [14]:
def compute_mfccs(r_mel_frequencies,f_mel_frequencies,sr=16000):
    r_mfccs = []
    f_mfccs = []
    
    for i in range(len(r_mel_frequencies)):
        r_mfccs.append(librosa.feature.mfcc(S=librosa.power_to_db(r_mel_frequencies[i]),sr=sr/2))
    for i in range(len(f_mel_frequencies)):
        f_mfccs.append(librosa.feature.mfcc(S=librosa.power_to_db(f_mel_frequencies[i]),sr=sr/2))
        
    return r_mfccs,f_mfccs

In [15]:
def build_df_mfccs(r_mfccs,f_mfccs):
    df_r = pd.DataFrame(np.transpose(r_mfccs[0]),index=np.ones(len(np.transpose(r_mfccs[0])))*(0+1))
    for i in range(1,len(r_mfccs)):
        df_temp = pd.DataFrame(np.transpose(r_mfccs[i]),index=np.ones(len(np.transpose(r_mfccs[i])))*(i+1))
        frames = [df_r, df_temp]
        df_r = pd.concat(frames)
    df_r['Label']=0
    
    df_f = pd.DataFrame(np.transpose(f_mfccs[0]),index=np.ones(len(np.transpose(f_mfccs[0])))*(0+len(r_mfccs)+1))
    for i in range(1,len(f_mfccs)):
        df_temp = pd.DataFrame(np.transpose(f_mfccs[i]),index=np.ones(len(np.transpose(f_mfccs[i])))*(i+len(r_mfccs)+1))
        frames = [df_f, df_temp]
        df_f = pd.concat(frames)
    df_f['Label']=1
    
    frames = [df_r, df_f]
    result = pd.concat(frames)
    
    result = result.sample(frac=1)
    
    return result

In [16]:


def remove_frames(r_audio,f_audio,r_frames,f_frames,sr=16000):
    
    detect_r = []
    detect_f = []
    
    for r in r_audio:
        detect_r.append(librosa.onset.onset_detect(y=r[0]))
    for f in f_audio:
        detect_f.append(librosa.onset.onset_detect(y=f[0]))
    
    
    new_r_frames = []
    new_f_frames = []
    
    for k in range(len(r_frames)):
        new_r_frames.append([[r_frames[k][i][j] for j in detect_r[k]] for i in range(len(r_frames[0]))])   
    for k in range(len(f_frames)):
        new_f_frames.append([[f_frames[k][i][j] for j in detect_f[k]] for i in range(len(f_frames[0]))])
        
    return new_r_frames,new_f_frames
    



NameError: name 'r_frames' is not defined

In [17]:
def split_data(r_frames,f_frames,n_r,n_f,test_size=0.3):
    y_r = np.zeros(n_r)
    y_f = np.ones(n_f)

    X_train_r, X_test_r, y_train_r, y_test_r = train_test_split(r_frames, y_r, test_size= test_size,random_state=42)
    X_train_f, X_test_f, y_train_f, y_test_f = train_test_split(f_frames, y_f, test_size= test_size,random_state=42)

    df_train = build_df_mfccs(X_train_r,X_train_f)
    df_test = build_df_mfccs(X_test_r,X_test_f)
    
    return df_train,df_test

In [88]:
y_r=np.zeros(n_r)

2342

In [18]:


def log_reg(df_train,df_test):
    logreg = LogisticRegression()
    X_train=df_train.iloc[:,0:df_train.shape[1]-1]
    X_test = df_test.iloc[:,0:df_train.shape[1]-1]
    y_train=df_train['Label']
    logreg.fit(X_train, y_train)
    y_pred_train = logreg.predict(X_train)
    y_pred_test = logreg.predict(X_test)

    pred_train_series = pd.Series(y_pred_train,index = df_train.index)
    pred_group_train_series = pred_train_series.groupby(pred_train_series.index).mean()
    pred_train_boolean = pred_group_train_series >=0.5
    y_pred_train_last= pred_train_boolean*1

    pred_test_series = pd.Series(y_pred_test,index = df_test.index)
    pred_group_test_series = pred_test_series.groupby(pred_test_series.index).mean()
    pred_test_boolean = pred_group_test_series >=0.5
    y_pred_test_last= pred_test_boolean*1

    y_train = df_train['Label']
    y_train_last = y_train.groupby(y_train.index).mean()

    y_test = df_test['Label']
    y_test_last = y_test.groupby(y_test.index).mean()
    
    return y_train_last,y_pred_train_last,y_test_last,y_pred_test_last



In [34]:
n_r

1

In [19]:
def calculate_accuracies(y_train,y_pred_train,y_test,y_pred_test):
    train_accuracy = np.sum(np.array([y_train==y_pred_train]))/len(y_train)
    test_accuracy = np.sum(np.array([y_test==y_pred_test]))/len(y_test)
    return train_accuracy, test_accuracy
    

In [30]:
def class_report(y_train,y_pred_train,y_test,y_pred_test,train_accuracy,test_accuracy):
    
    print('\n\nTraining Set:\n')
    print('Training Confusion Matrix:')
    print(confusion_matrix(y_train, y_pred_train))
    
    print('Training Classification report:')
    print(classification_report(y_train, y_pred_train))
    
    print('Train Accuracy: ' + str(train_accuracy))
    
    print('\n\nTest Set:\n')
    print('Test Confusion Matrix:')
    print(confusion_matrix(y_test, y_pred_test))
    
    print('Test Classification report:')
    print(classification_report(y_test, y_pred_test))

    print('Test Accuracy: ' + str(test_accuracy))

In [20]:
def cross_validate(df_train,folds):
    
    C_range = np.logspace(-2, 10, 13)
    gamma_range = np.logspace(-9, 3, 13)
    results=np.empty((folds,len(C_range),len(gamma_range)))
    i=0
    j=0
    k=0
    X = np.unique(df_train.index.values)
    np.random.shuffle(X)
    kf = KFold(n_splits=3)
    kf.get_n_splits(X)
    
    for train_index, test_index in kf.split(X):
        X_train, X_test = df_train.loc[X[train_index]], df_train.loc[X[test_index]]

        for C in C_range:
            for gamma in gamma_range:
                y_train_last_svm,y_pred_train_last_svm,y_test_last_svm,y_pred_test_last_svm = classification_svm(X_train,X_test,C,gamma)
                train_acc,test_acc = calculate_accuracies(y_train=y_train_last_svm, y_pred_train = y_pred_train_last_svm, y_test = y_test_last_svm,y_pred_test = y_pred_test_last_svm)
                results[k,i,j]=test_acc
                j+=1
            i+=1
            j=0
        k+=1
        i=0
    average_acc = np.sum(results,axis=0)/folds
    indexes_max = np.unravel_index(np.argmax(average_acc, axis=None), average_acc.shape)
    best_C = C_range[indexes_max[0]]    
    best_gamma = gamma_range[indexes_max[1]] 
        
    return best_C, best_gamma

ModuleNotFoundError: No module named 'svm'

In [22]:
def classification_svm(df_train,df_test,C,gamma):
    clf = svm.SVC(C=C, gamma = gamma)
    X_train=df_train.iloc[:,0:df_train.shape[1]-1]
    X_test = df_test.iloc[:,0:df_train.shape[1]-1]
    y_train=df_train['Label']
    clf.fit(X_train, y_train)
    y_pred_train = clf.predict(X_train)
    y_pred_test = clf.predict(X_test)

    pred_train_series = pd.Series(y_pred_train,index = df_train.index)
    pred_group_train_series = pred_train_series.groupby(pred_train_series.index).mean()
    pred_train_boolean = pred_group_train_series >=0.5
    y_pred_train_last= pred_train_boolean*1

    pred_test_series = pd.Series(y_pred_test,index = df_test.index)
    pred_group_test_series = pred_test_series.groupby(pred_test_series.index).mean()
    pred_test_boolean = pred_group_test_series >=0.5
    y_pred_test_last= pred_test_boolean*1

    y_train = df_train['Label']
    y_train_last = y_train.groupby(y_train.index).mean()

    y_test = df_test['Label']
    y_test_last = y_test.groupby(y_test.index).mean()
    
    return y_train_last,y_pred_train_last,y_test_last,y_pred_test_last

In [36]:
n_f

0

In [23]:
#Computing of the Mel-scaled spectrograms for each audio file
r_mel_frequencies, f_mel_frequencies = compute_mel_frequencies(r_audio,f_audio)

In [113]:
len(f)

0

In [24]:
#Computing of the Mel-frequency cepstral coefficients for each audio file
r_mfccs, f_mfccs = compute_mfccs(r_mel_frequencies,f_mel_frequencies)

In [116]:
len(f_mfccs)

2658

In [25]:
#Removing silent frames for delta features and mfccs for each audio file
new_r_mfccs,new_f_mfccs = remove_frames(r_audio,f_audio,r_mfccs,f_mfccs)
#new_r_deltas, new_f_deltas = remove_frames(cats,dogs,cats_deltas, dogs_deltas)

In [99]:
len(new_f_mfccs[0])

IndexError: list index out of range

In [26]:
df_train,df_test = split_data(new_r_mfccs,new_f_mfccs,n_r=n_r,n_f=n_f,test_size=0.3)

In [27]:
#Plot onset detection results of audio file cats_1
detect = librosa.onset.onset_detect(y=x)
onset_times = librosa.frames_to_time(detect)
plt.plot(onset_times, np.zeros_like(onset_times) + 0, 'x')
plt.show()
print("Non silent frame indexes: " + str(detect))

NameError: name 'x' is not defined

In [87]:
df_train,df_test = split_data(new_r_mfccs,new_f_mfccs,n_r=n_r,n_f=n_f,test_size=0.3)

ValueError: Found input variables with inconsistent numbers of samples: [1, 2342]

In [31]:
y_train_last_lr,y_pred_train_last_lr,y_test_last_lr,y_pred_test_last_lr = log_reg(df_train,df_test)
train_acc_lr,test_acc_lr = calculate_accuracies(y_train=y_train_last_lr, y_pred_train = y_pred_train_last_lr, y_test = y_test_last_lr,y_pred_test = y_pred_test_last_lr)
class_report(y_train_last_lr,y_pred_train_last_lr,y_test_last_lr,y_pred_test_last_lr,train_acc_lr,test_acc_lr)



Training Set:

Training Confusion Matrix:
[[137  30]
 [ 53 129]]
Training Classification report:
              precision    recall  f1-score   support

         0.0       0.72      0.82      0.77       167
         1.0       0.81      0.71      0.76       182

    accuracy                           0.76       349
   macro avg       0.77      0.76      0.76       349
weighted avg       0.77      0.76      0.76       349

Train Accuracy: 0.7621776504297995


Test Set:

Test Confusion Matrix:
[[56 16]
 [34 45]]
Test Classification report:
              precision    recall  f1-score   support

         0.0       0.62      0.78      0.69        72
         1.0       0.74      0.57      0.64        79

    accuracy                           0.67       151
   macro avg       0.68      0.67      0.67       151
weighted avg       0.68      0.67      0.67       151

Test Accuracy: 0.6688741721854304


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


In [29]:
from sklearn import svm

In [124]:
C,gamma = cross_validate(df_train,folds=1)
y_train_last_svm,y_pred_train_last_svm,y_test_last_svm,y_pred_test_last_svm = classification_svm(df_train,df_test,C,gamma)
train_acc_svm,test_acc_svm = calculate_accuracies(y_train=y_train_last_svm, y_pred_train = y_pred_train_last_svm, y_test = y_test_last_svm,y_pred_test = y_pred_test_last_svm)
class_report(y_train_last_svm,y_pred_train_last_svm,y_test_last_svm,y_pred_test_last_svm,train_acc_svm,test_acc_svm)