In [1]:
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn import svm
from sklearn import metrics
import pandas as pd
import os
from matplotlib import pyplot as plt
import numpy as np
from pathlib import Path
import tensorflow as tf
import shutil
import librosa
from tqdm import tqdm_notebook
from tensorflow.keras import layers, models
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import ModelCheckpoint, EarlyStopping, ReduceLROnPlateau, Callback
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.models import load_model

# **SVM Approach**

In [2]:
data = pd.read_csv('dataset_train_new.csv')
data = data.drop(['filename'],axis=1)
genre_list = data.iloc[:, -1]
encoder = LabelEncoder()
y_train = encoder.fit_transform(genre_list)
scaler = StandardScaler()
X_train = scaler.fit_transform(np.array(data.iloc[:, :-1], dtype = float))

data = pd.read_csv('dataset_dev_new.csv')
data = data.drop(['filename'],axis=1)
genre_list = data.iloc[:, -1]
encoder = LabelEncoder()
y_dev = encoder.fit_transform(genre_list)
scaler = StandardScaler()
X_dev = scaler.fit_transform(np.array(data.iloc[:, :-1], dtype = float))

data = pd.read_csv('dataset_eval_new.csv')
data = data.drop(['filename'],axis=1)
genre_list = data.iloc[:, -1]
encoder = LabelEncoder()
y_eval = encoder.fit_transform(genre_list)
scaler = StandardScaler()
X_eval = scaler.fit_transform(np.array(data.iloc[:, :-1], dtype = float))

In [3]:
clf = svm.SVC(kernel='rbf')
clf.fit(X_train, y_train)
y_pred = clf.predict(X_dev)
print('Dev')
print("Accuracy:",metrics.accuracy_score(y_dev, y_pred))
print()
y_pred = clf.predict(X_eval)
print('Eval')
print("Accuracy:",metrics.accuracy_score(y_eval, y_pred))
print()

Dev
Accuracy: 0.8535729037587774

Eval
Accuracy: 0.8196755994358251



# **CNN Approaches**

In [4]:
def small_cnn(input_shape = (256, 256, 1), lr = 1e-3, factor = 16):
    img_input = layers.Input(input_shape)
    X = layers.Conv2D(factor, 3, padding = 'same', activation = 'relu')(img_input)
    X = layers.MaxPooling2D(pool_size = (2, 2))(X)
    X = layers.Conv2D(factor*2, 3, padding = 'same', activation = 'relu')(X)
    X = layers.MaxPooling2D(pool_size = (2, 2))(X)
    X = layers.Conv2D(factor*4, 3, padding = 'same', activation = 'relu')(X)
    X = layers.MaxPooling2D(pool_size = (2, 2))(X)
    X = layers.Conv2D(factor*8, 3, padding = 'same', activation = 'relu')(X)
    X = layers.MaxPooling2D(pool_size = (2, 2))(X)
    X = layers.Conv2D(factor*16, 3, padding = 'same', activation = 'relu')(X)
    X = layers.MaxPooling2D(pool_size = (2, 2))(X)
    X = layers.Flatten()(X)
    X = layers.Dense(128, activation = 'relu')(X)
    X = layers.Dropout(rate=0.5)(X)
    X = layers.Dense(1)(X)
    X = layers.Activation('sigmoid', dtype='float32', name='predictions')(X)
    model = models.Model(inputs = img_input, outputs = X)
    model.compile(optimizer = Adam(lr), loss = 'binary_crossentropy', metrics = ['accuracy'])
    return model

In [5]:
for dataset,modelpath in zip(['mel_spec_256_crop_first','amp_plot_256_crop_first_negative',],['mel_spec_model.h5','amp_plot_model.h5']):
    batch_size = 64
    target_size = (256, 256)
    color_mode = 'grayscale'
    val_datagen = ImageDataGenerator(rescale=1/255)
    val_generator = val_datagen.flow_from_directory(f'{dataset}/dev/', target_size=target_size, batch_size=batch_size, class_mode='binary', color_mode=color_mode, classes = ['real','fake'])
    test_datagen = ImageDataGenerator(rescale=1/255)
    test_generator = test_datagen.flow_from_directory(f'{dataset}/eval/', target_size=target_size, batch_size=batch_size, class_mode='binary', color_mode=color_mode, classes = ['real','fake'])
    print()
    model = small_cnn()
    model.load_weights(modelpath)
    print(f'Model: {modelpath}')
    results = model.evaluate(val_generator,steps=2*len(val_generator),verbose=0)
    print('Development Set:')
    print(f'Loss: {results[0]:.4f}\nAccuracy: {results[1]:.4f}')
    results = model.evaluate(test_generator,steps=2*len(test_generator),verbose=0)
    print('Evaluation Set:')
    print(f'Loss: {results[0]:.4f}\nAccuracy: {results[1]: .4f}')
    print()

Found 4816 images belonging to 2 classes.
Found 14090 images belonging to 2 classes.

Model: mel_spec_model.h5
Development Set:
Loss: 0.0501
Accuracy: 0.9817
Evaluation Set:
Loss: 0.3875
Accuracy:  0.8914

Found 4842 images belonging to 2 classes.
Found 14180 images belonging to 2 classes.

Model: amp_plot_model.h5
Development Set:
Loss: 0.3834
Accuracy: 0.8088
Evaluation Set:
Loss: 0.3431
Accuracy:  0.8434



# **LSTM Approach**

In [6]:
dataset = 'npys_32000'

In [7]:
def get_input(filepath):
    clip = np.load(filepath)
    sample_rate = 16000
    rmse = librosa.feature.rms(y=clip)
    chroma_stft = librosa.feature.chroma_stft(y=clip, sr=sample_rate)
    spec_cent = librosa.feature.spectral_centroid(y=clip, sr=sample_rate)
    spec_bw = librosa.feature.spectral_bandwidth(y=clip, sr=sample_rate)
    rolloff = librosa.feature.spectral_rolloff(y=clip, sr=sample_rate)
    zcr = librosa.feature.zero_crossing_rate(clip)
    mfcc = librosa.feature.mfcc(y=clip, sr=sample_rate)
    arr = np.concatenate([rmse,chroma_stft,spec_cent,spec_bw,rolloff,zcr,mfcc],axis=0).T
    arr = (arr - arr.mean(axis=0))/(arr.std(axis=0))
    return [arr]

def data_generator(dataset, split = 'train', batch_size = 8):

    assert batch_size % 2 == 0
    real_files = [f'{dataset}/{split}/real/{f}' for f in os.listdir(f'{dataset}/{split}/real') if '.ipynb' not in f]
    fake_files = [f'{dataset}/{split}/fake/{f}' for f in os.listdir(f'{dataset}/{split}/fake') if '.ipynb' not in f]

    while True:
        real_batch_paths = np.random.choice(a = real_files, size = batch_size // 2)
        fake_batch_paths = np.random.choice(a = fake_files, size = batch_size // 2)
        batch_input  = []
        batch_output = []

        for real_input_path, fake_input_path in zip(real_batch_paths, fake_batch_paths):
            batch_input += get_input(real_input_path)
            batch_input += get_input(fake_input_path)
            batch_output += [[0.],[1.]]
        # Return a tuple of (input, output) to feed the network
        batch_x = np.array(batch_input,dtype=np.float32)
        batch_y = np.array(batch_output,dtype=np.float32)

        yield(batch_x, batch_y)

In [8]:
batch_size = 32
dev_gen = data_generator(dataset, 'dev', batch_size = batch_size)
eval_gen = data_generator(dataset, 'eval', batch_size = batch_size)
dev_spe = len(os.listdir(f'{dataset}/dev/real'))//batch_size
eval_spe = len(os.listdir(f'{dataset}/eval/real'))//batch_size

In [9]:
def lstm(input_shape = (None, 37), lr = 1e-3):
    inputs = layers.Input(input_shape)
    X = layers.LSTM(64, return_sequences=True)(inputs)
    X = layers.LSTM(64, return_sequences=True)(X)
    X = layers.Dropout(rate=0.5)(X)
    X = layers.LSTM(128, return_sequences=True)(X)
    X = layers.LSTM(128, return_sequences=True)(X)
    X = layers.Dropout(rate=0.5)(X)
    X = layers.LSTM(256, return_sequences=False)(X)
    X = layers.Dropout(rate=0.5)(X)
    X = layers.Dense(128, activation = 'relu')(X)
    X = layers.Dropout(rate=0.5)(X)
    X = layers.Dense(1)(X)
    X = layers.Activation('sigmoid', dtype='float32', name='predictions')(X)
    #X = layers.Dense(1, activation = 'sigmoid')(X)
    model = models.Model(inputs = inputs, outputs = X)
    model.compile(optimizer = Adam(lr), loss = 'binary_crossentropy', metrics = ['accuracy'])
    return model

In [10]:
model = lstm()
model.load_weights('lstm_model.h5')

In [11]:
results = model.evaluate(dev_gen, verbose = 0, steps = 2*dev_spe)
print('Development Set:')
print(f'Loss: {results[0]:.4f}\nAccuracy: {results[1]:.4f}')
results = model.evaluate(eval_gen, verbose = 0, steps = 2*eval_spe)
print('Evaluation Set:')
print(f'Loss: {results[0]:.4f}\nAccuracy: {results[1]:.4f}')

Development Set:
Loss: 0.3529
Accuracy: 0.8758
Evaluation Set:
Loss: 0.5216
Accuracy: 0.8396
