# Constructing The Model

## Imports

In [496]:
import IPython.display as ipd
import librosa # Compatible with python 3.10
import matplotlib.pyplot as plt
import numpy as np
import os
import pandas as pd
import random
import torch
import torch.nn as nn
import torch.nn.functional  as F
import torch.optim as optim
import torchvision
import torchvision.transforms as transforms


from PIL import Image
from sklearn.metrics import classification_report
from sklearn.metrics import confusion_matrix
from sklearn.model_selection import train_test_split
from torch.optim.lr_scheduler import StepLR
from torch.utils.data import DataLoader
from torch.utils.data import Dataset
from torchvision.io import read_image
from torchvision.transforms import ToTensor
from torchvision.transforms import v2

## Constants

#### Directories

In [497]:
# Assign paths
base_dir = "dataset"

# Use for-norm dataset
train_dir = os.path.join(base_dir, "for-norm", "for-norm", "training")
test_dir = os.path.join(base_dir, "for-norm", "for-norm", "testing")
val_dir = os.path.join(base_dir, "for-norm", "for-norm", "validation")

# Classified directories
train_dir_fake = os.path.join(train_dir, "fake")
train_dir_real = os.path.join(train_dir, "real")
test_dir_fake = os.path.join(test_dir, "fake")
test_dir_real = os.path.join(test_dir, "real")
val_dir_fake = os.path.join(val_dir, "fake")
val_dir_real = os.path.join(val_dir, "real")

In [498]:
# Check for mistakes
print("Base directory:", base_dir)
print()
print("Train directory:", train_dir)
print("Test directory:", test_dir)
print("Val directory:", val_dir)
print()
print("Train (fake) directory:", train_dir_fake)
print("Train (real) directory:", train_dir_real)
print("Test (fake) directory:", test_dir_fake)
print("Test (real) directory:", test_dir_real)
print("Val (fake) directory:", val_dir_fake)
print("Val (real) directory:", val_dir_real)

Base directory: dataset

Train directory: dataset/for-norm/for-norm/training
Test directory: dataset/for-norm/for-norm/testing
Val directory: dataset/for-norm/for-norm/validation

Train (fake) directory: dataset/for-norm/for-norm/training/fake
Train (real) directory: dataset/for-norm/for-norm/training/real
Test (fake) directory: dataset/for-norm/for-norm/testing/fake
Test (real) directory: dataset/for-norm/for-norm/testing/real
Val (fake) directory: dataset/for-norm/for-norm/validation/fake
Val (real) directory: dataset/for-norm/for-norm/validation/real


#### Other Constants

In [499]:
BATCH_SIZE = 16
NUM_WORKERS = 12

## Obtain Data

In [500]:
# Obtain waveform (.wav) audio files
train_fake_audio_path = [os.path.join(train_dir_fake, file) for file in os.listdir(train_dir_fake) if file.endswith(".wav")]
train_real_audio_path = [os.path.join(train_dir_real, file) for file in os.listdir(train_dir_real) if file.endswith(".wav")]

test_fake_audio_path = [os.path.join(test_dir_fake, file) for file in os.listdir(test_dir_fake) if file.endswith(".wav")]
test_real_audio_path = [os.path.join(test_dir_real, file) for file in os.listdir(test_dir_real) if file.endswith(".wav")]

validation_fake_audio_path = [os.path.join(val_dir_fake, file) for file in os.listdir(val_dir_fake) if file.endswith(".wav")]
validation_real_audio_path = [os.path.join(val_dir_real, file) for file in os.listdir(val_dir_real) if file.endswith(".wav")]

In [501]:
# Get training labels
train_labels = []

for i in train_fake_audio_path:
    train_labels.append(0)
for i in train_real_audio_path:
    train_labels.append(1)

# Convert list to Pandas dataframe
train_labels_df = pd.DataFrame({'label':train_labels})

# Get testing labels
val_labels = []

for i in validation_fake_audio_path:
    val_labels.append(0)
for i in validation_real_audio_path:
    val_labels.append(1)

# Convert list to Pandas dataframe
val_labels_df = pd.DataFrame({'label':val_labels})

In [502]:
train_labels_df.head(10)

Unnamed: 0,label
0,0
1,0
2,0
3,0
4,0
5,0
6,0
7,0
8,0
9,0


In [503]:
val_labels_df.head(10)

Unnamed: 0,label
0,0
1,0
2,0
3,0
4,0
5,0
6,0
7,0
8,0
9,0


In [504]:
#  Evaluate the training labels
count = 0

for i in range(len(train_labels)):
    if (train_labels[i] == 0):
        count = count+1

print(train_labels)
print("Number of total labels:", len(train_labels))
print("Number of fake audio samples:", count)
print("Number of real audio samples:", len(train_labels) - count)


[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 

In [505]:
#  Evaluate the testing labels
count = 0

for i in range(len(val_labels)):
    if (val_labels[i] == 0):
        count = count+1

print(val_labels)
print("Number of total labels:", len(val_labels))
print("Number of fake audio samples:", count)
print("Number of real audio samples:", len(val_labels) - count)


[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 

In [506]:
# Display a random real training sample audio
random_real_training_audio_file = train_real_audio_path[random.randint(0, 333)]
ipd.Audio(random_real_training_audio_file)

In [507]:
# Display a random fake training sample audio
random_fake_training_audio_file = train_fake_audio_path[random.randint(0, 333)]
ipd.Audio(random_fake_training_audio_file)


## Data Preprocessing

In [508]:
# Combine paths and labels directly
train_audio_paths = train_fake_audio_path + train_real_audio_path
train_labels = [0] * len(train_fake_audio_path) + [1] * len(train_real_audio_path)

val_audio_paths = validation_fake_audio_path + validation_real_audio_path
val_labels = [0] * len(validation_fake_audio_path) + [1] * len(validation_real_audio_path)

# Shuffle both datasets
train_data = list(zip(train_audio_paths, train_labels))
val_data = list(zip(val_audio_paths, val_labels))

random.shuffle(train_data)
random.shuffle(val_data)

In [509]:
# Training dataset
class CustomTrainingAudioDataset(Dataset):
    def __init__(self, labels_df, base_dir, transform=None, random=False):
        self.labels_df = labels_df # Audio labels_df
        self.base_dir = base_dir # Base audio directory
        self.transform = transform # Modify audio features
        # TODO: randomize order of samples and labels_df
    
    def __len__(self):
        return len(self.labels_df) # Number of audio samples
    
    def __getitem__(self, index):
        label = self.labels_df.iloc[index]["label"]
        # label = self.labels_df[index]

        if (label == 0):
            audio_sample_path = os.path.join(self.base_dir, "for-norm", "for-norm", "training", "fake", f"file{str(index+1)}.wav_16k.wav_norm.wav_mono.wav_silence.wav")
        if (label == 1):
            audio_sample_path = os.path.join(self.base_dir, "for-norm", "for-norm", "training", "real", f"file{str(index+1)}.wav_16k.wav_norm.wav_mono.wav_silence.wav")
       
        audio_data, sample_rate = librosa.load(audio_sample_path)
        audio_sample = ipd.Audio(data=audio_data, rate=sample_rate)

        if self.transform:
            audio_sample = self.transform(audio_sample)
        
        return audio_sample, label # An audio sample and a label at the corresponding index
    
# Training dataset
class CustomValidationAudioDataset(Dataset):
    def __init__(self, labels_df, base_dir, transform=None):
        self.labels_df = labels_df # Audio labels_df
        self.base_dir = base_dir # Base audio directory
        self.transform = transform # Modify audio features
    
    def __len__(self):
        return len(self.labels_df) # Number of audio samples
    
    def __getitem__(self, index):
        label = self.labels_df.iloc[index]["label"]
        # label = self.labels_df[index]

        if (label == 0):
            audio_sample_path = os.path.join(self.base_dir, "for-norm", "for-norm", "training", "fake", f"file{str(index+1)}.wav_16k.wav_norm.wav_mono.wav_silence.wav")
        if (label == 1):
            audio_sample_path = os.path.join(self.base_dir, "for-norm", "for-norm", "training", "real", f"file{str(index+1)}.wav_16k.wav_norm.wav_mono.wav_silence.wav")

        audio_data, sample_rate = librosa.load(audio_sample_path)
        audio_sample = ipd.Audio(data=audio_data, rate=sample_rate)

        if self.transform:
            audio_sample = self.transform(audio_sample)
        
        return audio_sample, label # An audio sample and a label at the corresponding index
    
# TODO: CustomTestingAudioDataset    

# # Validation dataset
# class CustomTestingAudioDataset(Dataset):
#     def __init__(self, labels_df, base_dir, transform=None):
#         self.labels_df = labels_df # Audio labels_df
#         self.base_dir = base_dir # Base audio directory
#         self.transform = transform # Modify audio features
    
#     def __len__(self):
#         return len(self.labels_df) # Number of audio samples
    
#     def __getitem__(self, index):
#         label = self.labels_df[index]

#         if (self.labels_df[index] == 0):
#             audio_sample_path = os.path.join(self.base_dir, "for-norm", "for-norm", "training", "fake", "file" + label + ".wav_16k.wav_norm.wav_mono.wav_silence.wav")
#         if (self.labels_df[index] == 1):
#             audio_sample_path = os.path.join(self.base_dir, "for-norm", "for-norm", "training", "real", "file" + label + ".wav_16k.wav_norm.wav_mono.wav_silence.wav")
       
#         audio_sample = ipd.Audio(audio_sample_path)

#         if self.transform:
#             audio_sample = self.transform(audio_sample)
        
#         return audio_sample, label # An audio sample and a label at the corresponding index

In [510]:
# Load train and test image datasets
train_dataset = CustomTrainingAudioDataset(
    labels_df=train_labels_df,
    base_dir=base_dir,
    random=True,
)
val_dataset = CustomValidationAudioDataset(
    labels_df=val_labels_df,
    base_dir=base_dir,
)
# TODO: CustomTestingAudioDataset

In [511]:
# Create Dataloaders
train_dataloader = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True, num_workers=NUM_WORKERS, pin_memory=True)
val_dataloader = DataLoader(val_dataset, batch_size=BATCH_SIZE, shuffle=True, num_workers=NUM_WORKERS, pin_memory=True)
# TODO: test_dataloader

## Designing The Model