#### In this notebook we build a script that generates dataloaders
1. This is important because we have size constraints on Google Colab
2. It is also good to have dataloaders that are quick to build, repeatable (seeded), and configurable

In [26]:

import librosa.display
from numba import core
import matplotlib.pyplot as plt
import numpy as np
import os
import random
import torch
import pickle
from torch.utils.data import Dataset, DataLoader
from sklearn.model_selection import train_test_split


# ---- Settings a user is expected to tune ----

# Video ids; each one names a './data/<id>-audio' directory read further down.
video_files = ['a_oqcg0hvpo', 'Gz99TTxmvls', 'ubz5lz_l7IY', 'LydoK7hXKbs']

ROOT_FILENAME = 'Dataloader'  # prefix of the pickle file written at the end
SEED = 1                      # seed for the file sampling and the train/test split
BATCH_SIZE = 20               # batch size of both dataloaders
DATA_PER_FILE = 50            # number of audio clips drawn per video


# ---- Shared state filled in by the cells below ----

tensors = dict()  # cache of loaded prediction files, keyed by '<video><prefix>'
X = list()        # model inputs
Y = list()        # targets, index-aligned with X


In [27]:
def padd_with_zeros(twod_numpy, w, h):
    """Zero-pad a 2-D array so that it is at least ``w`` x ``h``.

    Args:
        twod_numpy: 2-D numpy array to pad.
        w: minimum size of axis 0 after padding.
        h: minimum size of axis 1 after padding.

    Returns:
        A new array with the input centred inside the padding. When the
        missing amount is odd, the extra row/column goes at the end. An axis
        that is already at least as large as requested is left untouched
        (nothing is cropped).
    """
    def split_padding(current, wanted):
        # Total amount missing on this axis (never negative), split before/after.
        missing = max(wanted - current, 0)
        before = missing // 2
        return before, missing - before

    axis0 = split_padding(twod_numpy.shape[0], w)
    axis1 = split_padding(twod_numpy.shape[1], h)
    return np.pad(twod_numpy, pad_width=(axis0, axis1), mode='constant')

def process_input_file(filename, n_fft=512, hop_length=1024, target_shape=(260, 90)):
    """Load an audio file and return its zero-padded STFT.

    Args:
        filename: path of the audio file, passed to ``librosa.load`` with
            librosa's default loading options.
        n_fft: number of FFT points (window size) given to ``librosa.stft``.
        hop_length: hop length (frame shift) given to ``librosa.stft``.
        target_shape: ``(rows, cols)`` minimum shape of the returned array;
            smaller spectrograms are zero-padded up to it.

    Returns:
        The STFT as returned by ``librosa.stft``, padded with zeros by
        ``padd_with_zeros``. Spectrograms larger than ``target_shape`` are
        returned at their own size because the padding helper never crops.

    NOTE(review): with the defaults the hop (1024) is larger than the window
    (512), so consecutive analysis windows do not overlap -- confirm that this
    is intended.
    """
    # The sample rate is returned by librosa.load but is not needed here.
    waveform, _sample_rate = librosa.load(filename)

    # Compute the STFT
    stft = librosa.stft(waveform, n_fft=n_fft, hop_length=hop_length)

    rows, cols = target_shape
    return padd_with_zeros(stft, rows, cols)

def get_prediction(video_name, time_stamp):
    """Return the stored prediction for one frame of a video.

    Predictions are read from
    ``./resnet_predictions/<video_name>/<video_name><first 3 chars of time_stamp>.pth``
    and each loaded file is kept in the module-level ``tensors`` cache so it
    is read from disk at most once.

    Args:
        video_name: id of the video; also the name of its prediction folder.
        time_stamp: time stamp string of the frame. Its first three characters
            select the prediction file, the full string selects the entry.

    Returns:
        The entry stored under ``'frame<time_stamp>'`` in the loaded file.
    """
    cache_key = f'{video_name}{time_stamp[:3]}'
    try:
        frames = tensors[cache_key]
    except KeyError:
        # First request for this file: load it and remember it for later calls.
        frames = torch.load(f'./resnet_predictions/{video_name}/{cache_key}.pth')
        tensors[cache_key] = frames
    return frames[f'frame{time_stamp}']

# Start from empty lists so that re-running this cell does not append a second
# copy of every sample to X and Y.
X = []
Y = []

for video_name in video_files:
    audio_dir = f'./data/{video_name}-audio'
    # os.listdir returns entries in arbitrary order; sorting makes the seeded
    # draw below select the same files on every machine and every run.
    candidates = sorted(os.listdir(audio_dir))
    # Re-seed for each video so its selection depends only on its own files.
    random.seed(SEED)
    # Sample WITHOUT replacement: random.choices may return the same clip
    # several times, and such duplicates can end up on both sides of the
    # train/test split. Cap k so short directories do not raise.
    files = random.sample(candidates, k=min(DATA_PER_FILE, len(candidates)))
    for file in files:
        # The time stamp is whatever sits between the first 5 characters of the
        # file name and its first '.'.
        time_stamp = file[5:file.find('.')]
        # Human-readable sample id; not consumed in this cell.
        label = video_name + '-' + time_stamp
        audio_data = process_input_file(f'{audio_dir}/{file}')
        classification_res = get_prediction(video_name, time_stamp)

        # Keep only the magnitude of the spectrogram as the model input.
        X.append(np.abs(audio_data))
        Y.append(classification_res)
    print(video_name)


a_oqcg0hvpo
Gz99TTxmvls
ubz5lz_l7IY
LydoK7hXKbs


In [28]:
class CustomDataset(Dataset):
    """Dataset that pairs the i-th element of ``X`` with the i-th element of ``Y``."""

    def __init__(self, X, Y):
        """Store the two index-aligned sequences without copying them."""
        self.X, self.Y = X, Y

    def __len__(self):
        """Number of samples, taken from the length of ``X``."""
        return len(self.X)

    def __getitem__(self, index):
        """Return the ``(x, y)`` pair stored at ``index``."""
        return self.X[index], self.Y[index]

# Hold out 20% of the samples for testing; the shuffle is seeded with SEED.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    Y,
    train_size=0.8,
    shuffle=True,
    random_state=SEED,
)

# Wrap each split in a dataset ...
train_dataset = CustomDataset(X_train, y_train)
test_dataset = CustomDataset(X_test, y_test)

# ... and serve it in batches; only the training batches are reshuffled.
train_loader = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=BATCH_SIZE, shuffle=False)

# Persist both loaders as one pickled tuple. The file name records the
# settings (batch size, samples per video, seed) that produced it.
os.makedirs('Dataloaders', exist_ok=True)
pickle_file = f'Dataloaders/{ROOT_FILENAME}_BS{BATCH_SIZE}_DPF{DATA_PER_FILE}_S{SEED}.pkl'
with open(pickle_file, 'wb') as file:
    pickle.dump((train_loader, test_loader), file)

In [25]:
# Read the pickled (train, test) loader pair back from disk.
# Only unpickle files you created yourself: pickle can execute arbitrary code.
with open(pickle_file, 'rb') as file:
    loaders = pickle.load(file)
train_loader, test_loader = loaders

# Sanity check: shape of the inputs in the first training batch.
first_batch = next(iter(train_loader))
datasize = first_batch[0].shape
print(datasize)

torch.Size([40, 260, 90])
