In [2]:
# Imported in this cell because it executes before the notebook's main import
# cell; without it a fresh kernel ("Restart & Run All") fails with a NameError.
import torch

# Force CPU execution: from here on every `torch.cuda.is_available()` call in
# this kernel reports False, so device selection falls back to "cpu".
torch.cuda.is_available = lambda: False

In [3]:
from scipy.io import wavfile
import wave
import numpy as np
import torchaudio
import os
import pathlib
import torch
from torch.utils.data import TensorDataset, DataLoader
import pyaudio
from torchvision import transforms
from math import ceil

In [4]:
# --- Audio / runtime configuration ---
FORMAT = pyaudio.paInt16  # PyAudio sample-format constant
CHANNELS = 1              # channel count
RATE = 32000              # sample rate in Hz; used as the resampling target when loading files
CHUNK = 1024              # chunk length (in samples) the audio is cut into
# Use the GPU when torch reports one, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")

In [6]:
class AudioDataset(torch.utils.data.Dataset):
    """Map-style dataset over pre-built ``(label, audiochunk)`` records.

    Args:
        input_data: sized, indexable collection. Each item holds the label at
            position 0 and the audio chunk at position 1.
        transform: optional callable applied to the audio chunk on every
            access. ``None`` (the default) disables it.
    """

    def __init__(self,input_data,transform=None):
        self.input_data = input_data
        self.transform = transform

    def __len__(self):
        """Return the number of records."""
        return len(self.input_data)

    def __getitem__(self, index):
        """Return ``(audiochunk, label)`` for ``index``.

        Note the order: records are stored label-first but returned chunk-first.
        """
        record = self.input_data[index]
        label, audiochunk = record[0], record[1]
        # Compare against None instead of relying on truthiness: a callable
        # that defines __len__/__bool__ (e.g. an empty nn.Sequential) is falsy
        # and would otherwise be skipped silently.
        if self.transform is not None:
            audiochunk = self.transform(audiochunk)
        return audiochunk, label

In [7]:
def make_chunks_torch(audio_segment, chunk_length):
    """
    Lazily split row 0 of a 2-D tensor into consecutive chunks.

    Despite its name, `audio_segment` is indexed as a 2-D array
    (`audio_segment[0, start:stop]`), and `chunk_length` is a number of
    elements along dimension 1 (samples), not a duration in milliseconds.

    Args:
        audio_segment: 2-D tensor; only row 0 is read, other rows are ignored.
        chunk_length: positive int, number of elements per chunk.

    Yields:
        1-D slices of row 0. Each is `chunk_length` long except possibly the
        last one, which holds the remainder. Nothing is yielded when
        dimension 1 is empty.

    Raises:
        ValueError: if `chunk_length` is not positive. Because this is a
            generator, the check runs when iteration starts, not at call time.
    """
    if chunk_length <= 0:
        raise ValueError(f"chunk_length must be positive, got {chunk_length!r}")
    total = audio_segment.shape[1]
    # A stepped range gives the chunk starts directly; no float ceil needed.
    for start in range(0, total, chunk_length):
        yield audio_segment[0, start:start + chunk_length]

In [10]:
audio_data = []  # (label, chunk) pairs; every chunk is reshaped to (1, CHUNK)
# Label counter. NOTE(review): it advances once per *file*, not per folder;
# folder names like "0-Ekko" suggest per-folder classes may have been
# intended -- confirm before training.
i=0

sampleratecheck = RATE  # sample rate every file is brought to before chunking
folder = 'Audio' #Assume this is where all audio folders are located. This folder is at same level as script.
abspath = folder
# sorted(): os.listdir returns entries in arbitrary order, which would make the
# file -> label assignment differ between machines and runs.
subfolders = sorted(
    name for name in os.listdir(abspath)
    if os.path.isdir(os.path.join(abspath, name))  # skip stray files next to the group folders
)
for audiogroup in subfolders:
    print("Folder:",audiogroup)
    files = sorted(os.listdir(os.path.join(abspath,audiogroup)))
    print("\tFiles:",files)
    for file in files:
        relpath = os.path.join(abspath,audiogroup,file)
        data,samplerate = torchaudio.load(relpath,normalize=False)
        # With normalize=False torchaudio may return integer-typed samples
        # (see torchaudio.load docs); the mean / normalize / Resample steps
        # below need a floating-point tensor.
        data = data.to(torch.float32)
        # Resample only when the file's rate differs from the target. The
        # previous `sampleratecheck==None` test was never true, and
        # `torchaudio.transforms.resample` does not exist (the class is Resample).
        if samplerate != sampleratecheck:
            data = torchaudio.transforms.Resample(samplerate,sampleratecheck)(data)
        data = torch.mean(data,dim=0,keepdim=True)  # average the channels -> shape (1, samples)
        data = torch.nn.functional.normalize(data)   # default p=2, dim=1: unit L2 norm over the whole file
        for chunk in make_chunks_torch(data,CHUNK):
            # Zero-pad the (possibly shorter) last chunk up to CHUNK samples.
            torchdatachunk = torch.cat([chunk,chunk.new_zeros(CHUNK-len(chunk))])
            torchdatachunk = torch.reshape(torchdatachunk,(1,-1))
            audio_data.append((i,torchdatachunk))
        i+=1

# Wrap the labelled chunks in a Dataset and serve them in shuffled mini-batches.
batch_size = 200
train_ds = AudioDataset(input_data=audio_data)
train_loader = DataLoader(
    dataset=train_ds,
    batch_size=batch_size,
    shuffle=True,
)

Folder: 0-Ekko
	Files: ['Ekko_Ban.ogg', 'Ekko_Original_MoveFirst_0.ogg', 'Ekko_Original_MoveFirst_1.ogg', 'Ekko_Original_MoveFirst_2.ogg', 'Ekko_Original_Move_0.ogg', 'Ekko_Original_Move_1.ogg', 'Ekko_Original_Move_10.ogg', 'Ekko_Original_Move_11.ogg', 'Ekko_Original_Move_12.ogg', 'Ekko_Original_Move_13.ogg', 'Ekko_Original_Move_14.ogg', 'Ekko_Original_Move_15.ogg', 'Ekko_Original_Move_16.ogg', 'Ekko_Original_Move_17.ogg', 'Ekko_Original_Move_18.ogg', 'Ekko_Original_Move_19.ogg', 'Ekko_Original_Move_2.ogg', 'Ekko_Original_Move_20.ogg', 'Ekko_Original_Move_21.ogg', 'Ekko_Original_Move_22.ogg', 'Ekko_Original_Move_23.ogg', 'Ekko_Original_Move_24.ogg', 'Ekko_Original_Move_3.ogg', 'Ekko_Original_Move_4.ogg', 'Ekko_Original_Move_5.ogg', 'Ekko_Original_Move_6.ogg', 'Ekko_Original_Move_7.ogg', 'Ekko_Original_Move_8.ogg', 'Ekko_Original_Move_9.ogg', 'Ekko_Select.ogg']
Folder: 1-Kindred
	Files: ['Kindred.attack01.wav', 'Kindred.attack02.wav', 'Kindred.attack03.wav', 'Kindred.attack04.wav', 'Kin