In [1]:
import numpy as np

import torch
import torchvision
import torch.optim as optim
import torch.nn as nn

import matplotlib.pyplot as plt
import librosa
import librosa.display
import IPython.display as ipd

import tqdm
from tqdm import tqdm

import random
import pickle

In [68]:
t = np.array((((1,1,1), (2,2,2)), ((3,3), (4,4))))
test = np.array([np.array([[1], [2]]), 10, 11])
print(test)
print(test.dtype)
print(t[0][1])

[array([[1],
        [2]]) 10 11]
object
(2, 2, 2)


  t = np.array((((1,1,1), (2,2,2)), ((3,3), (4,4))))
  test = np.array([np.array([[1], [2]]), 10, 11])


In [2]:
#loading data
data = np.load("normalized430Cut11BinsDATA.npy") 

In [3]:
def makeWindows2(data, numWindows):
    windows = []
    intervalLength = int(430 / numWindows)
    for mspec in tqdm(range(len(data))):
        start = 0
        end = intervalLength
        for interval in range(numWindows):
            melspectrogram = data[mspec]
            window = melspectrogram[:,start:end]
            windows.append([window, mspec, interval])
            start += intervalLength
            end += intervalLength
    return windows

def sortWindows(arr):#sorts from least to greatest in terms of the "mel-order"
    for mspec in range(len(arr)):
        for mspec2 in range(mspec, len(arr)):
            if arr[mspec][2] > arr[mspec2][2]:
                temp = arr[mspec]
                arr[mspec] = arr[mspec2]
                arr[mspec2] = temp
    return arr

def reconstruct(windows, numWindows, size): #reconstructs the Mel spectrograms after being window sliced and shuffled
    windows = windows.tolist()
    melspectrograms = []
    allRecon = []
    for melID in tqdm(range(size)):
        mspec = []
        for window in range(len(windows)):
            if windows[window][1] == melID:
                mspec.append(windows[window])
        sortWindows(mspec)
        mspec = np.array(mspec)
        whole = mspec[0][0]
        for time in range(1,numWindows):
            whole = np.concatenate((whole, mspec[time][0]), axis=1)
        whole = whole.tolist()
        allRecon.append(whole)
    return allRecon

In [4]:
training_data = data[:10000]
testing_data = data[10000:12000]

windows = makeWindows2(training_data, 5)
testWindows = makeWindows2(testing_data, 5)

np.random.shuffle(windows) #og size is 10,000
np.random.shuffle(testWindows) #og size is 2,000

100%|██████████| 10000/10000 [00:00<00:00, 169308.12it/s]
100%|██████████| 2000/2000 [00:00<00:00, 153724.79it/s]


In [5]:
#taking a sample size of 10,000
#8:2 split (training_data:testing_data ratio)
np.random.shuffle(data)
training_data = data[:8000] #length of 8000
testing_data = data[8000:10000] #length of 2000

In [6]:
class CAE(nn.Module):
    def __init__(self):
        super().__init__()
        
        self.encoder = nn.Sequential( #starting size: (10,86)
            nn.Conv2d(1,32,3), #input: 11, 86 -> 9, 84
            nn.ReLU(),
            nn.MaxPool2d(2),
            nn.Conv2d(32,64,3), #input: 4, 42 -> 2,40
            nn.ReLU(),
            nn.BatchNorm2d(64),
            nn.MaxPool2d(2) #input: 2, 40 -> 1, 20
            #final: 1, 17
        )
        
        self.decoder = nn.Sequential(
            nn.ConvTranspose2d(64, 32, 3, stride=2), #input: 1,20 -> 3,41
            nn.ReLU(),
            nn.ConvTranspose2d(32, 1, 6, stride=2, output_padding = (1,0)) #input: 3, 41 -> 10,86
            
            #final: 10,86
        )
        
    def forward(self,x):
        x = self.encoder(x)
        x=self.decoder(x)
        return x

In [7]:
# windows[0].shape

In [8]:
# test = CAE()

# o = test.encoder(torch.from_numpy(windows[0].reshape(-1,1,11,86)).type(torch.float32))
# o.shape

In [9]:
#creating the batches

BATCH_SIZE = 50
BATCHES = []

copy = windows

iterations = int((len(copy))/BATCH_SIZE)
for i in range(iterations):
    start = random.randint(0, len(copy) - BATCH_SIZE)
    end = start + BATCH_SIZE
    batch = copy[start:end]
    BATCHES.append(batch)
    copy = copy[:start] + copy[end:] #taking out the batch from the training_data
    #to prevent duplicate numbers from appearing across batches
   
print(len(copy))
print(f'Shape: {len(BATCHES)}, {len(BATCHES[0])}')

0
Shape: 1000, 50


In [10]:
#converting the python list to tensor

batched_training_data = np.array(BATCHES)
batched_training_data = torch.from_numpy(batched_training_data).type(torch.float32)

  batched_training_data = np.array(BATCHES)


TypeError: can't convert np.ndarray of type numpy.object_. The only supported types are: float64, float32, float16, complex64, complex128, int64, int32, int16, int8, uint8, and bool.

In [18]:
batched_training_data

array([[[array([[ 4.62728577e+01,  4.70385895e+01,  4.77376595e+01,
                  4.66161652e+01,  4.50893059e+01,  4.79107742e+01,
                  4.78867912e+01,  4.70270882e+01,  4.83323326e+01,
                  4.88050804e+01,  4.69841766e+01,  4.65106239e+01,
                  4.77279091e+01,  4.65287857e+01,  4.54125557e+01,
                  4.62794304e+01,  4.73514557e+01,  4.68865891e+01,
                  4.71204720e+01,  4.64369965e+01,  4.49686012e+01,
                  4.57367096e+01,  4.72765007e+01,  4.67859230e+01,
                  4.56441536e+01,  4.56455574e+01,  4.59585266e+01,
                  4.69419441e+01,  4.76776276e+01,  4.67422943e+01,
                  4.69762344e+01,  4.58869743e+01,  4.52905884e+01,
                  4.58525391e+01,  4.65573921e+01,  4.81162109e+01,
                  4.78952599e+01,  4.73036232e+01,  4.76494446e+01,
                  4.76281548e+01,  4.70901489e+01,  4.81754990e+01,
                  4.86692886e+01,  4.79512749e+0

In [None]:
#training

model = CAE()
criterion = nn.MSELoss()
optimizer = optim.Adam(model.parameters(), lr = 0.0001)

NUM_OF_EPOCH = 100

outputs = []

for epoch in range(NUM_OF_EPOCH):
    for i in range(len(batched_training_data)):
        output = model(batched_training_data[i][0].view(-1,1,11,86))
        loss = criterion(output, batched_training_data[i][0])
    
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
    outputs.append((epoch, output, batched_training_data[i][0]))
    print(f'Epoch {epoch}, Loss: {loss.item():.4f}')

In [None]:
import pickle

filename = 'model_11.pt'
pickle.dump(model, open(filename, 'wb'))

In [None]:
# model_5 = pickle.load(open('model_5.pt', 'rb'))

In [None]:
# testWindows[0].shape

In [None]:
# model = pickle.load(open('model_11.pt','rb'))
# a = 1000

# mspecs = reconstruct(testWindows, 5, 2000)

# output = model(torch.from_numpy(mspecs[a].reshape(-1,1,11,430)).type(torch.float32))
# output2 = model(torch.from_numpy(mspec[2000].reshape(-1,1,11,430)).type(torch.float32))

In [None]:
# plt.figure(figsize=(25,10))
# librosa.display.specshow(output.view(11,86).detach().numpy(), x_axis = "time", y_axis = "mel", sr=22050)

# plt.colorbar(format="%+2.f")
# plt.show()

In [None]:
# plt.figure(figsize=(25,10))
# librosa.display.specshow(testWindows[a].reshape(11,86), x_axis = "time", y_axis = "mel", sr=22050)

# plt.colorbar(format="%+2.f")
# plt.show()

In [None]:
# plt.figure(figsize=(25,10))
# librosa.display.specshow(testing_data[0], x_axis = "time", y_axis = "mel", sr=22050)

# plt.colorbar(format="%+2.f")
# plt.show()