In [1]:
import numpy as np

import torch
import torchvision
import torch.optim as optim
import torch.nn as nn

import matplotlib.pyplot as plt
import librosa
import librosa.display
import IPython.display as ipd

import tqdm
from tqdm import tqdm

import random
import pickle

In [2]:
#loading data
data = np.load("normalized430Cut50BinsDATA.npy")

In [3]:
def makeWindows(data, numWindows):
    windows = []
    intervalLength = int(430 / numWindows)
    for mspec in tqdm(range(len(data))):
        start = 0
        end = intervalLength
        for interval in range(numWindows):
            melspectrogram = data[mspec]
            window = melspectrogram[:,start:end]
            windows.append(window)
            start += intervalLength
            end += intervalLength
    return windows
        

In [4]:
# def makeWindows2(data, numWindows):
#     windows = []
#     intervalLength = int(430 / numWindows)
#     for mspec in tqdm(range(len(data))):
#         start = 0
#         end = intervalLength
#         for interval in range(numWindows):
#             melspectrogram = data[mspec]
#             window = melspectrogram[:,start:end]
#             windows.append([window, mspec, interval])
#             start += intervalLength
#             end += intervalLength
#     return windows
    

In [5]:
training_data = data[:10000]
testing_data = data[10000:12000]

windows = makeWindows(training_data, 5)
testWindows = makeWindows(testing_data, 5)

100%|██████████| 10000/10000 [00:00<00:00, 188962.40it/s]
100%|██████████| 2000/2000 [00:00<00:00, 199638.45it/s]


In [6]:
#taking a sample size of 10,000
#8:2 split (training_data:testing_data ratio)
np.random.shuffle(data)
training_data = data[:8000] #length of 8000
testing_data = data[8000:10000] #length of 2000

In [7]:
class CAE(nn.Module):
    def __init__(self):
        super().__init__()
        
        self.encoder = nn.Sequential( #starting size: (50,86)
            nn.Conv2d(1,32,3), #input: 50, 86 -> 48,84
            nn.ReLU(),
            nn.MaxPool2d(2),
            nn.Conv2d(32,64,3), #input: 24, 42 -> 22,40
            nn.ReLU(),
            nn.BatchNorm2d(64),
            nn.MaxPool2d(2) #input: 22, 40 -> 11, 20
            #final: 11, 20
        )
        
        self.decoder = nn.Sequential(
            nn.ConvTranspose2d(64, 32, 3, stride=2), #input: 11,20 -> 23,41
            nn.ReLU(),
            nn.ConvTranspose2d(32, 1, 6, stride=2) #input: 23, 41 -> 50,86
            
            #final: 50,86
        )
        
    def forward(self,x):
        x = self.encoder(x)
        x=self.decoder(x)
        return x

In [8]:
# windows[0].shape

In [9]:
# test = CAE()

# o = test.encoder(torch.from_numpy(windows[0].reshape(-1,1,11,86)).type(torch.float32))
# o.shape

In [10]:
#creating the batches

BATCH_SIZE = 50
BATCHES = []

copy = windows

iterations = int((len(copy))/BATCH_SIZE)
for i in range(iterations):
    start = random.randint(0, len(copy) - BATCH_SIZE)
    end = start + BATCH_SIZE
    batch = copy[start:end]
    BATCHES.append(batch)
    copy = copy[:start] + copy[end:] #taking out the batch from the training_data
    #to prevent duplicate numbers from appearing across batches
   
print(len(copy))
print(f'Shape: {len(BATCHES)}, {len(BATCHES[0])}')

0
Shape: 1000, 50


In [11]:
#converting the python list to tensor

batched_training_data = np.array(BATCHES)
np.random.shuffle(batched_training_data)
batched_training_data = torch.from_numpy(batched_training_data).type(torch.float32)

In [12]:
len(BATCHES[0][0][0])

86

In [13]:
#training

model = CAE()
criterion = nn.MSELoss()
optimizer = optim.Adam(model.parameters(), lr = 0.0001)

NUM_OF_EPOCH = 100

outputs = []

for epoch in range(NUM_OF_EPOCH):
    for i in range(len(batched_training_data)):
        output = model(batched_training_data[i].view(-1,1,50,86))
        loss = criterion(output, batched_training_data[i])
    
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
    outputs.append((epoch, output, batched_training_data[i]))
    print(f'Epoch {epoch}, Loss: {loss.item():.4f}')

  return F.mse_loss(input, target, reduction=self.reduction)


Epoch 0, Loss: 93.0482
Epoch 1, Loss: 81.8604
Epoch 2, Loss: 77.5229
Epoch 3, Loss: 73.5105
Epoch 4, Loss: 69.1820
Epoch 5, Loss: 65.1417
Epoch 6, Loss: 62.8355
Epoch 7, Loss: 61.3433
Epoch 8, Loss: 60.2632
Epoch 9, Loss: 59.2318
Epoch 10, Loss: 58.2074
Epoch 11, Loss: 57.1804
Epoch 12, Loss: 56.3883
Epoch 13, Loss: 55.7704
Epoch 14, Loss: 55.3122
Epoch 15, Loss: 54.9210
Epoch 16, Loss: 54.4798
Epoch 17, Loss: 54.1906
Epoch 18, Loss: 53.9878
Epoch 19, Loss: 53.7938
Epoch 20, Loss: 53.4858
Epoch 21, Loss: 53.3365
Epoch 22, Loss: 53.1732
Epoch 23, Loss: 52.9584
Epoch 24, Loss: 52.7297
Epoch 25, Loss: 52.6275
Epoch 26, Loss: 52.5047
Epoch 27, Loss: 52.3898
Epoch 28, Loss: 52.3630
Epoch 29, Loss: 52.3178
Epoch 30, Loss: 52.2842
Epoch 31, Loss: 52.1988
Epoch 32, Loss: 52.1670
Epoch 33, Loss: 52.1382
Epoch 34, Loss: 52.1168
Epoch 35, Loss: 52.0887
Epoch 36, Loss: 52.0675
Epoch 37, Loss: 52.0275
Epoch 38, Loss: 52.0101
Epoch 39, Loss: 51.9947
Epoch 40, Loss: 51.9710
Epoch 41, Loss: 51.9511
Ep

KeyboardInterrupt: 

In [None]:
import pickle

filename = 'model_14.pt'
pickle.dump(model, open(filename, 'wb'))

In [None]:
# model_5 = pickle.load(open('model_5.pt', 'rb'))

In [None]:
# testWindows[0].shape

In [None]:
model = pickle.load(open('model_14.pt','rb'))
a = 0
output = model(torch.from_numpy(testWindows[a].reshape(-1,1,50,86)).type(torch.float32))
output2 = model(torch.from_numpy(testWindows[2000].reshape(-1,1,50,86)).type(torch.float32))

In [None]:
plt.figure(figsize=(25,10))
librosa.display.specshow(output.view(50,86).detach().numpy(), x_axis = "time", y_axis = "mel", sr=22050)

plt.colorbar(format="%+2.f")
plt.show()

In [None]:
plt.figure(figsize=(25,10))
librosa.display.specshow(testWindows[a].reshape(50,86), x_axis = "time", y_axis = "mel", sr=22050)

plt.colorbar(format="%+2.f")
plt.show()

In [None]:
# plt.figure(figsize=(25,10))
# librosa.display.specshow(testing_data[0], x_axis = "time", y_axis = "mel", sr=22050)

# plt.colorbar(format="%+2.f")
# plt.show()