In [1]:
import numpy as np
import torch
import matplotlib.pyplot as plt
import torchvision
from torchvision import transforms
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import tqdm
from tqdm import tqdm
import librosa
import librosa.display
import IPython.display as ipd

import random

In [2]:
#loading data
data = np.load("normalizedCutExpandedDATA.npy")

In [3]:
#taking a sample size of 10,000
#8:2 split (training_data:testing_data ratio)
np.random.shuffle(data)
training_data = data[:8000] #length of 8000
testing_data = data[8000:10000] #length of 2000

In [4]:
transform = transforms.ToTensor()

In [43]:
class CAE(nn.Module):
    def __init__(self):
        super().__init__()
        
        self.encoder = nn.Sequential( #starting size: (100,431)
            nn.Conv2d(1,32,5), #input: 100, 431 -> 96,427
            nn.ReLU(),
            nn.MaxPool2d(2),
            nn.Conv2d(32,32,7), #input: 48, 213 -> 42,207
            nn.ReLU(),
            nn.MaxPool2d(2),
            nn.Conv2d(32,64,8), #input: 21, 103 -> 14,96
            nn.ReLU(),
            nn.BatchNorm2d(64),
            nn.MaxPool2d(2), #input: 14, 96 -> 7, 48
            #final: 1, 106
        )
        
        self.decoder = nn.Sequential(
            nn.ConvTranspose2d(64, 48, 3, stride=1), #input: 7,48 -> 9,50
            nn.ReLU(),
            nn.ConvTranspose2d(48, 32, 7, stride=2), #input: 9,50 -> 23,105
            nn.ReLU(),
            nn.ConvTranspose2d(32, 16, 6, stride=2), #input: 23, 105 -> 50, 214
            nn.ReLU(),
            nn.ConvTranspose2d(16, 1, 5, stride=2), #input: 50, 214 -> 103, 431
            
            #final: 10,431
        )
        
    def forward(self,x):
        x = self.encoder(x)
        x=self.decoder(x)
        x = x[:,:,:100,:]
        return x

In [6]:
#creating the batches

BATCH_SIZE = 32
BATCHES = []

copy = training_data.tolist()

iterations = int((len(copy))/BATCH_SIZE)
for i in range(iterations):
    start = random.randint(0, len(copy) - BATCH_SIZE)
    end = start + BATCH_SIZE
    batch = copy[start:end]
    BATCHES.append(batch)
    copy = copy[:start] + copy[end:] #taking out the batch from the training_data
    #to prevent duplicate numbers from appearing across batches
   
print(len(copy))
print(f'Shape: {len(BATCHES)}, {len(BATCHES[0])}')

0
Shape: 250, 32


In [7]:
#converting the python list to tensor

batched_training_data = np.array(BATCHES)
batched_training_data = torch.from_numpy(batched_training_data).type(torch.float32)

In [44]:
#training

model = CAE()
criterion = nn.MSELoss()
optimizer = optim.Adam(model.parameters(), lr = 0.0001)

NUM_OF_EPOCH = 250

outputs = []

for epoch in range(NUM_OF_EPOCH):
    for i in range(len(batched_training_data)):
        output = model(batched_training_data[i].view(-1,1,100,431))
        loss = criterion(output, batched_training_data[i])
    
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
    outputs.append((epoch, output, batched_training_data[i]))
    print(f'Epoch {epoch}, Loss: {loss.item():.4f}')

  return F.mse_loss(input, target, reduction=self.reduction)


Epoch 0, Loss: 89.8978
Epoch 1, Loss: 70.6342
Epoch 2, Loss: 63.3134
Epoch 3, Loss: 59.1917
Epoch 4, Loss: 56.3926
Epoch 5, Loss: 53.5761
Epoch 6, Loss: 50.8648


KeyboardInterrupt: 

In [None]:
# len(batched_training_data)

In [None]:
t = np.load("control.npy")
t = t[:, :431]
mspecTensor = torch.from_numpy(t) 
mspecTensor = model(mspecTensor.view(-1,1,100,431))
show = (t, mspecTensor)

plt.figure(figsize=(25,10))
librosa.display.specshow(show[0], x_axis = "time", y_axis = "mel", sr=22050)
#librosa.display.specshow(show[0].view(10,474).detach().numpy(), x_axis = "time", y_axis = "mel", sr=22050)

plt.colorbar(format="%+2.f")
plt.show()

In [None]:
# show[0]

In [None]:
plt.figure(figsize=(25,10))
#librosa.display.specshow(show[0], x_axis = "time", y_axis = "mel", sr=22050)
librosa.display.specshow(show[1].view(100,431).detach().numpy(), x_axis = "time", y_axis = "mel", sr=22050)

plt.colorbar(format="%+2.f")
plt.show()

In [None]:
# np.save("controlResult4.npy", show[1].detach().numpy())

In [None]:
controlResult = np.load("controlResult.npy")
plt.figure(figsize=(25,10))
librosa.display.specshow(controlResult.reshape(100,474), x_axis = "time", y_axis = "mel", sr=22050)

plt.colorbar(format="%+2.f")
plt.show()

In [None]:
#might as well save what I have done

# path = "C:\\Users\\JasonZhou\\Models"
# torch.save(model, path)

import pickle

filename = 'model_7.pt'
pickle.dump(model, open(filename, 'wb'))

In [None]:
# controlResult = np.load("controlResult.npy")
plt.figure(figsize=(25,10))
librosa.display.specshow(testing_data[0].reshape(100,431), x_axis = "time", y_axis = "mel", sr=22050)

plt.colorbar(format="%+2.f")
plt.show()

In [None]:
pic = testing_data[0]
pic = pic[:, :431]
o = torch.from_numpy(pic)
o = model(o.view(-1,1,100,431))
plt.figure(figsize=(25,10))
librosa.display.specshow(o.view(100,431).detach().numpy(), x_axis = "time", y_axis = "mel", sr=22050)

plt.colorbar(format="%+2.f")
plt.show()