In [6]:
import os
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.autograd import Variable
import torch.utils.data as data
import torchvision
from torchvision import transforms

BATCH_SIZE = 128
LEARNING_RATE = 0.003
TRAIN_DATA_PATH = "./data/spectrograms"
HEIGHT = 72
WIDTH = 113
TRANSFORM_IMG = transforms.Compose([
    transforms.Grayscale(num_output_channels=1),
    transforms.Resize((72,113)),
    #transforms.CenterCrop(256),
    transforms.ToTensor(),
    ])

train_data = torchvision.datasets.ImageFolder(root=TRAIN_DATA_PATH, transform=TRANSFORM_IMG)
train_data_loader = data.DataLoader(train_data, batch_size=BATCH_SIZE, shuffle=True,  num_workers=4)


In [7]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import os



raw_folder = "data/spectrograms"
LABEL = 2

class Net(nn.Module):
    
    def __init__(self):
        super().__init__()
        self.conv1 = nn.Conv2d(1,32, 5)
        self.conv2 = nn.Conv2d(32, 64, 5)
        self.conv3 = nn.Conv2d(64, 128, 5)
        
        x = torch.randn(HEIGHT, WIDTH).view(-1, 1, HEIGHT, WIDTH)
        self._to_linear = None
        self.convs(x)
        
        self.fc1 = nn.Linear(self._to_linear, 512)
        self.fc2 = nn.Linear(512, LABEL)
        
    def convs(self, x):
        x = F.max_pool2d(F.relu(self.conv1(x)), (2,2))
        x = F.max_pool2d(F.relu(self.conv2(x)), (2,2))
        x = F.max_pool2d(F.relu(self.conv3(x)), (2,2))
        
        if self._to_linear is None:
            self._to_linear = x[0].shape[0]*x[0].shape[1]*x[0].shape[2]
        return x
    
    def forward(self, x):
        x = self.convs(x)
        x = x.view(-1, self._to_linear)  # .view is reshape ... this flattens X before 
        x = F.relu(self.fc1(x))
        x = self.fc2(x) # bc this is our output layer. No activation here.
        return F.softmax(x, dim=1)

device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
net = Net()
net.to(device)
print(net)

Net(
  (conv1): Conv2d(1, 32, kernel_size=(5, 5), stride=(1, 1))
  (conv2): Conv2d(32, 64, kernel_size=(5, 5), stride=(1, 1))
  (conv3): Conv2d(64, 128, kernel_size=(5, 5), stride=(1, 1))
  (fc1): Linear(in_features=6400, out_features=512, bias=True)
  (fc2): Linear(in_features=512, out_features=2, bias=True)
)


In [8]:
import torch.optim as optim

optimizer = optim.Adam(net.parameters(), lr=0.001)
loss_function = nn.NLLLoss()

In [10]:
EPOCHS = 100
for epoch in range(EPOCHS):  # loop over the dataset multiple times
    running_loss = 0
    print()
    for step, (batch_X, batch_y) in enumerate(train_data_loader):
        net.zero_grad()
        outputs = net(batch_X.to(device))
        loss = loss_function(outputs, batch_y.to(device))
        loss.backward()
        optimizer.step()
        running_loss += loss.item()
        if step % (len(train_data_loader)-1) == 0 and step != 0:    # print every 2000 mini-batches
            print('[%d, %5d] loss: %.3f' %
                  (epoch + 1, step + 1, running_loss / (len(train_data_loader)-1)))
            running_loss = 0.0


[1,    20] loss: -0.525

[2,    20] loss: -0.526

[3,    20] loss: -0.526

[4,    20] loss: -0.526

[5,    20] loss: -0.526

[6,    20] loss: -0.525

[7,    20] loss: -0.526

[8,    20] loss: -0.528

[9,    20] loss: -0.527

[10,    20] loss: -0.526

[11,    20] loss: -0.525

[12,    20] loss: -0.527

[13,    20] loss: -0.527

[14,    20] loss: -0.526

[15,    20] loss: -0.527

[16,    20] loss: -0.525

[17,    20] loss: -0.526

[18,    20] loss: -0.525

[19,    20] loss: -0.526

[20,    20] loss: -0.527

[21,    20] loss: -0.526

[22,    20] loss: -0.524

[23,    20] loss: -0.526

[24,    20] loss: -0.524

[25,    20] loss: -0.526

[26,    20] loss: -0.528

[27,    20] loss: -0.527

[28,    20] loss: -0.526

[29,    20] loss: -0.526

[30,    20] loss: -0.526

[31,    20] loss: -0.526

[32,    20] loss: -0.526

[33,    20] loss: -0.527

[34,    20] loss: -0.526

[35,    20] loss: -0.526

[36,    20] loss: -0.526

[37,    20] loss: -0.525

[38,    20] loss: -0.525

[39,    20] loss: -0

In [10]:
# Save model
torch.save(net.state_dict(), "m_classifier.pt")

In [16]:
# Load model
net2 = Net()
net2.load_state_dict(torch.load("m_classifier.pt"))
net2.eval()

Net(
  (conv1): Conv2d(1, 32, kernel_size=(5, 5), stride=(1, 1))
  (conv2): Conv2d(32, 64, kernel_size=(5, 5), stride=(1, 1))
  (conv3): Conv2d(64, 128, kernel_size=(5, 5), stride=(1, 1))
  (fc1): Linear(in_features=204800, out_features=512, bias=True)
  (fc2): Linear(in_features=512, out_features=2, bias=True)
)

12