In [1]:
import torch
import torchvision
import torch.optim as optim
import torch.nn as nn
import torch.nn.functional as F
import os
from torchvision import transforms ,datasets

import helper
import waveLoader as wl
import importlib
importlib.reload(helper)

import matplotlib.pyplot as plt
from tqdm import tqdm

 

In [2]:
if torch.cuda.is_available():
    device = torch.device("cuda:0")  # you can continue going on here, like cuda:1 cuda:2....etc. 
    print("Running on the GPU")
else:
    device = torch.device("cpu")
    print("Running on the CPU")

Running on the GPU


In [3]:
class Net(nn.Module):
    input_size = 100
    
    def __init__(self):
        super().__init__()
            # fc = fully connected
        self.conv1 = nn.Conv2d(1,6, 5).cuda()
        self.conv2 = nn.Conv2d(6, 16, 5).cuda()
        self.pool = nn.MaxPool2d(2,2).cuda()
        
        self.fc1 = nn.Linear(1,120).cuda()
        self.fc2 = nn.Linear(120,84).cuda()
        self.out = nn.Linear(84,2).cuda()
        
    def convs(self,x):
#         print(x.size()[0])
        batch_size = x.size()[0]
        x = x.view(batch_size,1,self.input_size,self.input_size)
        x = self.pool(F.relu(self.conv1(x)))
        x = self.pool(F.relu(self.conv2(x)))
#         print(x.size())
        size = x.size()[1] * x.size()[2] * x.size()[3]
#         print(size)
        x = x.view(batch_size,size)
        self.fc1 = nn.Linear(size,120).cuda()
        return x
               
    def dense(self,x):
        
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        x = self.out(x)
        return x
    
    def forward(self,x,input_size = 100):
        self.input_size = input_size
        x = self.convs(x)
        x = self.dense(x)
        
        return x    

In [4]:
import math
class Audio():
    path = "data/Audio/train/"
    pathT = "data/Audio/train/"
    sub  = {"cats":0,"dogs":1}
    trainset = []
    testset = []
    batch_size = 4
    input_size = 300
    
    def __init__(self):
        self.net = Net().to(device)
        return
    
    def compress(self,x,size = 100):
        arrSize = x.size()[0]
        comSize = int(math.ceil(arrSize/(size**2)))
        ind = 0
        compressed = []
        while(ind < arrSize):
            z = x[ind:ind+comSize].sum()/comSize
            compressed.append(z)
            ind+=comSize
         
        if (len(compressed) > size**2):
            compressed = compressed[:size**2]
        elif (len(compressed) < size**2):
             for i in range((size**2)-len(compressed)):
                compressed.append(0)
                
        compressed = torch.FloatTensor(compressed)
        return compressed.view(-1,size),compressed
    
    def cut(self,x,size = 100):
        if(size%2 != 0):
            raise Exception("Invalid Size, Must be even size")
        _size = size
        self.input_size = size
        size = size**2 
        x_size = x.size()[0]
        compressed = []
        half = size/2
        half_x = x_size/2
        while(half_x + half > x_size and half_x >= 0 ):
           # print(half_x + half)
            half_x -= 1
        
        if (half_x == -1 or half_x - half < 0):
            raise Exception("Input out of Bound, Input Size cannot be cut ",x_size/2,half)
            
        left = int(half_x - half)
        right = int(half_x + half)
#         print(left,right,x)
        compressed = x[left:right].tolist()
        compressed = torch.FloatTensor(compressed)
#         print(compressed.size())
        return compressed.view(-1,_size),compressed
    
    def loadData(self,cut = 300):
        for label in self.sub:
            root = self.path+str(label)
            for i in tqdm(os.listdir(root)):
                try:
                    path = root+"/"+str(i)
                    frame,framerate = wl.readwavefile(path)
                    frame = torch.from_numpy(frame)
#                     print(frame)
#                     plt.plot(frame)
#                     plt.show()
#                     print(len(frame))
    #                 frame,_ = self.compress(frame,50)
                    frame,_ = self.cut(frame,cut)
#                     plt.plot(_)
#                     plt.show()

#                     print(frame.size())

                    self.trainset.append((frame,self.sub[str(label)]))
                except :
                    pass
        
        self.trainset = torch.utils.data.DataLoader(self.trainset, 
                                       batch_size=self.batch_size,
                                       shuffle=True,num_workers=2)
        torch.save(self.trainset,"temp/audio.pt")
    
    def loadTest(self,cut = 300):
        for label in self.sub:
            root = self.pathT+str(label)
            for i in tqdm(os.listdir(root)):
                try:
                    path = root+"/"+str(i)
                    frame,framerate = wl.readwavefile(path)
                    frame = torch.from_numpy(frame)
#                     print(frame)
#                     plt.plot(frame)
#                     plt.show()
#                     print(len(frame))
    #                 frame,_ = self.compress(frame,50)
                    frame,_ = self.cut(frame,cut)
#                     plt.plot(_)
#                     plt.show()

#                     print(frame.size())

                    self.testset.append((frame,self.sub[str(label)]))
                except :
                    pass
        
        self.testset = torch.utils.data.DataLoader(self.testset, 
                                       batch_size=self.batch_size,
                                       shuffle=True,num_workers=2)
        torch.save(self.testset,"temp/audio_test.pt")
    
    def load(self,path):
        self.trainset = torch.load(path)
    
    def loadT(self,path):
        self.testset = torch.load(path)
    
    def train(self,epochs=3):
        
        criterion = nn.CrossEntropyLoss()
        optimizer = optim.SGD(self.net.parameters(), lr=0.001, momentum=0.9)
        trainset = self.trainset 
        for epoch in tqdm(range(epochs)):
            for data in self.trainset :
#                 try:
                    x,y = data
                    x,y = x.to(device),y.to(device)
                    self.net.zero_grad() 
                    output = self.net.forward(x,input_size = self.input_size)
                    loss = criterion(output, y)
                    loss.backward()
                    optimizer.step()
#                 except Exception as e:
#                     print(e)
#                     break
                
            print(loss)
            self.test(self.trainset)
        
 
 
    def test(self,x): # test
        correct = 0
        total = 0
        trainset = x
        co = 0
        with torch.no_grad():
            for data in trainset:
                co+=1
                if(co == 20): 
                    break
                x,y = data
                x,y = x.to(device),y.to(device)
               # print("input",x.size())
                output = self.net.forward(x,input_size = self.input_size)
               # print(output,y)
                for idx,i in enumerate(output):
                    if torch.argmax(i) == y[idx]:
                        correct += 1
                    total += 1
        print("Accuracy : ",round(correct/total,3)*100,"%")
        return 0
        

In [5]:
aud = Audio()
aud.loadData(cut = 400)

100%|███████████████████████████████████████████████████████████████████████████████| 125/125 [00:00<00:00, 150.00it/s]
100%|█████████████████████████████████████████████████████████████████████████████████| 85/85 [00:00<00:00, 206.59it/s]


In [6]:
aud.load("temp/audio.pt")

In [7]:
aud.train(epochs = 10)

  0%|                                                                                           | 0/10 [00:00<?, ?it/s]

tensor(0.6808, device='cuda:0', grad_fn=<NllLossBackward>)


 10%|████████▎                                                                          | 1/10 [00:08<01:15,  8.43s/it]

Accuracy :  82.89999999999999 %
tensor(0.7396, device='cuda:0', grad_fn=<NllLossBackward>)


 20%|████████████████▌                                                                  | 2/10 [00:16<01:06,  8.35s/it]

Accuracy :  82.89999999999999 %
tensor(0.5256, device='cuda:0', grad_fn=<NllLossBackward>)


 30%|████████████████████████▉                                                          | 3/10 [00:25<01:00,  8.58s/it]

Accuracy :  82.89999999999999 %
tensor(0.4755, device='cuda:0', grad_fn=<NllLossBackward>)


 40%|█████████████████████████████████▏                                                 | 4/10 [00:33<00:50,  8.46s/it]

Accuracy :  84.2 %
tensor(0.8444, device='cuda:0', grad_fn=<NllLossBackward>)


 50%|█████████████████████████████████████████▌                                         | 5/10 [00:43<00:43,  8.77s/it]

Accuracy :  82.89999999999999 %
tensor(0.8740, device='cuda:0', grad_fn=<NllLossBackward>)


 60%|█████████████████████████████████████████████████▊                                 | 6/10 [00:51<00:34,  8.72s/it]

Accuracy :  82.89999999999999 %
tensor(0.3766, device='cuda:0', grad_fn=<NllLossBackward>)


 70%|██████████████████████████████████████████████████████████                         | 7/10 [00:59<00:25,  8.41s/it]

Accuracy :  82.89999999999999 %
tensor(0.6390, device='cuda:0', grad_fn=<NllLossBackward>)


 80%|██████████████████████████████████████████████████████████████████▍                | 8/10 [01:07<00:16,  8.20s/it]

Accuracy :  84.2 %
tensor(0.9529, device='cuda:0', grad_fn=<NllLossBackward>)


 90%|██████████████████████████████████████████████████████████████████████████▋        | 9/10 [01:14<00:08,  8.00s/it]

Accuracy :  84.2 %
tensor(0.6459, device='cuda:0', grad_fn=<NllLossBackward>)


100%|██████████████████████████████████████████████████████████████████████████████████| 10/10 [01:22<00:00,  8.24s/it]

Accuracy :  82.89999999999999 %





In [8]:
aud.loadTest()   

100%|███████████████████████████████████████████████████████████████████████████████| 125/125 [00:00<00:00, 263.84it/s]
100%|█████████████████████████████████████████████████████████████████████████████████| 85/85 [00:00<00:00, 340.91it/s]


AttributeError: 'Audio' object has no attribute 'testset'

In [None]:
aud.loadT("temp/audio_test.pt")
aud.test(aud.testset)