In [1]:
import numpy as np
import matplotlib.pyplot as plt
from scipy.io.wavfile import read, write
from IPython.display import Audio
import torch
from numpy.fft import fft, ifft         
import pandas as pd                 
from os import getcwd, listdir, chdir 
from os.path import join
import math, random
import torchaudio
from torchaudio import transforms
from torch.utils.data import DataLoader, Dataset, random_split
from sklearn.model_selection import train_test_split
from PIL import Image

%matplotlib inline


In [2]:
# Root of the bird image dataset. The code below walks it as
# <root>/<split>/<species folder>/<image file>.
# NOTE(review): absolute, machine-specific path; consider making it configurable.
pathClassiBird="C:/Users/friez/OneDrive - Tilburg University/CSAI/Year_3_Sem_1/Software_Engineering/Project/Dataset/BirdImage"


difSet = ['test','train', 'valid']
labelAll =[]

# Map every species folder name to an integer class id, taken from the
# 'train' split. Sorting makes the ids independent of the order in which
# the operating system happens to list the directory.
unique = sorted(listdir(join(pathClassiBird, difSet[1])))
lbls = {label: i for i, label in enumerate(unique)}

# Collect one record per image and build the DataFrame once at the end;
# concatenating inside the loop re-copies the whole frame for every image.
records = []
for setName in difSet:
    difSetPath = join(pathClassiBird, setName)
    for label in listdir(difSetPath):
        for fileName in listdir(join(difSetPath, label)):
            # Strip only the final extension so names containing extra dots survive.
            records.append((setName, label, lbls[label], fileName.rsplit(".", 1)[0]))

imageData = pd.DataFrame(records, columns=["Set", "Label", "Numeric Label", "Path"])
print(imageData)

         Set                    Label Numeric Label Path
0       test          ABBOTTS BABBLER             0    1
1       test          ABBOTTS BABBLER             0    2
2       test          ABBOTTS BABBLER             0    3
3       test          ABBOTTS BABBLER             0    4
4       test          ABBOTTS BABBLER             0    5
...      ...                      ...           ...  ...
75121  valid  YELLOW HEADED BLACKBIRD           449    1
75122  valid  YELLOW HEADED BLACKBIRD           449    2
75123  valid  YELLOW HEADED BLACKBIRD           449    3
75124  valid  YELLOW HEADED BLACKBIRD           449    4
75125  valid  YELLOW HEADED BLACKBIRD           449    5

[75126 rows x 4 columns]


In [3]:
# preprocessing the data
class ImageUtil():
    """Namespace for image-loading helpers used during preprocessing."""

    @staticmethod
    def open(path):
        """Open the image file at ``path`` and return the PIL image object.

        Declared static so it works both as ``ImageUtil.open(path)`` and when
        called on an instance (without the decorator the instance would be
        passed as ``path``).
        """
        return Image.open(path)
    

In [4]:

# Sanity check: load one test image, resize it, and confirm that moving the
# channel axis to the front yields a (channels, height, width) tensor.
path = join(pathClassiBird, "test", "ABBOTTS BABBLER", "1.jpg")
im = torch.tensor(np.array(ImageUtil.open(path).resize((300, 300))))
print(im.permute(2, 0, 1).shape)

torch.Size([3, 300, 300])


In [5]:
# ----------------------------
# Image Dataset
# ----------------------------
class ImageDS(Dataset):
  """Dataset that loads the images listed in a DataFrame.

  Each row of ``df`` must provide the columns ``Set``, ``Label``, ``Path``
  (file name without the ``.jpg`` extension) and ``Numeric Label``. The image
  file is looked up at ``<data_path>/<Set>/<Label>/<Path>.jpg``.

  Args:
    df: DataFrame with one row per image.
    data_path: Root directory of the image folders (converted to ``str``).
    image_size: Size passed to PIL's ``resize`` for every image.
      Defaults to ``(300, 300)``.
  """
  def __init__(self, df, data_path, image_size=(300, 300)):
    self.df = df
    self.data_path = str(data_path)
    self.image_size = tuple(image_size)

  # ----------------------------
  # Number of items in dataset
  # ----------------------------
  def __len__(self):
    return len(self.df)

  # ----------------------------
  # Get i'th item in dataset
  # ----------------------------
  def __getitem__(self, idx):
    """Return ``(image, class_id)`` for the row at position ``idx``.

    The image is a float tensor with the channel axis first; pixel values
    are not rescaled here.
    """
    # Samplers hand out positions 0..len-1, so look the row up by position;
    # this also works when the frame's index labels were not reset.
    row = self.df.iloc[idx]
    image_file = join(self.data_path, row["Set"], row["Label"], row["Path"]) + ".jpg"
    class_id = int(row["Numeric Label"])

    # Force three channels so grayscale or RGBA files still produce an
    # array with a trailing channel axis of size 3.
    img = ImageUtil.open(image_file).convert("RGB")
    img = img.resize(self.image_size)
    img = torch.tensor(np.array(img)).permute(2, 0, 1)
    return img.float(), class_id

In [6]:
def _split_dataset(split_name):
    """Build an ImageDS over the rows of ``imageData`` whose ``Set`` equals ``split_name``."""
    # drop=True keeps the old row labels from being added as an extra 'index' column.
    rows = imageData[imageData["Set"] == split_name].reset_index(drop=True)
    return ImageDS(rows, pathClassiBird)


train_ds = _split_dataset("train")
test_ds  = _split_dataset("test")
val_ds   = _split_dataset("valid")


# Create training and validation data loaders
batchSize = 32
# Only request pinned host memory when a CUDA device is present.
pinMemory = torch.cuda.is_available()
train_dl = torch.utils.data.DataLoader(train_ds, batch_size=batchSize, shuffle=True, pin_memory=pinMemory)
val_dl = torch.utils.data.DataLoader(val_ds, batch_size=batchSize, shuffle=False, pin_memory=pinMemory)

In [12]:
import torch.nn as nn
import torch.nn.functional as F

class ImageClassificationBase(nn.Module):
    """Shared training/validation steps for image classifiers.

    Subclasses provide ``forward``. Every batch is an ``(images, labels)``
    pair; images are cast to float and divided by 255 before the forward pass.
    """

    def _batch_loss(self, batch):
        """Run one batch through the model; return (predictions, labels, cross-entropy loss)."""
        images, labels = batch
        predictions = self(images.float() / 255)
        return predictions, labels, F.cross_entropy(predictions, labels)

    def training_step(self, batch):
        """Return the cross-entropy loss for one training batch."""
        _, _, loss = self._batch_loss(batch)
        return loss

    def validation_step(self, batch):
        """Return the detached loss and the accuracy for one validation batch."""
        predictions, labels, loss = self._batch_loss(batch)
        return {'val_loss': loss.detach(), 'val_acc': accuracy(predictions, labels)}

    def validation_epoch_end(self, outputs):
        """Average the per-batch losses and accuracies into plain Python floats."""
        mean_loss = torch.stack([step['val_loss'] for step in outputs]).mean()
        mean_acc = torch.stack([step['val_acc'] for step in outputs]).mean()
        return {'val_loss': mean_loss.item(), 'val_acc': mean_acc.item()}

    def epoch_end(self, epoch, result):
        """Print a one-line summary of the metrics in ``result`` for ``epoch``."""
        template = "Epoch [{}], train_loss: {:.4f}, val_loss: {:.4f}, val_acc: {:.4f}"
        print(template.format(
            epoch, result['train_loss'], result['val_loss'], result['val_acc']))

In [13]:
import torch.nn as nn
import torch.nn.functional as F
from torch.nn import init

# ----------------------------
# Image Classification Model
# ----------------------------

class NaturalSceneClassification(ImageClassificationBase):
    """Convolutional classifier: three conv/conv/max-pool stages and a 3-layer MLP head.

    Args:
        num_classes: Size of the final linear layer's output. Defaults to 450.
        image_size: Input height/width, either one int for square inputs or a
            ``(height, width)`` pair. Defaults to 300, which yields the
            350464 flattened features the head was originally written for.

    Raises:
        ValueError: If ``image_size`` is too small to survive the three pools.
    """
    def __init__(self, num_classes=450, image_size=300):
        super().__init__()
        if isinstance(image_size, int):
            height, width = image_size, image_size
        else:
            height, width = image_size

        # The 3x3 convolutions use padding 1 / stride 1 and keep the spatial
        # size; each of the three 2x2 max-pools halves it, rounding down.
        flat_features = 256 * (height // 8) * (width // 8)
        if flat_features <= 0:
            raise ValueError("image_size must be at least 8 pixels in each dimension")

        self.network = nn.Sequential(

            nn.Conv2d(3, 32, kernel_size = 3, padding = 1),
            nn.ReLU(),
            nn.Conv2d(32,64, kernel_size = 3, stride = 1, padding = 1),
            nn.ReLU(),
            nn.MaxPool2d(2,2),

            nn.Conv2d(64, 128, kernel_size = 3, stride = 1, padding = 1),
            nn.ReLU(),
            nn.Conv2d(128 ,128, kernel_size = 3, stride = 1, padding = 1),
            nn.ReLU(),
            nn.MaxPool2d(2,2),

            nn.Conv2d(128, 256, kernel_size = 3, stride = 1, padding = 1),
            nn.ReLU(),
            nn.Conv2d(256,256, kernel_size = 3, stride = 1, padding = 1),
            nn.ReLU(),
            nn.MaxPool2d(2,2),

            nn.Flatten(),
            nn.Linear(flat_features,1024),
            nn.ReLU(),
            nn.Linear(1024, 512),
            nn.ReLU(),
            nn.Linear(512,num_classes)
        )

    def forward(self, xb):
        """Return the class scores produced by the network for the batch ``xb``."""
        return self.network(xb)
    
#simple_model = nn.Sequential(
 #   nn.Conv2d(3, 8, kernel_size=3, stride=1, padding=1),
 #   nn.MaxPool2d(2, 2)
#)


In [14]:
def accuracy(outputs, labels):
    """Return, as a 0-d tensor, the fraction of rows whose highest-scoring column equals the label."""
    predicted = torch.max(outputs, dim=1).indices
    n_correct = (predicted == labels).sum().item()
    return torch.tensor(n_correct / len(predicted))

  
@torch.no_grad()
def evaluate(model, val_loader):
    """Switch ``model`` to eval mode, run ``validation_step`` on every batch, and return the aggregated result."""
    model.eval()
    step_results = []
    for batch in val_loader:
        step_results.append(model.validation_step(batch))
    return model.validation_epoch_end(step_results)

  
def fit(epochs, lr, model, train_loader, val_loader, opt_func = torch.optim.SGD):
    """Train ``model`` for ``epochs`` epochs and return the per-epoch metrics.

    Args:
        epochs: Number of passes over ``train_loader``.
        lr: Learning rate passed to ``opt_func``.
        model: Module providing ``training_step`` and ``epoch_end`` (and the
            validation hooks used by ``evaluate``).
        train_loader: Iterable of training batches.
        val_loader: Iterable of validation batches, evaluated after each epoch.
        opt_func: Optimizer constructor called as ``opt_func(params, lr)``.

    Returns:
        A list with one dict per epoch: the result of ``evaluate`` plus the
        mean training loss under ``'train_loss'``.
    """
    history = []
    optimizer = opt_func(model.parameters(),lr)
    for epoch in range(epochs):

        model.train()
        train_losses = []
        for batch in train_loader:
            loss = model.training_step(batch)
            # Keep only the value: storing the attached tensor would hold on
            # to its autograd history for every batch of the epoch.
            train_losses.append(loss.detach())
            loss.backward()
            optimizer.step()
            optimizer.zero_grad()

        result = evaluate(model, val_loader)
        result['train_loss'] = torch.stack(train_losses).mean().item()
        model.epoch_end(epoch, result)
        history.append(result)

    return history

In [None]:
# Training configuration: two epochs of Adam at a learning rate of 0.001.
num_epochs = 2
lr = 0.001
opt_func = torch.optim.Adam

# Fit the model on the training data; `history` holds the metrics recorded after each epoch.
model = NaturalSceneClassification()
history = fit(num_epochs, lr, model, train_dl, val_dl, opt_func)


def plot_accuracies(history):
    """Plot the validation accuracy of every epoch in ``history``.

    A new figure is created on each call so the curve is never drawn on top
    of whatever axes another plot in the same cell left active.
    """
    accuracies = [x['val_acc'] for x in history]
    _, ax = plt.subplots()
    ax.plot(accuracies, '-x')
    ax.set_xlabel('epoch')
    ax.set_ylabel('accuracy')
    ax.set_title('Accuracy vs. No. of epochs')


plot_accuracies(history)

def plot_losses(history):
    """Plot the training and validation loss of every epoch in ``history``.

    A new figure is created on each call so the loss curves are not overlaid
    on axes left active by an earlier plot in the same cell.
    """
    train_losses = [x.get('train_loss') for x in history]
    val_losses = [x['val_loss'] for x in history]
    _, ax = plt.subplots()
    ax.plot(train_losses, '-bx')
    ax.plot(val_losses, '-rx')
    ax.set_xlabel('epoch')
    ax.set_ylabel('loss')
    ax.legend(['Training', 'Validation'])
    ax.set_title('Loss vs. No. of epochs')

plot_losses(history)