# 0. Package Loading

In [2]:
import pandas as pd 
import numpy as np 
import librosa
import matplotlib.pyplot as plt
import os
import json
from sklearn.model_selection import StratifiedGroupKFold
from sklearn.preprocessing import StandardScaler
from torch import nn
import torch
import torch.nn.functional as F
from torch.utils import data
from tqdm import tqdm
import copy
import plotly.graph_objects as go
from plotly.subplots import make_subplots
from plotly.offline import init_notebook_mode, iplot
init_notebook_mode(connected=True)

# 1. Feature Extraction

We need to generate more training examples since 1000 songs is not all that much. Slicing each song in 10 sub-parts seems like a reasonable size in order to retain enough information from their acoustic signature.

In [3]:
def feature_aggregation(array):
    return np.hstack([array.mean(axis=0, keepdims=True), array.std(axis=0, keepdims=True)])

In [4]:
def data_preprocess(num_slices, duration, sr, samples_per_slice):
    data = None

    song_count = 1
    for i, (dirpath, dirnames, filenames) in enumerate(os.walk(r'genres')):
        if not filenames:
            continue
        print(f'Making features for genre {i}')
        for file in filenames:
            y, sr = librosa.load(os.path.join(dirpath, file), duration=duration)
            for j in range(num_slices):
                start = samples_per_slice * j
                end = start + samples_per_slice
                mfcc = librosa.feature.mfcc(y=y[start:end], sr=sr).T
                rms = librosa.feature.rms(y=y[start:end]).T
                spectral_centroid = librosa.feature.spectral_centroid(y=y[start:end], sr=sr).T
                spectral_bandwidth = librosa.feature.spectral_bandwidth(y=y[start:end], sr=sr).T
                spectral_rolloff = librosa.feature.spectral_rolloff(y=y[start:end], sr=sr).T
                zero_crossing_rate = librosa.feature.zero_crossing_rate(y=y[start:end]).T
                tempo = librosa.feature.tempo(y=y[start:end], sr=sr).reshape(1,-1)
                mfcc = feature_aggregation(mfcc)
                rms = feature_aggregation(rms)
                spectral_centroid = feature_aggregation(spectral_centroid)
                spectral_bandwidth = feature_aggregation(spectral_bandwidth)
                spectral_rolloff = feature_aggregation(spectral_rolloff)
                zero_crossing_rate = feature_aggregation(zero_crossing_rate)
                data_piece = np.hstack([mfcc, rms, spectral_centroid, spectral_bandwidth, spectral_rolloff, zero_crossing_rate, tempo])
                data_piece = np.hstack([data_piece, [[i-1]]])
                data_piece = np.hstack([data_piece, [[song_count]]])
                if not isinstance(data, np.ndarray):
                    data = data_piece
                else:
                    data = np.vstack([data, data_piece])
            song_count += 1
    np.save('data.npy', data)

    return

In [5]:
def data_loading(path):
    data = np.load(path)

    X = data[:,:-2]
    y = data[:,-2]
    groups = data[:,-1]
    return X, y, groups

In [6]:
num_slices = 10
duration = 29
sr = 22050
samples_per_slice = int(sr*duration/num_slices)

In [7]:
# data_preprocess(num_slices, duration, sr, samples_per_slice)
X, y, groups = data_loading('data.npy')

# 2. Model Training

Since we split each song into 10 slices and we have 10 classes, we want to do a stratified and grouped train validation test split to ensure that each subset has roughly the same class distribution (balanced) and each song's different slices will not be split into different subsets.

In [8]:
def stratified_group_train_val_test_split(X, y, groups):

    sgkf_1 = StratifiedGroupKFold(n_splits=10, shuffle=True, random_state=1)
    for i, (train_val_index, test_index) in enumerate(sgkf_1.split(X, y, groups)):
        break
    sgkf_2 = StratifiedGroupKFold(n_splits=9, shuffle=True, random_state=1)
    X_train_val = X[train_val_index]
    y_train_val = y[train_val_index]
    groups_train_val = groups[train_val_index]
    for i, (train_index, val_index) in enumerate(sgkf_2.split(X_train_val, y_train_val, groups_train_val)):
        break   

    X_train, y_train = X_train_val[train_index], y_train_val[train_index]
    X_val, y_val = X_train_val[val_index], y_train_val[val_index]
    X_test, y_test = X[test_index], y[test_index]

    scaler = StandardScaler()
    X_train = scaler.fit_transform(X_train)
    X_val = scaler.transform(X_val)
    X_test = scaler.transform(X_test)
    
    return X_train, y_train, X_val, y_val, X_test, y_test

In [9]:
X_train, y_train, X_val, y_val, X_test, y_test = stratified_group_train_val_test_split(X, y, groups)

In [10]:
X_train.shape

(7990, 51)

In [11]:
# set dataloader
def form_dataloader(X, y, batch_size, shuffle):
    X = torch.tensor(X).float() # change float64 to float32
    y = torch.tensor(y).type(torch.LongTensor)
    data_tensor = data.TensorDataset(X,y)
    return data.DataLoader(data_tensor, batch_size, shuffle)

In [12]:
shuffle = True
batch_size = 16
train_data_iter = form_dataloader(X_train, y_train, batch_size, shuffle)
val_data_iter = form_dataloader(X_val, y_val, batch_size, shuffle)
test_data_iter = form_dataloader(X_test, y_test, batch_size, shuffle)

In [13]:
# design model structure
class FCNN(nn.Module):
    def __init__(self, input_size, output_size, activation):
        super(FCNN,self).__init__()
        self.input_size = input_size
        self.output_size = output_size
        
        # define the layers needed
        # fcnn
        self.fcnn = nn.Sequential(
            nn.Linear(input_size,256), 
            activation, 
            # nn.Dropout(0.5),
            nn.Linear(256,128), 
            activation, 
            # nn.Dropout(0.5), 
            nn.Linear(128,64), 
            activation, 
            # nn.Dropout(0.5), 
            nn.Linear(64,32), 
            activation, 
            # nn.Dropout(0.5), 
            nn.Linear(32,16), 
            activation, 
            # nn.Dropout(0.5), 
            nn.Linear(16,output_size), 
            nn.Softmax(dim=-1))
        

    # used to forward propagate, will be called automatically
    # x here is the actual mini-batch that will be fed into the model, so contains sample dimension
    def forward(self, x):
        return self.fcnn(x)

In [14]:
input_size = X_train.shape[-1]
output_size = 10
activation = nn.ReLU()
model = FCNN(input_size, output_size, activation)
model

FCNN(
  (fcnn): Sequential(
    (0): Linear(in_features=51, out_features=256, bias=True)
    (1): ReLU()
    (2): Linear(in_features=256, out_features=128, bias=True)
    (3): ReLU()
    (4): Linear(in_features=128, out_features=64, bias=True)
    (5): ReLU()
    (6): Linear(in_features=64, out_features=32, bias=True)
    (7): ReLU()
    (8): Linear(in_features=32, out_features=16, bias=True)
    (9): ReLU()
    (10): Linear(in_features=16, out_features=10, bias=True)
    (11): Softmax(dim=-1)
  )
)

In [15]:
class Trainer():
    def __init__(self, epochs: int, lr:float, loss: str, optim: str, model, call_back:dict):
        self.epochs = epochs
        self.lr = lr
        self.loss = loss
        self.optim = optim
        # change data, model, and loss function onto GPU
        self.device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
        self.model = model.to(self.device)
        self.call_back = call_back
        # store the batch sizes list
        self.batch_size_list = []
        
        
    def train(self, train_data_iter: data.DataLoader, val_data_iter: data.DataLoader):
        # define loss
        loss = self.__loss()
        loss = loss.to(self.device)
        # define optimizer
        optimizer = self.__optimizer()
        # check if we need to reduce lr dynamically
        if 'reducelr' in self.call_back.keys():
            scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer = optimizer, **self.call_back['reducelr'])
        # store the training and validating losses for each epoch (average of each batch)
        self.train_losses = []
        self.val_losses = []
        self.train_acc = []
        self.val_acc = []
        # for each epoch
        for epoch in range(self.epochs):
            loop = tqdm(enumerate(train_data_iter), total = len(iter(train_data_iter)))
            train_loss_epoch = []
            train_acc_epoch = 0
            # for each batch
            # batch variable stands for the Data stored in dataloader
            for index, (X,y) in loop:
                # store batch size using the first epoch
                if epoch == 0:
                    self.batch_size_list.append(X.shape[0])
                X = X.to(self.device)
                y = y.to(self.device)
                
                # calculate the loss (feed forward)
                output = self.model(X)
                l = loss(output,y)
                # clear the previous grads
                optimizer.zero_grad()
                # back propagate
                l.backward()
                # update weights
                optimizer.step()
                # calculate the training loss
                with torch.no_grad():
                    train_loss = l.cpu().data
                    train_loss_epoch.append(train_loss) 
                    train_acc_epoch += (output.argmax(-1) == y).sum().float().cpu().item()
                    

                
                # update information
                loop.set_description(f'Epoch [{epoch+1}/{self.epochs}]')
            
            # calculate the sample-average training loss
            train_loss = round(np.sum(np.array(train_loss_epoch) * np.array(self.batch_size_list)) / np.sum(self.batch_size_list),6)
            train_acc = round(train_acc_epoch / np.sum(self.batch_size_list), 4)
            # append the train averaged loss
            self.train_losses.append(train_loss)
            self.train_acc.append(train_acc)
            # use validation to evaluate
            val_loss, val_acc = self.evaluation(val_data_iter, loss)
            self.val_losses.append(val_loss)
            self.val_acc.append(val_acc)
            # print the result
            print(f'Train loss after epoch {(epoch+1):d} is: {train_loss:f}')
            print(f'Validation loss after epoch {(epoch+1):d} is: {val_loss:f}')
            print(f'Train Accuracy after epoch {(epoch+1):d} is: {train_acc:f}')
            print(f'Validation Accuracy after epoch {(epoch+1):d} is: {val_acc:f}')

            # callbacks:
            # reduce learning rate
            if 'reducelr' in self.call_back.keys():
                scheduler.step(val_loss)
            # Earlystopping
            if 'earlystopping' in self.call_back.keys():
                # store the best validation loss
                if epoch == 0:
                    self.best_loss = val_loss+0.1
                    self.patience = 0
                    self.best_weights = copy.deepcopy(self.model.state_dict())
                earlystopping = self.early_stop(val_loss)
                if earlystopping:
                    print('Early Stopping ...')
                    # save the best model without patience
                    self.model.load_state_dict(self.best_weights)
                    del self.best_weights
                    break
        
        # clear cache
        self.__release_cache()
        
        return
    
    def evaluation(self, val_data_iter: data.DataLoader, loss):
        val_batch_list = []
        val_loss_list = []
        val_acc = 0
        for (X,y) in val_data_iter:
            X = X.to(self.device)
            y = y.to(self.device)
            val_batch_list.append(X.shape[0])
            
            with torch.no_grad():
                output = self.model(X)
                val_loss = loss(output,y).cpu().data
                val_loss_list.append(val_loss)
                val_acc += (output.argmax(-1) == y).sum().float().cpu().item()
        # take average over all samples
        val_loss = round(np.sum(np.array(val_loss_list) * np.array(val_batch_list)) / np.sum(val_batch_list),6)
        val_acc = round(val_acc / np.sum(val_batch_list), 4)
        return val_loss, val_acc
    
    def early_stop(self, val_loss):
        params = self.call_back['earlystopping']
        if val_loss >= self.best_loss * (1 + params['threshold']):
            self.patience += 1
            print(f'Patience: {self.patience:d}')
            if self.patience >= params['patience']:
                return True
        elif val_loss < self.best_loss:
            self.best_loss = val_loss
            self.patience = 0
            # save the best model without patience
            self.best_weights = copy.deepcopy(self.model.state_dict())
        return False
        
        
    def training_graph(self):
        fig = make_subplots(rows=1, cols=2)
        fig.add_trace(go.Scatter(x=np.arange(self.epochs), y=self.train_losses, mode='lines', name='Train Loss'), row=1, col=1)
        fig.add_trace(go.Scatter(x=np.arange(self.epochs), y=self.val_losses, mode='lines', name='Validation Loss'), row=1, col=1)
        fig.add_trace(go.Scatter(x=np.arange(self.epochs), y=self.train_acc, mode='lines', name='Train Accuracy'), row=1, col=2)
        fig.add_trace(go.Scatter(x=np.arange(self.epochs), y=self.val_acc, mode='lines', name='Validation Accuracy'), row=1, col=2)
        fig.update_layout(
            title="Metrics Plot",
        ) 
        fig.update_yaxes(title_text="Loss", row=1, col=1)
        fig.update_yaxes(title_text="Accuracy", row=1, col=2)
        fig.update_xaxes(title_text="Epoch", row=1, col=1)
        fig.update_xaxes(title_text="Epoch", row=1, col=2)
        iplot(fig)
        
        return
        
    def save(self, path):
        if not os.path.exists(path):
            os.makedirs(path)
        torch.save(self.model, path+'model.pth')
    
    def __loss(self):
        if self.loss == 'nll':
            return nn.NLLLoss() # already take the average over batch_size
        elif self.loss == 'crossentorpy':
            return nn.CrossEntropyLoss()
        else:
            raise ValueError('Undefined Loss Function.')

        
    def __optimizer(self):
        if self.optim == 'sgd':
            return torch.optim.SGD(self.model.parameters(), lr=self.lr)
        elif self.optim == 'Adam':
            return torch.optim.Adam(self.model.parameters(), lr=self.lr)
        elif self.optim == 'rms':
            return torch.optim.RMSprop(self.model.parameters(), lr=self.lr)
        
    def __release_cache(self):
        torch.cuda.empty_cache()

In [16]:
call_back = {'earlystopping':{'patience':20, 'threshold':0}, 
             'reducelr':{'factor':0.25, 'patience':5, 'threshold':0, 'min_lr': 1e-6, 'verbose':True}}
epochs = 1000
lr = 0.001
trainer = Trainer(epochs = epochs, lr = lr, loss = 'crossentorpy', optim = 'Adam', model = model, call_back = call_back)
trainer.train(train_data_iter, val_data_iter)

Epoch [1/1000]: 100%|██████████| 500/500 [00:03<00:00, 165.43it/s]


Train loss after epoch 1 is: 2.044987
Validation loss after epoch 1 is: 1.959665
Train Accuracy after epoch 1 is: 0.408400
Validation Accuracy after epoch 1 is: 0.500000


Epoch [2/1000]: 100%|██████████| 500/500 [00:01<00:00, 377.50it/s]


Train loss after epoch 2 is: 1.895976
Validation loss after epoch 2 is: 1.898344
Train Accuracy after epoch 2 is: 0.562500
Validation Accuracy after epoch 2 is: 0.556400


Epoch [3/1000]: 100%|██████████| 500/500 [00:01<00:00, 403.25it/s]


Train loss after epoch 3 is: 1.848669
Validation loss after epoch 3 is: 1.867850
Train Accuracy after epoch 3 is: 0.609800
Validation Accuracy after epoch 3 is: 0.594100


Epoch [4/1000]: 100%|██████████| 500/500 [00:01<00:00, 404.63it/s]


Train loss after epoch 4 is: 1.811735
Validation loss after epoch 4 is: 1.872722
Train Accuracy after epoch 4 is: 0.650700
Validation Accuracy after epoch 4 is: 0.581200
Patience: 1


Epoch [5/1000]: 100%|██████████| 500/500 [00:01<00:00, 380.81it/s]


Train loss after epoch 5 is: 1.783306
Validation loss after epoch 5 is: 1.828282
Train Accuracy after epoch 5 is: 0.678000
Validation Accuracy after epoch 5 is: 0.623800


Epoch [6/1000]: 100%|██████████| 500/500 [00:01<00:00, 373.52it/s]


Train loss after epoch 6 is: 1.755594
Validation loss after epoch 6 is: 1.840179
Train Accuracy after epoch 6 is: 0.706500
Validation Accuracy after epoch 6 is: 0.615800
Patience: 1


Epoch [7/1000]: 100%|██████████| 500/500 [00:01<00:00, 386.37it/s]


Train loss after epoch 7 is: 1.741778
Validation loss after epoch 7 is: 1.854708
Train Accuracy after epoch 7 is: 0.719400
Validation Accuracy after epoch 7 is: 0.602000
Patience: 2


Epoch [8/1000]: 100%|██████████| 500/500 [00:01<00:00, 363.59it/s]


Train loss after epoch 8 is: 1.725906
Validation loss after epoch 8 is: 1.819697
Train Accuracy after epoch 8 is: 0.738200
Validation Accuracy after epoch 8 is: 0.637600


Epoch [9/1000]: 100%|██████████| 500/500 [00:01<00:00, 396.63it/s]


Train loss after epoch 9 is: 1.708146
Validation loss after epoch 9 is: 1.829791
Train Accuracy after epoch 9 is: 0.753400
Validation Accuracy after epoch 9 is: 0.627700
Patience: 1


Epoch [10/1000]: 100%|██████████| 500/500 [00:01<00:00, 367.35it/s]


Train loss after epoch 10 is: 1.701843
Validation loss after epoch 10 is: 1.783634
Train Accuracy after epoch 10 is: 0.759100
Validation Accuracy after epoch 10 is: 0.678200


Epoch [11/1000]: 100%|██████████| 500/500 [00:01<00:00, 380.40it/s]


Train loss after epoch 11 is: 1.691909
Validation loss after epoch 11 is: 1.793842
Train Accuracy after epoch 11 is: 0.769300
Validation Accuracy after epoch 11 is: 0.660400
Patience: 1


Epoch [12/1000]: 100%|██████████| 500/500 [00:01<00:00, 374.99it/s]


Train loss after epoch 12 is: 1.679379
Validation loss after epoch 12 is: 1.768096
Train Accuracy after epoch 12 is: 0.781200
Validation Accuracy after epoch 12 is: 0.691100


Epoch [13/1000]: 100%|██████████| 500/500 [00:01<00:00, 368.69it/s]


Train loss after epoch 13 is: 1.671381
Validation loss after epoch 13 is: 1.747172
Train Accuracy after epoch 13 is: 0.790500
Validation Accuracy after epoch 13 is: 0.715800


Epoch [14/1000]: 100%|██████████| 500/500 [00:01<00:00, 368.23it/s]


Train loss after epoch 14 is: 1.671452
Validation loss after epoch 14 is: 1.770382
Train Accuracy after epoch 14 is: 0.789400
Validation Accuracy after epoch 14 is: 0.689100
Patience: 1


Epoch [15/1000]: 100%|██████████| 500/500 [00:01<00:00, 396.92it/s]


Train loss after epoch 15 is: 1.660703
Validation loss after epoch 15 is: 1.805932
Train Accuracy after epoch 15 is: 0.801600
Validation Accuracy after epoch 15 is: 0.653500
Patience: 2


Epoch [16/1000]: 100%|██████████| 500/500 [00:01<00:00, 397.98it/s]


Train loss after epoch 16 is: 1.661213
Validation loss after epoch 16 is: 1.786244
Train Accuracy after epoch 16 is: 0.800800
Validation Accuracy after epoch 16 is: 0.673300
Patience: 3


Epoch [17/1000]: 100%|██████████| 500/500 [00:01<00:00, 386.24it/s]


Train loss after epoch 17 is: 1.654982
Validation loss after epoch 17 is: 1.768827
Train Accuracy after epoch 17 is: 0.805400
Validation Accuracy after epoch 17 is: 0.689100
Patience: 4


Epoch [18/1000]: 100%|██████████| 500/500 [00:01<00:00, 387.34it/s]


Train loss after epoch 18 is: 1.651889
Validation loss after epoch 18 is: 1.793658
Train Accuracy after epoch 18 is: 0.808600
Validation Accuracy after epoch 18 is: 0.666300
Patience: 5


Epoch [19/1000]: 100%|██████████| 500/500 [00:01<00:00, 368.08it/s]


Train loss after epoch 19 is: 1.647372
Validation loss after epoch 19 is: 1.759106
Train Accuracy after epoch 19 is: 0.813400
Validation Accuracy after epoch 19 is: 0.697000
Epoch 00019: reducing learning rate of group 0 to 2.5000e-04.
Patience: 6


Epoch [20/1000]: 100%|██████████| 500/500 [00:01<00:00, 386.59it/s]


Train loss after epoch 20 is: 1.613113
Validation loss after epoch 20 is: 1.743828
Train Accuracy after epoch 20 is: 0.848900
Validation Accuracy after epoch 20 is: 0.713900


Epoch [21/1000]: 100%|██████████| 500/500 [00:01<00:00, 327.77it/s]


Train loss after epoch 21 is: 1.603101
Validation loss after epoch 21 is: 1.736662
Train Accuracy after epoch 21 is: 0.859300
Validation Accuracy after epoch 21 is: 0.724800


Epoch [22/1000]: 100%|██████████| 500/500 [00:01<00:00, 328.40it/s]


Train loss after epoch 22 is: 1.594930
Validation loss after epoch 22 is: 1.747780
Train Accuracy after epoch 22 is: 0.866600
Validation Accuracy after epoch 22 is: 0.709900
Patience: 1


Epoch [23/1000]: 100%|██████████| 500/500 [00:01<00:00, 364.34it/s]


Train loss after epoch 23 is: 1.588968
Validation loss after epoch 23 is: 1.743190
Train Accuracy after epoch 23 is: 0.873100
Validation Accuracy after epoch 23 is: 0.713900
Patience: 2


Epoch [24/1000]: 100%|██████████| 500/500 [00:01<00:00, 322.33it/s]


Train loss after epoch 24 is: 1.585893
Validation loss after epoch 24 is: 1.753430
Train Accuracy after epoch 24 is: 0.876100
Validation Accuracy after epoch 24 is: 0.706900
Patience: 3


Epoch [25/1000]: 100%|██████████| 500/500 [00:01<00:00, 289.55it/s]


Train loss after epoch 25 is: 1.581853
Validation loss after epoch 25 is: 1.748922
Train Accuracy after epoch 25 is: 0.880200
Validation Accuracy after epoch 25 is: 0.709900
Patience: 4


Epoch [26/1000]: 100%|██████████| 500/500 [00:01<00:00, 316.64it/s]


Train loss after epoch 26 is: 1.578005
Validation loss after epoch 26 is: 1.745499
Train Accuracy after epoch 26 is: 0.883600
Validation Accuracy after epoch 26 is: 0.713900
Patience: 5


Epoch [27/1000]: 100%|██████████| 500/500 [00:01<00:00, 358.17it/s]


Train loss after epoch 27 is: 1.574743
Validation loss after epoch 27 is: 1.762350
Train Accuracy after epoch 27 is: 0.886500
Validation Accuracy after epoch 27 is: 0.694100
Epoch 00027: reducing learning rate of group 0 to 6.2500e-05.
Patience: 6


Epoch [28/1000]: 100%|██████████| 500/500 [00:01<00:00, 284.87it/s]


Train loss after epoch 28 is: 1.571433
Validation loss after epoch 28 is: 1.751422
Train Accuracy after epoch 28 is: 0.890200
Validation Accuracy after epoch 28 is: 0.705900
Patience: 7


Epoch [29/1000]: 100%|██████████| 500/500 [00:01<00:00, 335.28it/s]


Train loss after epoch 29 is: 1.567971
Validation loss after epoch 29 is: 1.747288
Train Accuracy after epoch 29 is: 0.893600
Validation Accuracy after epoch 29 is: 0.709900
Patience: 8


Epoch [30/1000]: 100%|██████████| 500/500 [00:01<00:00, 342.90it/s]


Train loss after epoch 30 is: 1.566880
Validation loss after epoch 30 is: 1.748849
Train Accuracy after epoch 30 is: 0.894500
Validation Accuracy after epoch 30 is: 0.707900
Patience: 9


Epoch [31/1000]: 100%|██████████| 500/500 [00:01<00:00, 365.99it/s]


Train loss after epoch 31 is: 1.565699
Validation loss after epoch 31 is: 1.749172
Train Accuracy after epoch 31 is: 0.896100
Validation Accuracy after epoch 31 is: 0.705900
Patience: 10


Epoch [32/1000]: 100%|██████████| 500/500 [00:01<00:00, 387.11it/s]


Train loss after epoch 32 is: 1.564530
Validation loss after epoch 32 is: 1.747755
Train Accuracy after epoch 32 is: 0.897100
Validation Accuracy after epoch 32 is: 0.709900
Patience: 11


Epoch [33/1000]: 100%|██████████| 500/500 [00:01<00:00, 374.57it/s]


Train loss after epoch 33 is: 1.563580
Validation loss after epoch 33 is: 1.750917
Train Accuracy after epoch 33 is: 0.897700
Validation Accuracy after epoch 33 is: 0.705900
Epoch 00033: reducing learning rate of group 0 to 1.5625e-05.
Patience: 12


Epoch [34/1000]: 100%|██████████| 500/500 [00:01<00:00, 376.29it/s]


Train loss after epoch 34 is: 1.562471
Validation loss after epoch 34 is: 1.747492
Train Accuracy after epoch 34 is: 0.899700
Validation Accuracy after epoch 34 is: 0.712900
Patience: 13


Epoch [35/1000]: 100%|██████████| 500/500 [00:01<00:00, 369.39it/s]


Train loss after epoch 35 is: 1.562008
Validation loss after epoch 35 is: 1.747844
Train Accuracy after epoch 35 is: 0.899900
Validation Accuracy after epoch 35 is: 0.711900
Patience: 14


Epoch [36/1000]: 100%|██████████| 500/500 [00:01<00:00, 390.89it/s]


Train loss after epoch 36 is: 1.561798
Validation loss after epoch 36 is: 1.748231
Train Accuracy after epoch 36 is: 0.899900
Validation Accuracy after epoch 36 is: 0.710900
Patience: 15


Epoch [37/1000]: 100%|██████████| 500/500 [00:01<00:00, 380.69it/s]


Train loss after epoch 37 is: 1.561628
Validation loss after epoch 37 is: 1.748345
Train Accuracy after epoch 37 is: 0.900000
Validation Accuracy after epoch 37 is: 0.707900
Patience: 16


Epoch [38/1000]: 100%|██████████| 500/500 [00:01<00:00, 393.91it/s]


Train loss after epoch 38 is: 1.561492
Validation loss after epoch 38 is: 1.748787
Train Accuracy after epoch 38 is: 0.900000
Validation Accuracy after epoch 38 is: 0.708900
Patience: 17


Epoch [39/1000]: 100%|██████████| 500/500 [00:01<00:00, 410.27it/s]


Train loss after epoch 39 is: 1.561328
Validation loss after epoch 39 is: 1.749691
Train Accuracy after epoch 39 is: 0.900000
Validation Accuracy after epoch 39 is: 0.705900
Epoch 00039: reducing learning rate of group 0 to 3.9063e-06.
Patience: 18


Epoch [40/1000]: 100%|██████████| 500/500 [00:01<00:00, 406.95it/s]


Train loss after epoch 40 is: 1.561048
Validation loss after epoch 40 is: 1.750011
Train Accuracy after epoch 40 is: 0.900300
Validation Accuracy after epoch 40 is: 0.706900
Patience: 19


Epoch [41/1000]: 100%|██████████| 500/500 [00:01<00:00, 392.40it/s]


Train loss after epoch 41 is: 1.560997
Validation loss after epoch 41 is: 1.750322
Train Accuracy after epoch 41 is: 0.900300
Validation Accuracy after epoch 41 is: 0.705900
Patience: 20
Early Stopping ...


In [17]:
trainer.training_graph()

# 3. Model Testing

In [18]:
def predict(data_iter, model):
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    pred = []
    corrects = 0
    total_size = 0
    for (X,y) in tqdm(data_iter):
        batch_size = X.shape[0]
        total_size += batch_size
        X = X.to(device)
        y = y.to(device)
        # add this when do not need to record gradients
        with torch.no_grad():
            # calculate the loss
            output = model(X)
            corrects += (output.argmax(-1)==y).sum().cpu().item()
            pred += output.argmax(-1).cpu().numpy().tolist()
    # clear cache
    torch.cuda.empty_cache()
    
    return np.array(pred), corrects / total_size

In [19]:
predicted, accuracy = predict(test_data_iter, model)

100%|██████████| 63/63 [00:00<00:00, 1550.94it/s]


In [20]:
print(f'The accuracy for the test data set is {accuracy}')

The accuracy for the test data set is 0.658
