In [27]:
# library imports
import os
import glob
import cv2
import librosa
import torch as tc
import torchvision
import numpy as np
import pandas as pd
from torch import nn
import seaborn as sns
import librosa.display
from torch import optim
from datetime import datetime
import matplotlib.pyplot as plt
from sklearn import preprocessing
from torchvision import transforms
from torch.utils.data import DataLoader
from sklearn.preprocessing import minmax_scale
from sklearn.model_selection import train_test_split

**Preliminary data analysis**

In [2]:
# Load the 30-second feature table from the GTZAN input folder and preview
# its first ten rows.
audioData = pd.read_csv(
    '../input/gtzan-dataset-music-genre-classification/Data/features_30_sec.csv'
)
audioData.head(10)

In [3]:
# Show the dtype pandas inferred for every column of the feature table.
audioData.dtypes

**Correlation testing**

In [4]:
# General correlogram: Pearson correlation between every pair of numeric
# feature columns, drawn as a heatmap.
# Non-numeric columns (e.g. the `label` column that is label-encoded later in
# this notebook) are excluded explicitly, because recent pandas releases raise
# on them instead of silently dropping them.
corrDataFrame = audioData.select_dtypes(include='number').corr(method='pearson')
plt.figure(figsize=(20, 20))
sns.heatmap(corrDataFrame, cmap="Blues")

In [5]:
# filtering only the pairs which have high correlation

def get_redundant_pairs(df):
    '''Collect the label pairs on or below the diagonal of a column-by-column matrix.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame whose column labels are paired up.

    Returns
    -------
    set of tuple
        Every ``(columns[i], columns[j])`` with ``j <= i``, i.e. the diagonal
        and the lower triangle.
    '''
    names = df.columns
    return {
        (row_name, names[col_pos])
        for row_pos, row_name in enumerate(names)
        for col_pos in range(row_pos + 1)
    }

def get_top_abs_correlations(df, n=5):
    '''Return the `n` strongest absolute pairwise correlations between columns of `df`.

    The columns of `df` are correlated with ``df.corr()``, absolute values are
    taken, and the matrix is flattened to a Series indexed by column pairs.
    The diagonal and lower-triangle pairs reported by ``get_redundant_pairs``
    are dropped so each pair of distinct columns appears once.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame whose columns are correlated with each other.
    n : int, default 5
        Number of leading entries to keep after sorting.

    Returns
    -------
    pandas.Series
        The first `n` entries in descending order of absolute correlation.
    '''
    pair_strengths = df.corr().abs().unstack()
    unique_pairs = pair_strengths.drop(labels=get_redundant_pairs(df))
    ranked = unique_pairs.sort_values(ascending=False)
    return ranked[0:n]

print("Top Absolute Correlations")

# Rank the correlations between the numeric features themselves.
# get_top_abs_correlations() calls .corr() on whatever it receives, so handing
# it the already-computed correlation matrix would rank "correlations of
# correlations" instead of feature correlations.
CORR_THRESHOLD = 0.8  # only pairs whose absolute correlation exceeds this are kept
numericFeatures = audioData.select_dtypes(include='number')
numFeatures = numericFeatures.shape[1]

# Ask for every distinct pair, then keep those above the threshold rather than
# relying on a hand-counted number of rows.
allPairs = get_top_abs_correlations(numericFeatures, numFeatures * (numFeatures - 1) // 2)
topCorr = pd.DataFrame(allPairs[allPairs > CORR_THRESHOLD])
pd.set_option('display.max_rows', None)
topCorr

In [6]:
'''

Notes :

- Use Ctrl + F to find the rows with the desired dependent variable.
- Consider only the elements paired with them as independent variables for the model.

'''

**Model training**

In [35]:
# train/test data loading
df = pd.read_csv('../input/gtzan-dataset-music-genre-classification/Data/features_30_sec.csv')
train = df.sample(frac=0.8, random_state=200)  # random_state seeds the 80/20 split so it is reproducible
test = df.drop(train.index)                    # every row that was not sampled into train

# dependent variable, categorical (0-100)
# NOTE(review): assumes the CSV provides a 'volume' column — confirm; a
# KeyError here means the target has to be derived or merged in first.
Ytest = test['volume'].to_numpy()
Ytrain = train['volume'].to_numpy()

# independent variable 1, categorical
# Fit the encoder once on all labels and only *transform* the subsets, so the
# same genre always maps to the same integer in both train and test.
# Re-fitting per subset gives different mappings whenever a subset is missing
# one of the labels.
le = preprocessing.LabelEncoder()
le.fit(df['label'])
X1test = le.transform(test['label'])
X1train = le.transform(train['label'])

# independent variable 2, numerical
X2test = test['rms_mean'].to_numpy()
X2train = train['rms_mean'].to_numpy()

# add on the remaining relevant variables here

In [None]:
# declaring constants
numInputs = 2 # number of input variables based on correlation test
# NOTE(review): the train and test frames come from an 80/20 split
# (frac=0.8), so they have different row counts and cannot both contain N
# rows — confirm how N is meant to be used before reshaping with it.
N = 1000 # number of entries

In [8]:
# data reshaping
# Stack the 1-D feature arrays as columns, giving (num_samples, numInputs).
# The row count is taken from the data itself: train and test are an 80/20
# split, so forcing both to a single hard-coded length cannot work.
Xtest = np.column_stack([X1test, X2test])     # add on all the independent testing variables in this list
Xtrain = np.column_stack([X1train, X2train])  # add on all the independent training variables in this list

# Guard against the feature list and numInputs drifting apart.
assert Xtest.shape[1] == numInputs, "Xtest column count does not match numInputs"
assert Xtrain.shape[1] == numInputs, "Xtrain column count does not match numInputs"

Ytest = Ytest.reshape(-1, 1)    # one dependent test variable, as a column vector
Ytrain = Ytrain.reshape(-1, 1)  # one dependent train variable, as a column vector

# data type conversion: float32 numpy arrays -> torch tensors
xtest = tc.from_numpy(Xtest.astype(np.float32))
xtrain = tc.from_numpy(Xtrain.astype(np.float32))

ytest = tc.from_numpy(Ytest.astype(np.float32))
ytrain = tc.from_numpy(Ytrain.astype(np.float32))

In [10]:
# model definition
class NeuralNetwork(nn.Module):
    '''Fully connected feed-forward network.

    Layer widths: numInputs -> 512 -> 512 -> 256 -> 256 -> numOutputs, with a
    ReLU after every linear layer except the last one.

    Parameters
    ----------
    numInputs : int
        Number of input features per sample.
    numOutputs : int, default 100
        Width of the final linear layer. The default keeps the previously
        hard-coded size, so existing ``NeuralNetwork(numInputs)`` calls behave
        as before.
    '''

    def __init__(self, numInputs, numOutputs=100):
        super().__init__()
        # Flattens every dimension after the batch dimension before the stack.
        self.flatten = nn.Flatten()
        # Attribute name and layer order are kept unchanged so parameter names
        # (e.g. `linear_relu_stack.0.weight`) stay the same.
        self.linear_relu_stack = nn.Sequential(
            nn.Linear(numInputs, 512),
            nn.ReLU(),
            nn.Linear(512, 512),
            nn.ReLU(),
            nn.Linear(512, 256),
            nn.ReLU(),
            nn.Linear(256, 256),
            nn.ReLU(),
            nn.Linear(256, numOutputs),
        )

    def forward(self, x):
        '''Return the raw (un-activated) outputs of the last layer for batch `x`.'''
        x = self.flatten(x)
        logits = self.linear_relu_stack(x)
        return logits

In [16]:
# error function & optimizer
model = NeuralNetwork(numInputs)
# NOTE(review): the network's last layer emits 100 values per sample while the
# targets are reshaped to a single column, so MSELoss has to broadcast the two
# against each other. The training cell scores predictions with a top-1 class
# comparison — confirm whether a classification loss was intended here.
e_func = tc.nn.MSELoss()
# NOTE(review): this rebinds `optim`, hiding the `optim` module imported from
# torch in the import cell. The training cell relies on this name.
optim = tc.optim.SGD(model.parameters(), lr = 0.001)

In [None]:
# move model to GPU (falls back to the CPU when CUDA is unavailable)
# torch is imported under the alias `tc` in this notebook; the bare name
# `torch` is never bound, so it must not be used here.
device = tc.device("cuda" if tc.cuda.is_available() else "cpu")
model.to(device)

In [17]:
# model training
# Full-batch training: every epoch runs one optimisation step on the whole
# training set and one evaluation pass on the whole test set.
ep = 1000  # number of epochs
train_losses, test_losses, accuracies = [], [], []

# The data is used as a single batch, so move it to the device once instead of
# on every epoch.
xtrain, ytrain = xtrain.to(device), ytrain.to(device)
xtest, ytest = xtest.to(device), ytest.to(device)

for e in range(ep):

    # training model
    model.train()
    optim.zero_grad()

    output = model(xtrain)
    loss = e_func(output, ytrain)

    loss.backward()
    optim.step()

    # Assuming e_func keeps PyTorch's default 'mean' reduction, the loss is
    # already averaged over the batch, so it is recorded as-is rather than
    # divided by the sample count a second time.
    running_loss = loss.item()
    train_losses.append(running_loss)

    # testing model
    model.eval()
    with tc.no_grad():
        output = model(xtest)
        # .item() stores a plain float, so the history lists hold no (possibly
        # GPU-resident) tensors and can be plotted directly.
        test_loss = e_func(output, ytest).item()

        # exp() is monotonic, so the top-1 index of the raw outputs equals the
        # top-1 index of exp(outputs); skipping exp also avoids overflow.
        top_class = output.topk(1, dim=1)[1]
        equals = top_class == ytest.view(*top_class.shape)
        # The mean of the 0/1 matches is already the fraction of correct predictions.
        accuracy = equals.float().mean().item()

    test_losses.append(test_loss)
    accuracies.append(accuracy)

    print(f'Epoch: {e + 1}/{ep}',
          f'Training loss: {running_loss}',
          f'Test loss: {test_loss}',
          f'Accuracy: {accuracy}')
    

In [None]:
# plot train & test loss per iteration
# float() turns any 0-dim tensor stored in the history lists (possibly living
# on the GPU) into a plain number matplotlib can plot; plain floats pass
# through unchanged.
plt.plot([float(value) for value in train_losses], label='Training loss')
plt.plot([float(value) for value in test_losses], label='Validation loss')
plt.xlabel('Epoch')
plt.ylabel('Loss')
plt.legend(frameon=False)

In [None]:
# Draw the recorded test accuracy of every epoch on the current axes.
accuracyAxes = plt.gca()
accuracyAxes.plot(accuracies, label='Testing accuracy')
accuracyAxes.legend(frameon=False)

In [None]:
# save model
# PATH must be filled in with the file path the model is written to; the
# whole model object is saved, not just its weights.
PATH = '' # input file path at which the model is to be saved
# torch is imported under the alias `tc`; the bare name `torch` is not bound.
tc.save(model, PATH)

**Testing model deployment**

In [None]:
# model class
class NeuralNetwork(nn.Module):
    '''Fully connected feed-forward network (deployment-side definition).

    The saved file holds the whole model object, so a class with this name and
    these attribute names presumably has to be defined before it can be
    loaded — keep it in step with the definition used for training.

    Layer widths: numInputs -> 512 -> 512 -> 256 -> 256 -> numOutputs, with a
    ReLU after every linear layer except the last one.

    Parameters
    ----------
    numInputs : int
        Number of input features per sample.
    numOutputs : int, default 100
        Width of the final linear layer. The default keeps the previously
        hard-coded size, so existing ``NeuralNetwork(numInputs)`` calls behave
        as before.
    '''

    def __init__(self, numInputs, numOutputs=100):
        super().__init__()
        # Flattens every dimension after the batch dimension before the stack.
        self.flatten = nn.Flatten()
        # Attribute name and layer order are kept unchanged so parameter names
        # (e.g. `linear_relu_stack.0.weight`) stay the same.
        self.linear_relu_stack = nn.Sequential(
            nn.Linear(numInputs, 512),
            nn.ReLU(),
            nn.Linear(512, 512),
            nn.ReLU(),
            nn.Linear(512, 256),
            nn.ReLU(),
            nn.Linear(256, 256),
            nn.ReLU(),
            nn.Linear(256, numOutputs),
        )

    def forward(self, x):
        '''Return the raw (un-activated) outputs of the last layer for batch `x`.'''
        x = self.flatten(x)
        logits = self.linear_relu_stack(x)
        return logits

In [None]:
# load model
PATH = '' # file path at which the model was saved
# torch is imported under the alias `tc`; the bare name `torch` is not bound.
# NOTE: loading a whole-model file unpickles it, which can execute arbitrary
# code — only load files you trust.
# NOTE(review): newer PyTorch releases default to weights_only=True, which
# rejects whole-model pickles; pass weights_only=False there if loading fails — confirm
# against the installed version.
model = tc.load(PATH)
model.eval()  # switch to inference mode

In [None]:
# loading sample file for testing
fname = '../input/test-audio/Kalimba.mp3'
SR = 22050  # sampling rate used for both loading and plotting
data, _ = librosa.load(fname, sr=SR, mono=True)

# visualizing sample mp3
plt.figure(figsize = (16, 6))
# Newer librosa releases replaced `waveplot` with `waveshow`; use whichever
# the installed version provides so the cell runs on both.
if hasattr(librosa.display, 'waveshow'):
    librosa.display.waveshow(y = data, sr = SR, color = "#A300F9")
else:
    librosa.display.waveplot(y = data, sr = SR, color = "#A300F9")
plt.title("Sound Waves in sample", fontsize = 10)

In [36]:
# function to take an mp3 file as a parameter and return predicted volume by calling saved model
def predictVolume(data) :
    '''Predict the volume for one audio sample using the saved model.

    Not implemented yet: the feature-extraction and inference steps are still
    missing, so calling this raises ``NotImplementedError`` instead of failing
    on an undefined variable.

    Parameters
    ----------
    data
        Presumably the audio samples loaded from the mp3 (e.g. the array
        returned by ``librosa.load``) — TODO confirm once implemented.

    Raises
    ------
    NotImplementedError
        Always, until the model inputs are derived from `data`.
    '''
    # getting all necessary model input values in the correct data format
    # TODO: build the same input features the model was trained on, run the
    # loaded model on them and return its prediction.
    raise NotImplementedError(
        "predictVolume is a stub: feature extraction and model inference are not implemented yet"
    )