In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from scipy import stats
from sklearn.manifold import TSNE
from sklearn.decomposition import PCA
from mpl_toolkits.mplot3d import Axes3D
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
from sklearn.model_selection import train_test_split
from sklearn import preprocessing
from sklearn.neighbors import KNeighborsClassifier
from sklearn.svm import SVC
from sklearn.metrics import classification_report, confusion_matrix

In [2]:
# loading data
df_raw = pd.read_csv("Data/features_3_sec.csv", sep=",")
df_raw = df_raw.drop(columns='filename')
df_raw = df_raw.drop(columns='length')
df_raw.head()

Unnamed: 0,chroma_stft_mean,chroma_stft_var,rms_mean,rms_var,spectral_centroid_mean,spectral_centroid_var,spectral_bandwidth_mean,spectral_bandwidth_var,rolloff_mean,rolloff_var,...,mfcc16_var,mfcc17_mean,mfcc17_var,mfcc18_mean,mfcc18_var,mfcc19_mean,mfcc19_var,mfcc20_mean,mfcc20_var,label
0,0.335406,0.091048,0.130405,0.003521,1773.065032,167541.630869,1972.744388,117335.771563,3714.560359,1080790.0,...,39.687145,-3.24128,36.488243,0.722209,38.099152,-5.050335,33.618073,-0.243027,43.771767,blues
1,0.343065,0.086147,0.112699,0.00145,1816.693777,90525.690866,2010.051501,65671.875673,3869.682242,672244.8,...,64.748276,-6.055294,40.677654,0.159015,51.264091,-2.837699,97.03083,5.784063,59.943081,blues
2,0.346815,0.092243,0.132003,0.00462,1788.539719,111407.437613,2084.565132,75124.921716,3997.63916,790712.7,...,67.336563,-1.76861,28.348579,2.378768,45.717648,-1.938424,53.050835,2.517375,33.105122,blues
3,0.363639,0.086856,0.132565,0.002448,1655.289045,111952.284517,1960.039988,82913.639269,3568.300218,921652.4,...,47.739452,-3.841155,28.337118,1.218588,34.770935,-3.580352,50.836224,3.630866,32.023678,blues
4,0.335579,0.088129,0.143289,0.001701,1630.656199,79667.267654,1948.503884,60204.020268,3469.992864,610211.1,...,30.336359,0.664582,45.880913,1.689446,51.363583,-3.392489,26.738789,0.536961,29.146694,blues


In [3]:
df = df_raw
df_label = df.filter(regex=r'label')
df_label['label'].unique()

array(['blues', 'classical', 'country', 'disco', 'hiphop', 'jazz',
       'metal', 'pop', 'reggae', 'rock'], dtype=object)

In [4]:
data = df.drop('label', axis=1)
label_in_str = df['label']

In [5]:
# Encode target strings with values from 0 to n_classes - 1
le = preprocessing.LabelEncoder()
label = le.fit_transform(label_in_str)

# Standardize feature values
scalar = preprocessing.StandardScaler()
scaled_data = scalar.fit_transform(data)

# Train test split
TEST_SIZE = 0.3
RANDOM_STATE = 42
X_train, X_test, y_train, y_test = train_test_split(scaled_data, label, test_size=TEST_SIZE, shuffle=True, random_state=RANDOM_STATE)
X_train.shape, X_test.shape, y_train.shape, y_test.shape

((6993, 57), (2997, 57), (6993,), (2997,))

In [7]:
knn_1 = KNeighborsClassifier(n_neighbors=3)
knn_1.fit(X_train, y_train)
y_pred = knn_1.predict(X_test)
print("Training set score: {:.3f}".format(knn_1.score(X_train, y_train)))
print("Test set score: {:.3f}".format(knn_1.score(X_test, y_test)))
cf_matrix = confusion_matrix(y_test, y_pred)
print(classification_report(y_test,y_pred))

Training set score: 0.955
Test set score: 0.896
              precision    recall  f1-score   support

           0       0.89      0.90      0.89       319
           1       0.90      0.95      0.92       308
           2       0.80      0.82      0.81       286
           3       0.86      0.91      0.89       301
           4       0.93      0.90      0.91       311
           5       0.91      0.87      0.89       286
           6       0.98      0.95      0.96       303
           7       0.92      0.91      0.91       267
           8       0.86      0.92      0.89       316
           9       0.92      0.82      0.87       300

    accuracy                           0.90      2997
   macro avg       0.90      0.90      0.90      2997
weighted avg       0.90      0.90      0.90      2997



In [9]:
knn_2 = KNeighborsClassifier(n_neighbors=5)
knn_2.fit(X_train, y_train)
y_pred = knn_2.predict(X_test)
print("Training set score: {:.3f}".format(knn_2.score(X_train, y_train)))
print("Test set score: {:.3f}".format(knn_2.score(X_test, y_test)))
cf_matrix = confusion_matrix(y_test, y_pred)
print(classification_report(y_test,y_pred))

Training set score: 0.934
Test set score: 0.885
              precision    recall  f1-score   support

           0       0.92      0.87      0.89       319
           1       0.87      0.95      0.91       308
           2       0.80      0.85      0.82       286
           3       0.82      0.90      0.86       301
           4       0.93      0.90      0.91       311
           5       0.90      0.84      0.86       286
           6       0.98      0.94      0.96       303
           7       0.91      0.90      0.90       267
           8       0.85      0.91      0.87       316
           9       0.92      0.79      0.85       300

    accuracy                           0.89      2997
   macro avg       0.89      0.88      0.88      2997
weighted avg       0.89      0.89      0.89      2997



In [10]:
knn_3 = KNeighborsClassifier(n_neighbors=7)
knn_3.fit(X_train, y_train)
y_pred = knn_3.predict(X_test)
print("Training set score: {:.3f}".format(knn_3.score(X_train, y_train)))
print("Test set score: {:.3f}".format(knn_3.score(X_test, y_test)))
cf_matrix = confusion_matrix(y_test, y_pred)
print(classification_report(y_test,y_pred))

Training set score: 0.915
Test set score: 0.875
              precision    recall  f1-score   support

           0       0.90      0.86      0.88       319
           1       0.87      0.97      0.92       308
           2       0.77      0.83      0.80       286
           3       0.81      0.91      0.86       301
           4       0.94      0.88      0.91       311
           5       0.89      0.84      0.87       286
           6       0.95      0.92      0.94       303
           7       0.91      0.91      0.91       267
           8       0.83      0.88      0.86       316
           9       0.91      0.75      0.82       300

    accuracy                           0.87      2997
   macro avg       0.88      0.87      0.87      2997
weighted avg       0.88      0.87      0.87      2997



In [11]:
svm_1 = SVC(kernel='linear')
svm_1.fit(X_train, y_train)
y_pred = svm_1.predict(X_test)
print("Training set score: {:.3f}".format(svm_1.score(X_train, y_train)))
print("Test set score: {:.3f}".format(svm_1.score(X_test, y_test)))
cf_matrix = confusion_matrix(y_test, y_pred)
print(classification_report(y_test,y_pred))

Training set score: 0.809
Test set score: 0.763
              precision    recall  f1-score   support

           0       0.73      0.81      0.77       319
           1       0.90      0.95      0.92       308
           2       0.66      0.67      0.67       286
           3       0.65      0.67      0.66       301
           4       0.76      0.76      0.76       311
           5       0.84      0.84      0.84       286
           6       0.86      0.86      0.86       303
           7       0.80      0.88      0.84       267
           8       0.78      0.68      0.73       316
           9       0.63      0.52      0.57       300

    accuracy                           0.76      2997
   macro avg       0.76      0.76      0.76      2997
weighted avg       0.76      0.76      0.76      2997



In [12]:
svm_2 = SVC(kernel='poly', degree=2)
svm_2.fit(X_train, y_train)
y_pred = svm_2.predict(X_test)
print("Training set score: {:.3f}".format(svm_2.score(X_train, y_train)))
print("Test set score: {:.3f}".format(svm_2.score(X_test, y_test)))
cf_matrix = confusion_matrix(y_test, y_pred)
print(classification_report(y_test,y_pred))

Training set score: 0.851
Test set score: 0.781
              precision    recall  f1-score   support

           0       0.86      0.73      0.79       319
           1       0.90      0.94      0.92       308
           2       0.58      0.79      0.67       286
           3       0.63      0.71      0.67       301
           4       0.86      0.73      0.79       311
           5       0.82      0.83      0.82       286
           6       0.88      0.88      0.88       303
           7       0.90      0.81      0.85       267
           8       0.82      0.76      0.79       316
           9       0.67      0.64      0.65       300

    accuracy                           0.78      2997
   macro avg       0.79      0.78      0.78      2997
weighted avg       0.79      0.78      0.78      2997



In [13]:
svm_3 = SVC(kernel='rbf')
svm_3.fit(X_train, y_train)
y_pred = svm_3.predict(X_test)
print("Training set score: {:.3f}".format(svm_3.score(X_train, y_train)))
print("Test set score: {:.3f}".format(svm_3.score(X_test, y_test)))
cf_matrix = confusion_matrix(y_test, y_pred)
print(classification_report(y_test,y_pred))

Training set score: 0.913
Test set score: 0.853
              precision    recall  f1-score   support

           0       0.87      0.87      0.87       319
           1       0.85      0.98      0.91       308
           2       0.79      0.79      0.79       286
           3       0.81      0.80      0.80       301
           4       0.93      0.84      0.88       311
           5       0.84      0.85      0.85       286
           6       0.89      0.92      0.91       303
           7       0.87      0.90      0.88       267
           8       0.88      0.87      0.88       316
           9       0.78      0.69      0.73       300

    accuracy                           0.85      2997
   macro avg       0.85      0.85      0.85      2997
weighted avg       0.85      0.85      0.85      2997



In [14]:
import torch
from torch import nn
from torch.utils.data import TensorDataset
from torch.utils.data import DataLoader

  from .autonotebook import tqdm as notebook_tqdm


In [15]:
# Turn numpy array into pytorch tensor
X_train_tensor = torch.tensor(X_train, dtype=torch.float32)
y_train_tensor = torch.tensor(y_train, dtype=torch.long)
X_test_tensor = torch.tensor(X_test, dtype=torch.float32)
y_test_tensor = torch.tensor(y_test, dtype=torch.long)

# Create train and test datasets
train_dataset = TensorDataset(X_train_tensor, y_train_tensor)
test_dataset = TensorDataset(X_test_tensor, y_test_tensor)

# Put dataset into dataloader
BATCH_SIZE = 256
train_loader = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=BATCH_SIZE, shuffle=True)

In [39]:
class CNN(nn.Module):
    def __init__(self, output_size, device):
        super(CNN, self).__init__()
        self.output_size = output_size
        self.device = device

        self.layer_1 = nn.Sequential(
            nn.Conv1d(1, 32, kernel_size=3, stride=1, padding=1),
            nn.ReLU(),
            nn.MaxPool1d(kernel_size=2, stride=2)
        )

        self.layer_2 = nn.Sequential(
            nn.Conv1d(32, 64, kernel_size=3, stride=1, padding=1),
            nn.ReLU(),
            nn.MaxPool1d(kernel_size=2, stride=2)
        )

        self.layer_3 = nn.Sequential(
            nn.Conv1d(64, 128, kernel_size=3, stride=1, padding=1),
            nn.ReLU(),
            nn.MaxPool1d(kernel_size=2, stride=2)
        )

        self.flatten = nn.Flatten()

        self.fc_1 = nn.Sequential(
            nn.Dropout(0.2),
            nn.Linear(128*7, 512),
            nn.ReLU(),
            nn.Dropout(0.2),
        )

        self.fc_2 = nn.Sequential(
            nn.Linear(512, 256),
            nn.ReLU(),
            nn.Dropout(0.2),
        )

        self.fc_3 = nn.Sequential(
            nn.Linear(256, 128),
            nn.ReLU(),
            nn.Dropout(0.2),
        )

        self.fc_4 = nn.Sequential(
            nn.Linear(128, 64),
            nn.ReLU(),
            nn.Dropout(0.2),
        )

        self.fc_5 = nn.Sequential(
            nn.Linear(64, 32),
            nn.ReLU(),
            nn.Dropout(0.2),
            nn.Linear(32, output_size),
        )

    def forward(self, x):
        x = x.unsqueeze(1)
        x = self.layer_1(x)
        x = self.layer_2(x)
        x = self.layer_3(x)
        x = self.flatten(x)
        x = self.fc_1(x)
        x = self.fc_2(x)
        x = self.fc_3(x)
        x = self.fc_4(x)
        x = self.fc_5(x)
        return nn.functional.softmax(x, dim=1)


In [43]:
device = 'cuda' if torch.cuda.is_available() else 'cpu'
NUM_EPOCH = 500
LEARNING_RATE = 0.0001
print(device)

cpu


In [41]:
model = CNN(output_size=10, device=device).to(device)
# Use sparse_softmax_cross_entropy
loss = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=LEARNING_RATE)

In [42]:
torch.manual_seed(42) 
torch.cuda.manual_seed(42)
from tqdm.auto import tqdm

for epoch in tqdm(range(NUM_EPOCH)):
    for i, (X, y) in enumerate(train_loader):
        X = X.to(device)
        y = y.to(device)
        y_pred = model(X)
        l = loss(y_pred, y)
        optimizer.zero_grad()
        l.backward()
        optimizer.step()
    if epoch % 10 == 0:
        print(f'Epoch: {epoch}, Loss: {l.item():.4f}')

    with torch.no_grad():
        correct = 0
        total = 0
        for X, y in test_loader:
            X = X.to(device)
            y = y.to(device)
            y_pred = model(X)
            _, predicted = torch.max(y_pred.data, 1)
            total += y.size(0)
            correct += (predicted == y).sum().item()
        if epoch % 10 == 0:
            print(f'Accuracy of the network on the {total} test images: {100 * correct / total}%')

y_pred = []
y_true = []
with torch.no_grad():
    for X, y in test_loader:
        X = X.to(device)
        y = y.to(device)
        y_pred.extend(torch.argmax(model(X), 1).tolist())
        y_true.extend(y.tolist())

cf_matrix = confusion_matrix(y_true, y_pred)
print(classification_report(y_true, y_pred))

  0%|          | 1/500 [00:01<15:41,  1.89s/it]

Epoch: 0, Loss: 2.3013


  2%|▏         | 11/500 [00:19<14:33,  1.79s/it]

Epoch: 10, Loss: 2.1700


  4%|▍         | 21/500 [00:37<14:16,  1.79s/it]

Epoch: 20, Loss: 2.0759


  5%|▍         | 23/500 [00:41<14:24,  1.81s/it]


KeyboardInterrupt: 