<a href="https://colab.research.google.com/github/Minakshi85/Music-Genre-Classification-Final/blob/master/Project_2_Music_Genre_Identification.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [0]:
!wget https://www.dropbox.com/s/4jw31k5mlzcmgis/genres.tar.gz?dl=0

In [0]:
import tarfile as tf
import librosa
import IPython.display as ipd

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from PIL import Image


import csv
import os
import pathlib

from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.model_selection import train_test_split

import keras
from keras import models
from keras.layers import Conv1D, GlobalMaxPooling1D, Dense, Dropout, Activation
from keras.callbacks import ModelCheckpoint, EarlyStopping
from keras.models import load_model

In [0]:
tar = tf.open('/content/genres.tar.gz?dl=0')
tar.extractall('/content')
tar.close()

In [0]:
!rm /content/genres.tar.gz?dl=0

In [0]:
header = 'filename chroma_stft rmse spectral_centroid spectral_bandwidth rolloff zero_crossing_rate contrast tonnetz'
#[mfccs,chroma,mel,contrast,tonnetz]
for i in range(1, 21):
    header += f' mfcc{i}'
header += ' label'
header = header.split()

In [0]:
file = open('music_info.csv', 'w', newline='')
with file:
    writer = csv.writer(file)
    writer.writerow(header)
genres = 'blues classical country disco hiphop jazz metal pop reggae rock'.split()
for g in genres:
    for filename in os.listdir(f'/content/genres/{g}'):
        songname = f'/content/genres/{g}/{filename}'
        #print(songname)
        y, sr = librosa.load(songname, mono=True, duration=30)
        # short term fourier transform
        stft = np.abs(librosa.stft(y))

        # spectral contrast
        contrast = librosa.feature.spectral_contrast(S=stft, sr=sr)

        tonnetz = librosa.feature.tonnetz(y=y, sr=sr)

        chroma_stft = librosa.feature.chroma_stft(y=y, sr=sr)
        rmse = librosa.feature.rmse(y=y)
        spec_cent = librosa.feature.spectral_centroid(y=y, sr=sr)
        spec_bw = librosa.feature.spectral_bandwidth(y=y, sr=sr)
        rolloff = librosa.feature.spectral_rolloff(y=y, sr=sr)
        zcr = librosa.feature.zero_crossing_rate(y)
        mfcc = librosa.feature.mfcc(y=y, sr=sr)
        to_append = f'{filename} {np.mean(chroma_stft)} {np.mean(rmse)} {np.mean(spec_cent)} {np.mean(spec_bw)} {np.mean(rolloff)} {np.mean(zcr)} {np.mean(contrast)} {np.mean(tonnetz)}'    
        for e in mfcc:
            to_append += f' {np.mean(e)}'
        to_append += f' {g}'
        file = open('music_info.csv', 'a', newline='')
        with file:
            writer = csv.writer(file)
            writer.writerow(to_append.split())


**Reading CSV file of features and data preprocessing**

In [52]:
music = pd.read_csv('/content/music_info.csv')
print("shape of the dataframe is :", music.shape)
music.head()

shape of the dataframe is : (1000, 30)


Unnamed: 0,filename,chroma_stft,rmse,spectral_centroid,spectral_bandwidth,rolloff,zero_crossing_rate,contrast,tonnetz,mfcc1,mfcc2,mfcc3,mfcc4,mfcc5,mfcc6,mfcc7,mfcc8,mfcc9,mfcc10,mfcc11,mfcc12,mfcc13,mfcc14,mfcc15,mfcc16,mfcc17,mfcc18,mfcc19,mfcc20,label
0,blues.00087.au,0.336773,0.158098,1442.190271,1870.534155,3083.414688,0.050889,22.673188,-0.012294,-155.504931,125.638857,1.596554,45.804523,0.900779,25.367563,-10.787496,15.56638,-11.920468,1.26444,-6.649819,-0.792894,-7.748057,0.413548,-7.030262,3.997678,-6.256612,0.958226,2.019822,-5.742189,blues
1,blues.00025.au,0.290948,0.089222,1109.569627,1463.288131,2243.569767,0.051725,21.030588,-0.017111,-249.281996,130.997924,0.665435,66.696115,17.905895,4.62296,6.610036,1.245376,1.419175,-1.584293,-0.974159,-3.575065,-2.926915,4.598686,7.478115,-0.17877,-2.818857,-2.667636,0.751667,-0.533179,blues
2,blues.00079.au,0.421495,0.27087,2443.298208,2358.443293,5267.601481,0.124735,21.790103,0.001981,-3.528043,92.914318,-25.110401,45.1777,-10.861859,31.854362,-18.049332,28.61951,-10.224033,14.666931,-14.899714,14.742995,-15.600666,9.548172,-14.788735,2.067856,-13.821133,3.563616,-5.965995,-1.649998,blues
3,blues.00050.au,0.40086,0.18238,1945.848425,2082.246626,4175.874749,0.085806,21.139975,0.0068,-82.979023,107.052113,-25.320453,57.124995,0.085439,34.696804,-11.042022,20.532698,-11.213178,14.834126,-10.229334,12.539582,-9.762304,2.562253,-6.300853,2.996785,-8.718456,-0.326581,-2.980347,0.712601,blues
4,blues.00012.au,0.26932,0.119072,1361.045467,1567.804596,2739.625101,0.069124,22.135935,0.002276,-207.20808,132.799175,-15.438986,60.986727,0.725807,12.437828,1.183836,-1.540779,-17.888154,8.358496,-2.452068,-0.613248,0.384877,2.605128,-5.188924,-9.527455,-9.244394,-2.848274,-1.418707,-5.932607,blues


In [0]:
# Dropping unneccesary columns
music = music.drop(['filename'],axis=1)

In [54]:
#one hot encoding for labels
genre_label = music['label']
encoder = LabelEncoder()
Y = keras.utils.to_categorical(encoder.fit_transform(genre_label), num_classes = 10)
print('Length of Y : ', len(Y))
Y

Length of Y :  1000


array([[1., 0., 0., ..., 0., 0., 0.],
       [1., 0., 0., ..., 0., 0., 0.],
       [1., 0., 0., ..., 0., 0., 0.],
       ...,
       [0., 0., 0., ..., 0., 0., 1.],
       [0., 0., 0., ..., 0., 0., 1.],
       [0., 0., 0., ..., 0., 0., 1.]], dtype=float32)

In [0]:
# conversion into numpy ndarray and splitting into train and test data sets
X = np.array(music.iloc[:, :-1], dtype=float)
train_x, test_x, y_train, y_test = train_test_split(X, Y, test_size=0.2)

In [56]:
# standrazation
scaler = StandardScaler()
scaler.fit(train_x)
x_train = scaler.transform(train_x)
x_test = scaler.transform(test_x)
x_test

array([[ 0.38831291,  0.8904742 ,  0.05040381, ..., -1.15885454,
        -0.16465766, -0.34735832],
       [-0.45839053,  1.95897852, -0.16102607, ...,  1.09840695,
         0.77674544,  0.6919388 ],
       [-0.66572447, -1.39446355, -0.62014835, ..., -0.15197628,
         0.42739438,  0.14177671],
       ...,
       [ 0.77409511,  0.53153317,  1.30029981, ...,  0.95324901,
         0.71258894,  1.85942952],
       [-2.2659541 , -0.58091622, -1.4655982 , ..., -0.32039274,
        -1.05861006, -1.16461402],
       [-1.35689104, -1.07654079, -0.47110729, ..., -1.07755096,
         0.80130118,  0.4518856 ]])

In [57]:
len(x_train), len(y_train), len(x_test), len(y_test)

(800, 800, 200, 200)

In [58]:
x_train[0]

array([-0.5468281 , -0.32198778, -1.02164072, -1.15679731, -1.16135559,
       -0.67210721, -0.47709137,  1.68798643, -0.21760932,  1.39997033,
       -1.50222007,  0.29696169,  1.40307622, -1.71484295,  1.29619517,
       -0.43399674,  0.93264468, -2.28944836,  0.53083762, -0.74651483,
        0.27394628, -0.49546553,  1.01474994, -0.0916023 ,  1.07678019,
        0.38870495,  1.58885307,  1.70568858])

In [59]:
y_test[0]

array([0., 0., 0., 0., 1., 0., 0., 0., 0., 0.], dtype=float32)

In [0]:
cp = ModelCheckpoint('/content/best_model.h5', 
                             monitor='val_acc',
                             verbose=1, 
                             save_best_only=True)

In [0]:
es = EarlyStopping(monitor='val_acc', 
                          patience=7,
                          verbose=1)

In [0]:
model = models.Sequential()
model.add(layers.Dense(512, activation='relu', input_shape=(train_x.shape[1],)))
model.add(Dropout(0.25))
model.add(layers.Dense(256, activation='relu'))
model.add(Dropout(0.25))
model.add(layers.Dense(64, activation='relu'))
model.add(Dropout(0.25))
model.add(layers.Dense(10, activation='softmax'))

In [121]:
model.summary()

Model: "sequential_15"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_42 (Dense)             (None, 512)               14848     
_________________________________________________________________
dropout_33 (Dropout)         (None, 512)               0         
_________________________________________________________________
dense_43 (Dense)             (None, 256)               131328    
_________________________________________________________________
dropout_34 (Dropout)         (None, 256)               0         
_________________________________________________________________
dense_44 (Dense)             (None, 64)                16448     
_________________________________________________________________
dropout_35 (Dropout)         (None, 64)                0         
_________________________________________________________________
dense_45 (Dense)             (None, 10)              

In [0]:
model.compile(optimizer='adam',
              loss='categorical_crossentropy',
              metrics=['accuracy'])

In [123]:
history = model.fit(x_train,
                    y_train,
                    epochs=70,
                    batch_size=64,
                    validation_data =(x_test, y_test),
                    callbacks = [es,cp])

Train on 800 samples, validate on 200 samples
Epoch 1/70

Epoch 00001: val_acc improved from -inf to 0.34000, saving model to /content/best_model.h5
Epoch 2/70

Epoch 00002: val_acc improved from 0.34000 to 0.38500, saving model to /content/best_model.h5
Epoch 3/70

Epoch 00003: val_acc improved from 0.38500 to 0.54500, saving model to /content/best_model.h5
Epoch 4/70

Epoch 00004: val_acc improved from 0.54500 to 0.57000, saving model to /content/best_model.h5
Epoch 5/70

Epoch 00005: val_acc improved from 0.57000 to 0.59000, saving model to /content/best_model.h5
Epoch 6/70

Epoch 00006: val_acc improved from 0.59000 to 0.61500, saving model to /content/best_model.h5
Epoch 7/70

Epoch 00007: val_acc improved from 0.61500 to 0.63500, saving model to /content/best_model.h5
Epoch 8/70

Epoch 00008: val_acc improved from 0.63500 to 0.69000, saving model to /content/best_model.h5
Epoch 9/70

Epoch 00009: val_acc did not improve from 0.69000
Epoch 10/70

Epoch 00010: val_acc did not impro

In [0]:
best_model = load_model('/content/best_model.h5')

In [148]:
p = best_model.predict(x_test)
p

array([[2.1382427e-04, 6.4396998e-05, 2.6140348e-03, ..., 2.6155585e-02,
        6.0548283e-02, 8.4283194e-03],
       [3.8790807e-01, 3.1437623e-04, 5.7088113e-01, ..., 1.8940961e-04,
        1.4015556e-04, 4.6943347e-03],
       [3.8625940e-03, 1.7355908e-04, 4.5603327e-03, ..., 2.4343934e-05,
        2.7197599e-04, 1.9191434e-03],
       ...,
       [5.5749207e-03, 7.2815520e-04, 2.7227294e-02, ..., 5.6503154e-02,
        8.1240386e-02, 8.6175613e-03],
       [2.8228718e-03, 3.3709832e-02, 1.4172831e-03, ..., 1.0844908e-05,
        1.0990199e-04, 4.3277530e-05],
       [5.6258999e-03, 3.5305067e-03, 3.5862195e-01, ..., 6.2748403e-03,
        3.0772284e-02, 4.1213520e-02]], dtype=float32)

In [150]:
classes = best_model.predict_classes(x_test)
classes

array([4, 2, 5, 0, 7, 1, 1, 9, 3, 9, 6, 5, 0, 4, 6, 1, 8, 8, 9, 5, 5, 3,
       5, 3, 3, 8, 9, 3, 1, 7, 6, 9, 3, 8, 6, 4, 4, 5, 0, 9, 9, 7, 6, 7,
       5, 0, 8, 4, 9, 9, 4, 8, 8, 9, 1, 4, 4, 0, 6, 8, 7, 5, 1, 6, 6, 9,
       2, 7, 6, 1, 0, 3, 5, 4, 8, 2, 7, 8, 7, 7, 5, 7, 9, 4, 4, 5, 3, 7,
       9, 5, 2, 2, 5, 9, 1, 5, 9, 8, 3, 4, 2, 5, 9, 0, 3, 5, 5, 8, 9, 0,
       1, 3, 5, 8, 1, 7, 7, 6, 6, 4, 0, 1, 4, 5, 7, 5, 8, 0, 4, 1, 9, 1,
       9, 5, 2, 2, 8, 3, 5, 5, 2, 1, 7, 4, 4, 2, 6, 5, 7, 6, 3, 4, 0, 6,
       8, 1, 3, 7, 2, 9, 5, 9, 6, 1, 2, 4, 1, 7, 4, 7, 9, 9, 4, 3, 1, 6,
       5, 1, 6, 4, 2, 8, 7, 2, 1, 5, 9, 0, 8, 5, 2, 9, 6, 0, 2, 2, 2, 4,
       5, 5])

In [0]:
y_class = np.where(p>0.5, 1,0)

In [0]:
from sklearn.metrics import accuracy_score

In [147]:
print("Accuracy Score %: ", (accuracy_score(y_test, y_class))*100)

Accuracy Score %:  68.0


# Model building using Conv1D

In [0]:
from keras.layers import InputLayer, Conv1D, Dense, Flatten, MaxPool1D, GlobalMaxPool1D, Dropout

In [71]:
x_train.shape, x_test.shape

((800, 28), (200, 28))

In [0]:
train_x_cnn = x_train.reshape(800,28,1)
test_x_cnn = x_test.reshape(200,28,1)

In [0]:
model1 = models.Sequential()

model1.add(InputLayer(input_shape=(28,1)))
model1.add(Conv1D(filters =50, 
                 kernel_size= 10,
                 padding = 'valid',
                 activation = 'relu',
                 strides = 1))
model1.add(MaxPool1D(strides=1))
model1.add(Flatten())
model1.add(Dropout(0.2))
model1.add(Dense(512, activation ='tanh'))
model1.add(Dropout(0.2))
model1.add(Dense(128, activation ='tanh'))
model1.add(Dropout(0.2))
model1.add(Dense(units=10, activation='softmax'))

In [100]:
model1.summary()

Model: "sequential_11"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv1d_7 (Conv1D)            (None, 19, 50)            550       
_________________________________________________________________
max_pooling1d_7 (MaxPooling1 (None, 18, 50)            0         
_________________________________________________________________
flatten_7 (Flatten)          (None, 900)               0         
_________________________________________________________________
dropout_21 (Dropout)         (None, 900)               0         
_________________________________________________________________
dense_27 (Dense)             (None, 512)               461312    
_________________________________________________________________
dropout_22 (Dropout)         (None, 512)               0         
_________________________________________________________________
dense_28 (Dense)             (None, 128)             

In [0]:
cp_cnn = ModelCheckpoint('/content/best_model_cnn.h5', 
                             monitor='val_loss',
                             verbose=1, 
                             save_best_only=True)

In [0]:
model1.compile(optimizer='adam',
              loss='categorical_crossentropy',
              metrics=['accuracy'])

In [103]:
model1.fit(train_x_cnn,
            y_train,
            epochs=50,
            batch_size=64,
            validation_data =(test_x_cnn, y_test),
            callbacks = [es,cp_cnn])

Train on 800 samples, validate on 200 samples
Epoch 1/50

Epoch 00001: val_loss improved from inf to 1.44331, saving model to /content/best_model_cnn.h5
Epoch 2/50

Epoch 00002: val_loss improved from 1.44331 to 1.24091, saving model to /content/best_model_cnn.h5
Epoch 3/50

Epoch 00003: val_loss improved from 1.24091 to 1.16375, saving model to /content/best_model_cnn.h5
Epoch 4/50

Epoch 00004: val_loss improved from 1.16375 to 1.11687, saving model to /content/best_model_cnn.h5
Epoch 5/50

Epoch 00005: val_loss improved from 1.11687 to 1.10001, saving model to /content/best_model_cnn.h5
Epoch 6/50

Epoch 00006: val_loss improved from 1.10001 to 1.09517, saving model to /content/best_model_cnn.h5
Epoch 7/50

Epoch 00007: val_loss improved from 1.09517 to 1.09152, saving model to /content/best_model_cnn.h5
Epoch 8/50

Epoch 00008: val_loss improved from 1.09152 to 1.04183, saving model to /content/best_model_cnn.h5
Epoch 9/50

Epoch 00009: val_loss improved from 1.04183 to 1.03597, sa

<keras.callbacks.History at 0x7ff1853f71d0>