In [None]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
import os
%matplotlib inline
import librosa as lr
import librosa.display
import audioread
import ffmpeg
from glob import glob
from pydub import AudioSegment

## Getting the file paths for all tracks

In [None]:
# Collect the path of every mp3 under the dataset root, in sorted order.
directory = []
path = 'fma_small/fma_small'

for folder in sorted(os.listdir(path)):
    folder_path = f'{path}/{folder}'
    # Skip notebook checkpoints and any plain files sitting next to the track
    # folders; calling os.listdir on a file would raise NotADirectoryError.
    if folder == '.ipynb_checkpoints' or not os.path.isdir(folder_path):
        continue

    for file in sorted(os.listdir(folder_path)):
        # Later cells decode every entry as mp3, so keep only mp3 files.
        if file.endswith('.mp3'):
            # Paths are joined with '/' so they keep the plain
            # `root/folder/file` string form.
            directory.append(f'{folder_path}/{file}')

In [None]:
# Preview the first few paths rather than dumping the whole list into the output
directory[:10]

In [None]:
## Loading in a mp3 file

first_track = directory[0]
audio, sr = lr.load(first_track)

# Timestamp (in seconds) of every sample: sample index divided by the sample rate
time = np.arange(len(audio)) / sr

# Neural Network

In [None]:
## https://towardsdatascience.com/music-genre-recognition-using-convolutional-neural-networks-cnn-part-1-212c6b93da76

In [71]:
import numpy as np
import os
import matplotlib.pyplot as plt
import librosa
import pandas as pd

import tensorflow as tf
from tensorflow.keras.layers import Input, Add, Dense, Activation, ZeroPadding2D, BatchNormalization, Flatten, Conv2D, Dropout, MaxPooling2D
from tensorflow.keras.models import Sequential
from tensorflow.keras.regularizers import l1, l2
from tensorflow.keras.preprocessing import image

from pydub import AudioSegment


from tensorflow.keras.preprocessing.image import ImageDataGenerator

import shutil
import random

In [None]:
from random import Random, sample

# Draw the subset from a dedicated, seeded generator so a kernel restart picks
# the same tracks again; cap the size so a library with fewer than 250 files
# does not raise ValueError.
small_directory = Random(42).sample(directory, min(250, len(directory)))

In [None]:
## Breaking audio into 5, 6 second parts

N_SEGMENTS = 5          # number of clips written per track
SEGMENT_MS = 6 * 1000   # length of each clip in milliseconds (pydub slices in ms)

# export() does not create missing folders
os.makedirs('CNN-test', exist_ok=True)

for file in small_directory:
    # Decode the mp3 once per track instead of once per segment
    newAudio = AudioSegment.from_mp3(file)
    # File name without folder or extension, e.g. '.../000/000002.mp3' -> '000002'
    track_name = os.path.splitext(os.path.basename(file))[0]

    for w in range(N_SEGMENTS):
        t1 = SEGMENT_MS * w
        t2 = SEGMENT_MS * (w + 1)
        new = newAudio[t1:t2]
        new.export(f'CNN-test/{track_name}-{w}.mp3', format="mp3")

In [None]:
## Generating the spectrograms

# savefig() does not create missing folders
os.makedirs('CNN-test/figures', exist_ok=True)

for clip in sorted(os.listdir('CNN-test/')):
    if not clip.endswith('.mp3'):
        continue

    y, sr = librosa.load('CNN-test/' + clip)
    mels = librosa.feature.melspectrogram(y=y, sr=sr)

    # Draw on an explicit figure/axes pair so each clip gets a fresh canvas
    # and the saved image is exactly the figure that was drawn on.
    fig, ax = plt.subplots()
    ax.imshow(librosa.power_to_db(mels, ref=np.max))
    ax.axis('off')
    fig.savefig(f'CNN-test/figures/{clip[:-4]}.png')
    # Release the figure; otherwise one open figure per clip piles up in memory
    plt.close(fig)
        

In [69]:
## Loading in complete table of: track ID, genre, artist and title

genre_table_path = 'final_table.csv'
genre_table = pd.read_csv(genre_table_path)

In [82]:
## Creating a dictionary of all different genres to label the images when we generate them

# Distinct genre names, in order of first appearance in the table
genres = list(genre_table['Genre'].unique())

# Each genre name maps to its position in `genres`
genre_map = {name: index for index, name in enumerate(genres)}

genre_map

{'Hip-Hop': 0,
 'Pop': 1,
 'Folk': 2,
 'Experimental': 3,
 'Rock': 4,
 'International': 5,
 'Electronic': 6,
 'Instrumental': 7}

In [74]:
## Creating dictionary of track ids and their genres

train_figures = 'CNN-test/figures/train/'

# Image names are presumably '<track id>-<segment>.png' (the form written by the
# spectrogram cell). Anything that is not a .png is skipped so stray files do
# not break the int() conversion.
track_ids = sorted({
    int(clip.rsplit('-', 1)[0])
    for clip in os.listdir(train_figures)
    if clip.endswith('.png')
})

## Using genre table to look up the genre for each track id

# .item() raises if a track id matches zero or several rows of the table
track_genre = {
    track_id: genre_table.loc[genre_table['Track ID'] == track_id, 'Genre'].item()
    for track_id in track_ids
}

In [None]:
# Display the track id -> genre name mapping
track_genre

In [None]:
## Counting of genres in dictionary

# Genre assigned to every track in the mapping
assigned_genres = list(track_genre.values())

# One entry per genre in the table, including genres with no track (count 0)
genre_count_dict = {
    genre_name: assigned_genres.count(genre_name)
    for genre_name in genre_table['Genre'].unique()
}

genre_count_dict

In [84]:
## Encoding of genre

def encoding_genre(track_id):
    """Return the integer class index for ``track_id``.

    The track is looked up in the module-level ``track_genre`` mapping and the
    resulting genre name is translated through ``genre_map``.

    Raises:
        KeyError: if the track id or its genre is missing from the mappings.
    """
    genre_name = track_genre[track_id]
    return genre_map[genre_name]

In [116]:
# load all images into memory

def load_dataset(path, target_size=(72, 108)):
    """Load every spectrogram image in ``path`` with its genre label.

    Args:
        path: Folder containing the ``.png`` spectrograms. A trailing slash
            is optional.
        target_size: ``(height, width)`` each image is resized to on load.

    Returns:
        ``(X, y)`` where ``X`` is a float array stacking the images and ``y``
        holds one integer genre index per image, as returned by
        ``encoding_genre``.
    """
    photos = []
    targets = []

    # Sorted so the row order does not depend on the filesystem's listing order
    for filename in sorted(os.listdir(path)):
        # Skip anything that is not a spectrogram image (checkpoints, hidden files)
        if not filename.endswith('.png'):
            continue

        # load image, resized to (height, width)
        photo = image.load_img(os.path.join(path, filename), target_size=target_size)
        # convert to numpy array
        photo = image.img_to_array(photo, dtype='uint8')
        # integer class label; the text before the last '-' is parsed as the track id
        target = encoding_genre(int(filename.rsplit('-', 1)[0]))

        photos.append(photo)
        targets.append(target)

    X = np.asarray(photos, dtype='float')
    y = np.asarray(targets)

    return X, y

In [117]:
## Creating X(image array) and y(genre labels)

train_figures_dir = 'CNN-test/figures/train/'
X, y = load_dataset(train_figures_dir)

In [119]:
## One hot encoding

# Use the Keras bundled with TensorFlow, like the rest of the notebook
from tensorflow.keras.utils import to_categorical

# Encode only while `y` still holds integer labels, so re-running the cell does
# not one-hot an already encoded matrix. The class count is fixed to the number
# of known genres, so the width stays the same even when the highest-numbered
# genre is absent from this sample.
if y.ndim == 1:
    y = to_categorical(y, num_classes=len(genre_map))

In [None]:
# Display the one-hot encoded label matrix
y

In [123]:
## Test train split

from sklearn.model_selection import train_test_split

# Hold out 30% of the rows; the fixed random_state keeps the split repeatable.
# NOTE(review): rows are split individually, so segments cut from the same
# track can land on both sides of the split - confirm this is intended.
split = train_test_split(X, y, test_size=0.3, random_state=42)
X_train, X_test, Y_train, Y_test = split

In [129]:
## Scaling

# Pixel values are multiplied by 1/255 as batches are drawn
datagen = ImageDataGenerator(rescale=1.0 / 255.0)

# Same generator and batch size for the training and the held-out split
train_iterator, test_iterator = (
    datagen.flow(features, labels, batch_size=32)
    for features, labels in ((X_train, Y_train), (X_test, Y_test))
)


In [153]:
## CNN - wooooooo
# define CNN model
# https://blog.clairvoyantsoft.com/music-genre-classification-using-cnn-ef9461553726
# https://machinelearningmastery.com/how-to-develop-a-convolutional-neural-network-to-classify-satellite-photos-of-the-amazon-rainforest/
# https://towardsdatascience.com/music-genre-recognition-using-convolutional-neural-networks-cnn-part-1-212c6b93da76
# https://brilliant.org/wiki/convolutional-neural-network/
# https://www.tensorflow.org/api_docs/python/tf/keras/layers/BatchNormalization
# https://www.kaggle.com/andradaolteanu/work-w-audio-data-visualise-classify-recommend


def define_model(input_shape=(72, 108, 3), num_classes=8):
    """Build the (uncompiled) CNN used to classify spectrogram images.

    The network has three convolutional stages with 16, 32 and 64 filters,
    then two dense blocks with 64 and 32 units and a softmax output layer.

    Args:
        input_shape: ``(height, width, channels)`` of one input image.
        num_classes: Number of units in the softmax output layer.

    Returns:
        A ``Sequential`` model; the caller is responsible for compiling it.
    """
    layers = tf.keras.layers

    def conv_stage(filters, second_regularizer=None, **first_conv_kwargs):
        # Two 5x5 ReLU convolutions, then batch norm, 2x2 max pooling and dropout
        return [
            layers.Conv2D(filters, (5, 5), activation='relu', **first_conv_kwargs),
            layers.Conv2D(filters, (5, 5), activation='relu',
                          kernel_regularizer=second_regularizer),
            layers.BatchNormalization(),
            layers.MaxPooling2D((2, 2)),
            layers.Dropout(0.2),
        ]

    def dense_block(units):
        # Fully connected ReLU layer, then batch norm and dropout
        return [
            layers.Dense(units, activation='relu'),
            layers.BatchNormalization(),
            layers.Dropout(0.2),
        ]

    # build model
    model = Sequential(
        conv_stage(16, second_regularizer='l2', input_shape=input_shape)
        # the middle stage has no kernel regularizer on either convolution
        + conv_stage(32)
        + conv_stage(64, second_regularizer='l2')
        + [layers.Flatten()]
        + dense_block(64)
        + dense_block(32)
        + [layers.Dense(num_classes, activation='softmax')]
    )

    return model

In [154]:
model = define_model()
optimizer = tf.keras.optimizers.Adam(learning_rate=0.0005)

# Labels are one-hot vectors, hence categorical cross-entropy
compile_settings = dict(
    optimizer=optimizer,
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)
model.compile(**compile_settings)

In [155]:
# Print each layer with its output shape and parameter count
model.summary()

Model: "sequential_17"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d_84 (Conv2D)           (None, 68, 104, 16)       1216      
_________________________________________________________________
conv2d_85 (Conv2D)           (None, 64, 100, 16)       6416      
_________________________________________________________________
batch_normalization_84 (Batc (None, 64, 100, 16)       64        
_________________________________________________________________
max_pooling2d_51 (MaxPooling (None, 32, 50, 16)        0         
_________________________________________________________________
dropout_64 (Dropout)         (None, 32, 50, 16)        0         
_________________________________________________________________
conv2d_86 (Conv2D)           (None, 28, 46, 32)        12832     
_________________________________________________________________
conv2d_87 (Conv2D)           (None, 24, 42, 32)      

In [156]:
# Keep the returned History object so per-epoch loss/accuracy can be inspected
# or plotted later instead of being discarded
history = model.fit(train_iterator, validation_data=test_iterator, epochs=50)

Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50


<tensorflow.python.keras.callbacks.History at 0x12e11830b80>

In [None]:
# 'Hip-Hop': 0,
# 'Pop': 1, 
# 'Folk': 2,
# 'Experimental': 3,
# 'Rock': 4,
# 'International': 5,
# 'Electronic': 6,
# 'Instrumental': 7

In [None]:
## Checking class balance in Y_train

# Y_train is one-hot encoded, so recover the integer class id of every row
# first; comparing a one-hot row with an int would give an array, not a count.
train_class_ids = np.argmax(Y_train, axis=1)

genre_count_dict = {
    i: int(np.sum(train_class_ids == i))
    for i in range(0, 8)
}

genre_count_dict

genre_table

# scikit-learn models - the neural network works better

In [None]:
## Random Forest
## Logistic Regression
## KNN

## Random Forest

In [None]:
from sklearn.ensemble import BaggingClassifier
from sklearn.ensemble import RandomForestClassifier, ExtraTreesClassifier
from sklearn.model_selection import GridSearchCV

In [None]:
# The first three rows are read as column header levels and the first column
# as the row index
dfmeta = pd.read_csv(
    'MetaDataBetter.csv',
    header=[0, 1, 2],
    index_col=0,
)

dfmeta

In [None]:
genre

In [None]:
## Adding genre and label encoding

genre = pd.read_csv('FinalGenre.csv',index_col=0)  


dfmeta = dfmeta.join(genre, how='left')

In [None]:
# Drop every row that has at least one missing value
dfmeta = dfmeta.dropna()

In [None]:
# Display the metadata table
dfmeta

In [None]:
# Features: every column except the last 8
# (presumably the columns added by the genre join - TODO confirm)
n_trailing_columns = 8
XRF = dfmeta.iloc[:, :-n_trailing_columns]
# Target: the genre column
YRF = dfmeta.loc[:, 'Genre']

# Hold out 30% of the rows; the fixed random_state keeps the split repeatable
rf_split = train_test_split(XRF, YRF, test_size=0.3, random_state=1)
X_train_RF, X_test_RF, Y_train_RF, Y_test_RF = rf_split

In [None]:
from sklearn.preprocessing import MinMaxScaler

def scaler(x):
    """Return ``x`` min-max scaled by a freshly fitted ``MinMaxScaler``.

    A new scaler is fitted on ``x`` on every call and is not returned, so the
    same fitted transformation cannot be reapplied to other data through this
    function.
    """
    return MinMaxScaler().fit_transform(x)

In [None]:
# Fit the min-max scaler on the training features only, then apply that same
# fitted scaling to the test features. Scaling the training split alone would
# send unscaled test features into the model at prediction time.
rf_scaler = MinMaxScaler().fit(X_train_RF)
X_train_RF = rf_scaler.transform(X_train_RF)
X_test_RF = rf_scaler.transform(X_test_RF)

In [None]:
# Display the training features
X_train_RF

In [None]:
# Hyper-parameter grid explored by the search
tuned_parameters = {
    'n_estimators': [100, 150],
    'max_depth': [15],
}

# 5-fold cross-validated grid search scored on accuracy. The forest gets a
# fixed random_state so repeated runs build the same trees and therefore
# report the same scores.
RF = GridSearchCV(
    RandomForestClassifier(random_state=42),
    param_grid=tuned_parameters,
    cv=5,
    verbose=3,
    scoring='accuracy',
)

RF.fit(X_train_RF, Y_train_RF)

In [None]:
# Parameter combination selected by the grid search
RF.best_params_

In [None]:
# Predict a genre for every row of the held-out features
pred = RF.predict(X_test_RF)

In [None]:
from sklearn.metrics import classification_report, confusion_matrix

# Print the classification report first, then the confusion matrix
for summarize in (classification_report, confusion_matrix):
    print(summarize(Y_test_RF, pred))
