# Mounting the drive


In [None]:
# Mount Google Drive into the Colab runtime at /content/drive so that the
# files referenced later under /content/drive/MyDrive/... can be read.
from google.colab import drive
drive.mount("/content/drive")

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


# Importing & Loading
- Importing libraries
- Loading the CNN model for prediction

In [None]:
!pip install pydub
from tensorflow import keras
import librosa
import numpy as np
import math
import warnings
import IPython.display as ipd
from IPython.core.display import display
warnings.filterwarnings('ignore')

# pydub is only used to trim the clips below; import it once here instead of
# re-executing the import statement on every loop iteration.
from pydub import AudioSegment

# --- Paths ------------------------------------------------------------------
model_path = "/content/drive/MyDrive/ME_781_Adi/music_cnn.h5"
test_dir = "/content/drive/MyDrive/ME_781_Adi/test/"

# --- Audio / feature-extraction settings ------------------------------------
sample_rate = 22050      # every clip is resampled to this rate on load
clip_seconds = 30        # each clip is trimmed / padded to this length
hop_length = 512
n_fft = 2048
n_mfcc = 13
music_segments = 10      # each clip is split into this many equal segments

samples_per_audio = sample_rate * clip_seconds
samples_per_segment = int(samples_per_audio / music_segments)
# Number of MFCC frames librosa produces for one full-length segment.
num_mfcc_vectors_per_segment = math.ceil(samples_per_segment / hop_length)

# Loading the CNN model file saved via CNN.ipynb.
model = keras.models.load_model(model_path)

# "mapping" translates the model's class index into a genre name;
# "mfcc" is refilled with the per-segment features of the current clip.
data = {
    "mapping": ["pop", "metal", "disco", "blues", "reggae", "classical", "rock", "hiphop", "country", "jazz"],
    "mfcc": []
}

# Test clips to classify; each must exist as <test_dir>/<genre>.wav.
genres = ["blues", "metal", "jazz"]

for genre in genres:
    genre_new = genre + '_new'

    # Load the original test file and show a player plus its duration.
    filename = test_dir + genre + '.wav'
    audio, _ = librosa.load(filename, sr=sample_rate)
    music_duration = librosa.get_duration(y=audio, sr=sample_rate)
    display(ipd.Audio(filename))  # IPython audio widget for the original clip
    print('Music duration: ', music_duration)

    # Trim the clip to its first `clip_seconds` seconds with pydub (which
    # slices in milliseconds) and save it next to the original as
    # <genre>_new.wav.
    t1 = 0
    t2 = clip_seconds * 1000
    newAudio = AudioSegment.from_wav(filename)
    newAudio = newAudio[t1:t2]
    filename = test_dir + genre_new + '.wav'
    newAudio.export(filename, format="wav")

    # Reload the trimmed clip at the working sample rate.
    audio, _ = librosa.load(filename, sr=sample_rate)
    music_duration = librosa.get_duration(y=audio, sr=sample_rate)

    # Force the signal to exactly `samples_per_audio` samples: drop any
    # excess and zero-pad (silence) any shortfall. The previous
    # abs(duration - 30) logic appended silence even when the clip was
    # already longer than 30 s.
    audio = audio[:samples_per_audio]
    pad_samples = samples_per_audio - len(audio)
    if pad_samples > 0:
        audio = np.pad(audio, (0, pad_samples))
    new_music_duration = librosa.get_duration(y=audio, sr=sample_rate)

    print('Modified music duration:', new_music_duration)

    # Extract one MFCC matrix per segment, shaped (frames, n_mfcc).
    data['mfcc'] = []
    for d in range(music_segments):
        start = samples_per_segment * d
        finish = start + samples_per_segment
        # `y` and `sr` must be passed by keyword: librosa >= 0.10 rejects
        # positional audio arguments.
        mfcc = librosa.feature.mfcc(y=audio[start:finish],
                                    sr=sample_rate,
                                    n_mfcc=n_mfcc,
                                    n_fft=n_fft,
                                    hop_length=hop_length)
        mfcc = mfcc.T
        if len(mfcc) != num_mfcc_vectors_per_segment:
            # Fail early with a clear message rather than building a ragged
            # array or handing the model an unexpected input shape.
            raise ValueError(
                "segment %d of %s produced %d MFCC frames, expected %d"
                % (d, filename, len(mfcc), num_mfcc_vectors_per_segment)
            )
        data['mfcc'].append(mfcc.tolist())

    # Stack segments into (segments, frames, n_mfcc, 1): the trailing axis is
    # the single channel added for the CNN input.
    X = np.array(data['mfcc'])
    X = X[..., np.newaxis]

    # One predicted class index per segment; the clip's genre is the
    # majority vote across its segments.
    predictions = np.argmax(model.predict(X), axis=-1)
    counts = np.bincount(predictions)
    print('Actual genre = '+genre+'\nFinal genre prediction = ',data['mapping'][np.argmax(counts)]) #printing the genres
    print('\n')




Music duration:  30.013333333333332
Modified music duration: 30.0
Actual genre = blues
Final genre prediction =  blues




Music duration:  30.013333333333332
Modified music duration: 30.0
Actual genre = metal
Final genre prediction =  metal




Music duration:  30.013333333333332
Modified music duration: 30.0
Actual genre = jazz
Final genre prediction =  jazz


