# FINAL FEATURE EXTRACTION AND GENRE CLASSIFICATION

## Importing required libraries

In [17]:
import warnings
warnings.filterwarnings('ignore')

In [18]:
import librosa
import numpy as np
from music21 import converter, analysis
from joblib import load
from tensorflow.keras.models import load_model

## Function for Extracting Initial Features using Librosa and Music21

In [20]:
def extracted_features(input1,input2):
    """Build the raw feature vector for one track from its audio and its score.

    Parameters
    ----------
    input1
        Audio file to analyse (anything ``librosa.load`` accepts).
    input2
        Matching symbolic score, e.g. a MIDI file (anything
        ``music21.converter.parse`` accepts).

    Returns
    -------
    list
        ``[tempo, loudness, key, time_signature, mode, bar_count,
        beat_count, duration_ms]`` followed by the 12 per-coefficient MFCC
        means and then the per-bin chroma means, in that order.
    """
    y, sr = librosa.load(input1)
    score = converter.parse(input2)

    # A single beat-tracking pass yields both the tempo and the beat frames.
    tempo, beats = librosa.beat.beat_track(y=y, sr=sr)
    # Depending on the librosa version, tempo is a scalar or a 1-element
    # array; collapse it to a plain float so the returned list stays flat
    # and can be turned into a rectangular numpy array by the caller.
    tempo = float(np.atleast_1d(tempo)[0])
    beat_count = len(beats)

    # NOTE(review): "loudness" is the negated mean RMS amplitude, not a dB
    # value; presumably this matches how the downstream scaler was fitted.
    loudness = -(librosa.feature.rms(y=y).mean())
    duration_ms = librosa.get_duration(y=y, sr=sr) * 1000

    # Average each MFCC coefficient / chroma bin over time (axis 1 = frames).
    mfccs = librosa.feature.mfcc(y=y, sr=sr, n_mfcc=12)
    avg_timbre = np.mean(mfccs, axis=1).tolist()
    chroma = librosa.feature.chroma_stft(y=y, sr=sr)
    avg_pitch = np.mean(chroma, axis=1).tolist()

    # Score-derived features: measures in the first part, estimated key and
    # the numerator of the first time signature (0 when the score has none).
    bar_count = len(score.parts[0].getElementsByClass('Measure'))
    time_signatures = score.recurse().getElementsByClass('TimeSignature')
    time_signature = time_signatures[0].numerator if time_signatures else 0

    estimated_key = score.analyze('key')
    # pitchClass is the 0-11 chromatic index of the tonic (C=0 ... B=11) and,
    # unlike a hand-written name table, copes with every enharmonic spelling
    # (E#, C-, double accidentals, ...).
    key_number = estimated_key.tonic.pitchClass
    mode = 1 if estimated_key.mode == 'major' else 0

    return [tempo, loudness, key_number, time_signature, mode,
            bar_count, beat_count, duration_ms] + avg_timbre + avg_pitch


## Function to Extract the Remaining Features and Classify the Genre

In [23]:
def final_classification(x, y, *,
                         feature_scaler_path='Scalars/scaler.joblib',
                         audio_model_path='Models/audiofeatures.keras',
                         genre_scaler_path='Scalars/scaler2.joblib',
                         genre_model_path='Models/genreclassificationmodel.joblib'):
    """Predict the genre label for one track.

    The raw features from ``extracted_features`` are scaled and fed to a
    Keras model that predicts seven further audio descriptors. Those
    descriptors, together with tempo and duration, are scaled a second time
    and passed to the genre classifier.

    Parameters
    ----------
    x
        Audio file, forwarded to ``extracted_features`` as its first argument.
    y
        Score file, forwarded to ``extracted_features`` as its second argument.
    feature_scaler_path, audio_model_path, genre_scaler_path, genre_model_path
        Locations of the persisted scalers and models. The defaults are the
        paths this notebook has always used.

    Returns
    -------
    str
        One of "classical", "Electronic", "Folk", "Hip-hop", "Rock", or
        "Unknown class" when the classifier returns an unmapped index.
    """
    raw_features = extracted_features(x, y)
    # Positions in the list returned by extracted_features:
    # index 0 is tempo, index 7 is duration_ms.
    tempo = raw_features[0]
    duration_ms = raw_features[7]

    # Stage 1: predict the remaining audio descriptors from the raw features.
    # joblib.load unpickles its input, so only trusted artifact files should
    # ever be passed in here.
    feature_scaler = load(feature_scaler_path)
    scaled_raw = feature_scaler.transform(np.array(raw_features).reshape(1, -1))
    # Insert a length-1 middle axis; presumably the Keras model expects
    # (batch, timesteps, features) -- confirm against the model definition.
    sequence_input = scaled_raw.reshape((scaled_raw.shape[0], 1, scaled_raw.shape[1]))
    audio_model = load_model(audio_model_path)
    predicted_row = audio_model.predict(sequence_input).tolist()[0]

    predicted_names = [
        "danceability", "energy", "speechiness",
        "acousticness", "instrumentalness", "liveness", "valence"]
    genre_inputs = dict(zip(predicted_names, predicted_row))
    genre_inputs["tempo"] = tempo
    genre_inputs["duration_ms"] = duration_ms

    # Stage 2: scale the nine classifier features in the order handed to the
    # second scaler.
    scaler_order = predicted_names + ["tempo", "duration_ms"]
    unscaled = np.array([genre_inputs[name] for name in scaler_order])
    genre_scaler = load(genre_scaler_path)
    scaled_row = genre_scaler.transform(unscaled.reshape(1, -1)).flatten()
    scaled_by_name = dict(zip(scaler_order, scaled_row))

    # NOTE(review): the classifier is fed tempo and duration_ms FIRST, which
    # differs from the scaler order above. This reproduces the order the
    # notebook has always used; confirm it matches the column order the
    # classifier was trained with.
    classifier_order = ["tempo", "duration_ms"] + predicted_names
    classifier_input = np.array([scaled_by_name[name] for name in classifier_order])
    genre_model = load(genre_model_path)
    prediction = genre_model.predict(classifier_input.reshape(1, -1))

    predicted_class_idx = prediction[0]
    class_mapping = {0: "classical", 1: "Electronic", 2: "Folk", 3: "Hip-hop", 4: "Rock"}
    return class_mapping.get(predicted_class_idx, "Unknown class")

## Line to Print the Genre of the Audio by Calling the Function

In [24]:
# Run the full pipeline on the example audio/score pair and report the label.
genre_label = final_classification('ex.mp3', 'ex.mid')
print("The Classified Genre is:", genre_label)

1/1 ━━━━━━━━━━━━━━━━━━━━ 0s 218ms/step
The Classified Genre is: Electronic
