<a href="https://colab.research.google.com/github/ShobitKapila/Acoustic-source-identification/blob/main/Music_genre_clasification.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

#KNN - classification

In [None]:
import librosa
import pandas as pd
import numpy as np
import os
import csv
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import classification_report, accuracy_score
import joblib

In [None]:
# Workspace root; the dataset is expected in its "genres" sub-folder.
general_path = '/content'
print(list(os.listdir(general_path + '/genres')))


In [None]:

# The ten genre labels; each one is also the name of a dataset sub-folder.
genres = 'blues classical country disco hiphop jazz metal pop reggae rock'.split()
for g in genres:
    # The directory was misspelled '/content/geners', which created a stray
    # folder tree next to the '/content/genres' tree that the rest of the
    # notebook lists and reads from.
    path_audio = os.path.join('/content/genres', g)
    os.makedirs(path_audio, exist_ok=True)

In [None]:
# Column names of the feature table: file name, six summary descriptors,
# twenty MFCC columns and, last, the genre label.
header = 'filename chroma_stft rmse spectral_centroid spectral_bandwidth rolloff zero_crossing_rate'.split()
header += [f'mfcc{i}' for i in range(1, 21)]
header.append('label')

In [None]:
# Start a fresh CSV that contains only the column-name row.
with open('data-3s.csv', 'w', newline='') as file:
    writer = csv.writer(file)
    writer.writerow(header)

In [None]:
# Rebuild the feature table in one pass. The CSV is opened once in 'w' mode and
# the header row is written here, so re-running this cell replaces the table
# instead of appending a duplicate copy of every track.
with open('data-3s.csv', 'w', newline='') as file:
    writer = csv.writer(file)
    writer.writerow(header)
    for g in genres:
        genre_dir = f'/content/genres/{g}'
        for filename in os.listdir(genre_dir):
            songname = f'{genre_dir}/{filename}'
            if not os.path.isfile(songname):
                # Skip stray sub-directories (e.g. '.ipynb_checkpoints').
                continue
            # Analyse only the first 3 seconds of the track, mixed down to mono.
            y, sr = librosa.load(songname, mono=True, duration=3)
            chroma_stft = librosa.feature.chroma_stft(y=y, sr=sr)
            rmse = librosa.feature.rms(y=y)
            spec_cent = librosa.feature.spectral_centroid(y=y, sr=sr)
            spec_bw = librosa.feature.spectral_bandwidth(y=y, sr=sr)
            rolloff = librosa.feature.spectral_rolloff(y=y, sr=sr)
            zcr = librosa.feature.zero_crossing_rate(y)
            mfcc = librosa.feature.mfcc(y=y, sr=sr)
            # One row per track, in header order. Handing the writer a list
            # keeps a file name that contains spaces in a single column; the
            # previous join-then-split approach would have shifted the columns.
            row = [filename, np.mean(chroma_stft), np.mean(rmse), np.mean(spec_cent),
                   np.mean(spec_bw), np.mean(rolloff), np.mean(zcr)]
            row.extend(np.mean(e) for e in mfcc)
            row.append(g)
            writer.writerow(row)

In [None]:
data_3s = pd.read_csv('data-3s.csv')
print(data_3s.head())
print(data_3s.shape)

In [None]:
seed = 12
np.random.seed(seed)
data_shuffle = data_3s.sample(frac=1, random_state=seed).reset_index(drop=True)

In [None]:
data_shuffle = data_shuffle.drop(['filename'], axis=1)
print(data_shuffle.head())

In [None]:
X = data_shuffle.iloc[:, :-1]

In [None]:
genre_list = data_shuffle.iloc[:, -1]
encoder = LabelEncoder()
y = encoder.fit_transform(genre_list)
print(y)

In [None]:
X_train, df_test_valid_X, y_train, df_test_valid_y = train_test_split(X, y, train_size=0.7, random_state=seed, stratify=y)
X_dev, X_test, y_dev, y_test = train_test_split(df_test_valid_X, df_test_valid_y, train_size=0.66, random_state=seed, stratify=df_test_valid_y)

In [None]:
print(f"Train set has {X_train.shape[0]} records out of {len(data_shuffle)} which is {round(X_train.shape[0]/len(data_shuffle)*100)}%")
print(f"Dev set has {X_dev.shape[0]} records out of {len(data_shuffle)} which is {round(X_dev.shape[0]/len(data_shuffle)*100)}%")
print(f"Test set has {X_test.shape[0]} records out of {len(data_shuffle)} which is {round(X_test.shape[0]/len(data_shuffle)*100)}%")

In [None]:
scaler = StandardScaler()

In [None]:
X_train = pd.DataFrame(scaler.fit_transform(X_train), columns=X_train.columns)
X_dev = pd.DataFrame(scaler.transform(X_dev), columns=X_dev.columns)
X_test = pd.DataFrame(scaler.transform(X_test), columns=X_test.columns)

In [None]:
# Fit a k-nearest-neighbours classifier (k = 5) on the scaled training split.
knn = KNeighborsClassifier(n_neighbors=5)
knn.fit(X_train, y_train)

In [None]:
# Score the classifier on the test split and print the accuracy as a percentage.
y_pred = knn.predict(X_test)
test_acc = accuracy_score(y_test, y_pred)
print("The test Accuracy is :", test_acc * 100)

In [None]:
# Persist the fitted model and scaler so the prediction cell can reload them.
joblib.dump(knn, 'knn_model.pkl')
joblib.dump(scaler, 'scaler.pkl')

In [None]:
def fit_feature(songname, duration=30):
    """Extract the feature vector for a single audio file.

    Args:
        songname: Path of the audio file to analyse.
        duration: Number of seconds to load from the start of the file.
            Defaults to 30, the value this function always used. Note that
            the training table in this notebook is built from 3-second
            clips, so passing ``duration=3`` matches the training features.

    Returns:
        A 1-D array holding the means of chroma, RMS energy, spectral
        centroid, spectral bandwidth, spectral roll-off and zero-crossing
        rate, followed by the mean of each MFCC coefficient, i.e. the same
        order as the feature columns of the CSV.
    """
    y, sr = librosa.load(songname, mono=True, duration=duration)
    descriptors = (
        librosa.feature.chroma_stft(y=y, sr=sr),
        librosa.feature.rms(y=y),
        librosa.feature.spectral_centroid(y=y, sr=sr),
        librosa.feature.spectral_bandwidth(y=y, sr=sr),
        librosa.feature.spectral_rolloff(y=y, sr=sr),
        librosa.feature.zero_crossing_rate(y),
    )
    mfcc = librosa.feature.mfcc(y=y, sr=sr)
    descriptor_means = [np.mean(d) for d in descriptors]
    # One mean per MFCC row, computed in a single call instead of growing the
    # array with np.append on every iteration.
    return np.concatenate([descriptor_means, mfcc.mean(axis=1)])

In [None]:
# Classify one new track with the artefacts saved by the previous cells.
scaler = joblib.load('scaler.pkl')
knn = joblib.load('knn_model.pkl')
new_song_feature = fit_feature('/content/better-day-186374.mp3')
# The scaler works on a 2-D batch, so wrap the single vector in a list.
new_song_feature = scaler.transform([new_song_feature])
prediction = knn.predict(new_song_feature)
# `encoder` is the in-memory LabelEncoder fitted earlier; it is not reloaded.
predicted_genre = encoder.inverse_transform(prediction)
print("Predicted Genre:", predicted_genre[0])

#SVM

In [None]:
import librosa
import pandas as pd
import numpy as np
import os
import csv
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import classification_report, accuracy_score
import joblib

In [None]:
# Workspace root; the dataset is expected in its "genres" sub-folder.
general_path = '/content'
print(list(os.listdir(general_path + '/genres')))

In [None]:

# The ten genre labels; each one is also the name of a dataset sub-folder.
genres = 'blues classical country disco hiphop jazz metal pop reggae rock'.split()
for g in genres:
    # The directory was misspelled '/content/geners', which created a stray
    # folder tree next to the '/content/genres' tree that the rest of the
    # notebook lists and reads from.
    path_audio = os.path.join('/content/genres', g)
    os.makedirs(path_audio, exist_ok=True)

In [None]:
# Column names of the feature table: file name, six summary descriptors,
# twenty MFCC columns and, last, the genre label.
header = 'filename chroma_stft rmse spectral_centroid spectral_bandwidth rolloff zero_crossing_rate'.split()
header += [f'mfcc{i}' for i in range(1, 21)]
header.append('label')

In [None]:
# Start a fresh CSV that contains only the column-name row.
with open('data-3s.csv', 'w', newline='') as file:
    writer = csv.writer(file)
    writer.writerow(header)

In [None]:
# Rebuild the feature table in one pass. The CSV is opened once in 'w' mode and
# the header row is written here, so re-running this cell replaces the table
# instead of appending a duplicate copy of every track.
with open('data-3s.csv', 'w', newline='') as file:
    writer = csv.writer(file)
    writer.writerow(header)
    for g in genres:
        genre_dir = f'/content/genres/{g}'
        for filename in os.listdir(genre_dir):
            songname = f'{genre_dir}/{filename}'
            if not os.path.isfile(songname):
                # Skip stray sub-directories (e.g. '.ipynb_checkpoints').
                continue
            # Analyse only the first 3 seconds of the track, mixed down to mono.
            y, sr = librosa.load(songname, mono=True, duration=3)
            chroma_stft = librosa.feature.chroma_stft(y=y, sr=sr)
            rmse = librosa.feature.rms(y=y)
            spec_cent = librosa.feature.spectral_centroid(y=y, sr=sr)
            spec_bw = librosa.feature.spectral_bandwidth(y=y, sr=sr)
            rolloff = librosa.feature.spectral_rolloff(y=y, sr=sr)
            zcr = librosa.feature.zero_crossing_rate(y)
            mfcc = librosa.feature.mfcc(y=y, sr=sr)
            # One row per track, in header order. Handing the writer a list
            # keeps a file name that contains spaces in a single column; the
            # previous join-then-split approach would have shifted the columns.
            row = [filename, np.mean(chroma_stft), np.mean(rmse), np.mean(spec_cent),
                   np.mean(spec_bw), np.mean(rolloff), np.mean(zcr)]
            row.extend(np.mean(e) for e in mfcc)
            row.append(g)
            writer.writerow(row)

In [None]:
data_3s = pd.read_csv('data-3s.csv')
print(data_3s.head())
print(data_3s.shape)

In [None]:
seed = 12
np.random.seed(seed)
data_shuffle = data_3s.sample(frac=1, random_state=seed).reset_index(drop=True)

In [None]:
data_shuffle = data_shuffle.drop(['filename'], axis=1)
print(data_shuffle.head())

In [None]:
X = data_shuffle.iloc[:, :-1]

In [None]:
genre_list = data_shuffle.iloc[:, -1]
encoder = LabelEncoder()
y = encoder.fit_transform(genre_list)
print(y)

In [None]:
X_train, df_test_valid_X, y_train, df_test_valid_y = train_test_split(X, y, train_size=0.7, random_state=seed, stratify=y)
X_dev, X_test, y_dev, y_test = train_test_split(df_test_valid_X, df_test_valid_y, train_size=0.66, random_state=seed, stratify=df_test_valid_y)

In [None]:
print(f"Train set has {X_train.shape[0]} records out of {len(data_shuffle)} which is {round(X_train.shape[0]/len(data_shuffle)*100)}%")
print(f"Dev set has {X_dev.shape[0]} records out of {len(data_shuffle)} which is {round(X_dev.shape[0]/len(data_shuffle)*100)}%")
print(f"Test set has {X_test.shape[0]} records out of {len(data_shuffle)} which is {round(X_test.shape[0]/len(data_shuffle)*100)}%")

In [None]:
scaler = StandardScaler()

In [None]:
X_train = pd.DataFrame(scaler.fit_transform(X_train), columns=X_train.columns)
X_dev = pd.DataFrame(scaler.transform(X_dev), columns=X_dev.columns)
X_test = pd.DataFrame(scaler.transform(X_test), columns=X_test.columns)

In [None]:
# This section's import cell never imports `svm`, so `svm.SVC` raised a
# NameError; bring the classifier into scope here.
from sklearn.svm import SVC

# Linear-kernel support vector classifier; probability=True additionally
# enables predict_proba() on the fitted model.
svm_model = SVC(kernel='linear', probability=True)
svm_model.fit(X_train, y_train)

In [None]:
# Predict the test split, then print the per-class report and overall accuracy.
y_pred = svm_model.predict(X_test)
print("Classification Report:\n", classification_report(y_test, y_pred))
print("Accuracy Score:", accuracy_score(y_test, y_pred))

In [None]:
# Persist the SVM itself. The previous code dumped `knn` here, so
# 'svm_model.pkl' held the k-NN classifier (or the cell failed with a
# NameError when the KNN section had not been run).
joblib.dump(svm_model, 'svm_model.pkl')
joblib.dump(scaler, 'scaler.pkl')

In [None]:
def fit_feature(songname, duration=30):
    """Extract the feature vector for a single audio file.

    Args:
        songname: Path of the audio file to analyse.
        duration: Number of seconds to load from the start of the file.
            Defaults to 30, the value this function always used. Note that
            the training table in this notebook is built from 3-second
            clips, so passing ``duration=3`` matches the training features.

    Returns:
        A 1-D array holding the means of chroma, RMS energy, spectral
        centroid, spectral bandwidth, spectral roll-off and zero-crossing
        rate, followed by the mean of each MFCC coefficient, i.e. the same
        order as the feature columns of the CSV.
    """
    y, sr = librosa.load(songname, mono=True, duration=duration)
    descriptors = (
        librosa.feature.chroma_stft(y=y, sr=sr),
        librosa.feature.rms(y=y),
        librosa.feature.spectral_centroid(y=y, sr=sr),
        librosa.feature.spectral_bandwidth(y=y, sr=sr),
        librosa.feature.spectral_rolloff(y=y, sr=sr),
        librosa.feature.zero_crossing_rate(y),
    )
    mfcc = librosa.feature.mfcc(y=y, sr=sr)
    descriptor_means = [np.mean(d) for d in descriptors]
    # One mean per MFCC row, computed in a single call instead of growing the
    # array with np.append on every iteration.
    return np.concatenate([descriptor_means, mfcc.mean(axis=1)])

In [None]:
# Classify one new track with the saved scaler and SVM.
new_song_feature = fit_feature('/content/better-day-186374.mp3')
scaler = joblib.load('scaler.pkl')
# Bind the reloaded model to `svm_model`; it used to be assigned to `knn`,
# which mislabelled it and overwrote the k-NN classifier variable.
svm_model = joblib.load('svm_model.pkl')
# The scaler works on a 2-D batch, so wrap the single vector in a list.
new_song_feature = scaler.transform([new_song_feature])
prediction = svm_model.predict(new_song_feature)
# `encoder` is the in-memory LabelEncoder fitted earlier; it is not reloaded.
predicted_genre = encoder.inverse_transform(prediction)
print("Predicted Genre:", predicted_genre[0])

#Random Forest

In [None]:
import csv
import os

import IPython
import joblib
import librosa
import librosa.display
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
from sklearn import preprocessing
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.preprocessing import minmax_scale

In [None]:
# Workspace root; the dataset is expected in its "genres" sub-folder.
general_path = '/content'
print(list(os.listdir(general_path + '/genres')))

In [None]:
# The ten genre labels; each one is also the name of a dataset sub-folder.
genres = 'blues classical country disco hiphop jazz metal pop reggae rock'.split()
for g in genres:
    # The directory was misspelled '/content/geners', which created a stray
    # folder tree next to the '/content/genres' tree that the rest of the
    # notebook lists and reads from.
    path_audio = os.path.join('/content/genres', g)
    os.makedirs(path_audio, exist_ok=True)

In [None]:
# Column names of the feature table: file name, six summary descriptors,
# twenty MFCC columns and, last, the genre label.
header = 'filename chroma_stft rmse spectral_centroid spectral_bandwidth rolloff zero_crossing_rate'.split()
header += [f'mfcc{i}' for i in range(1, 21)]
header.append('label')

In [None]:
# This section has no feature-extraction cell after the header cell, so
# truncating the CSV unconditionally left the next cell reading a table with
# no rows. Only create the file when no data exists yet; an existing feature
# table is kept as it is.
if not os.path.exists('data-3s.csv') or os.path.getsize('data-3s.csv') == 0:
    with open('data-3s.csv', 'w', newline='') as file:
        writer = csv.writer(file)
        writer.writerow(header)

In [None]:
data_3s = pd.read_csv('data-3s.csv')
print(data_3s.head())
print(data_3s.shape)

In [None]:
seed = 12
np.random.seed(seed)
data_shuffle = data_3s.sample(frac=1, random_state=seed).reset_index(drop=True)

In [None]:
data_shuffle = data_shuffle.drop(['filename'], axis=1)
print(data_shuffle.head())

In [None]:
X = data_shuffle.iloc[:, :-1]
genre_list = data_shuffle.iloc[:, -1]

In [None]:
encoder = LabelEncoder()

In [None]:
y = encoder.fit_transform(genre_list)
print(y)

In [None]:
X_train, df_test_valid_X, y_train, df_test_valid_y = train_test_split(X, y, train_size=0.7, random_state=seed, stratify=y)
X_dev, X_test, y_dev, y_test = train_test_split(df_test_valid_X, df_test_valid_y, train_size=0.66, random_state=seed, stratify=df_test_valid_y)

In [None]:
print(f"Train set has {X_train.shape[0]} records out of {len(data_shuffle)} which is {round(X_train.shape[0]/len(data_shuffle)*100)}%")
print(f"Dev set has {X_dev.shape[0]} records out of {len(data_shuffle)} which is {round(X_dev.shape[0]/len(data_shuffle)*100)}%")
print(f"Test set has {X_test.shape[0]} records out of {len(data_shuffle)} which is {round(X_test.shape[0]/len(data_shuffle)*100)}%")

In [None]:
scaler = StandardScaler()

In [None]:
X_train = pd.DataFrame(scaler.fit_transform(X_train), columns=X_train.columns)
X_dev = pd.DataFrame(scaler.transform(X_dev), columns=X_dev.columns)
X_test = pd.DataFrame(scaler.transform(X_test), columns=X_test.columns)

In [None]:
# Fit a 100-tree random forest on the scaled training split; the seed makes
# the forest reproducible.
rf = RandomForestClassifier(n_estimators=100, random_state=seed)
rf.fit(X_train, y_train)

In [None]:
# Score the forest on the test split and print the accuracy as a percentage.
y_pred = rf.predict(X_test)
test_acc = accuracy_score(y_test, y_pred)
print("The test Accuracy is :", test_acc * 100)

In [None]:
# Persist the fitted model and scaler so the prediction cell can reload them.
joblib.dump(rf, 'random_forest_model.pkl')
joblib.dump(scaler, 'scaler.pkl')

In [None]:
def fit_feature(songname, duration=30):
    """Extract the feature vector for a single audio file.

    Args:
        songname: Path of the audio file to analyse.
        duration: Number of seconds to load from the start of the file.
            Defaults to 30, the value this function always used. Note that
            the training table in this notebook is built from 3-second
            clips, so passing ``duration=3`` matches the training features.

    Returns:
        A 1-D array holding the means of chroma, RMS energy, spectral
        centroid, spectral bandwidth, spectral roll-off and zero-crossing
        rate, followed by the mean of each MFCC coefficient, i.e. the same
        order as the feature columns of the CSV.
    """
    y, sr = librosa.load(songname, mono=True, duration=duration)
    descriptors = (
        librosa.feature.chroma_stft(y=y, sr=sr),
        librosa.feature.rms(y=y),
        librosa.feature.spectral_centroid(y=y, sr=sr),
        librosa.feature.spectral_bandwidth(y=y, sr=sr),
        librosa.feature.spectral_rolloff(y=y, sr=sr),
        librosa.feature.zero_crossing_rate(y),
    )
    mfcc = librosa.feature.mfcc(y=y, sr=sr)
    descriptor_means = [np.mean(d) for d in descriptors]
    # One mean per MFCC row, computed in a single call instead of growing the
    # array with np.append on every iteration.
    return np.concatenate([descriptor_means, mfcc.mean(axis=1)])

In [None]:
# Classify one new track with the artefacts saved by the previous cells.
scaler = joblib.load('scaler.pkl')
rf = joblib.load('random_forest_model.pkl')
new_song_feature = fit_feature('/content/better-day-186374.mp3')
# The scaler works on a 2-D batch, so wrap the single vector in a list.
new_song_feature = scaler.transform([new_song_feature])
prediction = rf.predict(new_song_feature)
# `encoder` is the in-memory LabelEncoder fitted earlier; it is not reloaded.
predicted_genre = encoder.inverse_transform(prediction)
print("Predicted Genre:", predicted_genre[0])

# Neural Network -
  The model consists of multiple Dense layers with ReLU activation functions and Dropout layers for regularization. The final layer is a Dense layer with a softmax activation function, indicating that this is a multi-class classification problem (genre classification).


In [None]:
import librosa
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import os
import csv
import base64
from IPython.display import HTML
from IPython.display import FileLink
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, StandardScaler
import sklearn.model_selection as skms
import keras
from keras import models
from keras import layers
from tensorflow import keras
from warnings import filterwarnings

In [None]:
# Workspace root; the dataset is expected in its "genres" sub-folder.
general_path = '/content'
print(list(os.listdir(general_path + '/genres')))

['blues', 'country', 'hiphop', 'jazz', 'disco', 'pop', 'classical', 'reggae', 'rock', 'metal', '.ipynb_checkpoints']


In [None]:
# The ten genre labels used throughout this section.
genres = 'blues classical country disco hiphop jazz metal pop reggae rock'
genres = genres.split()
for g in genres:
    # NOTE(review): this creates '/content/<genre>', while the audio is read
    # from '/content/genres/<genre>' — confirm the intended location.
    path_audio = os.path.join('/content', f'{g}')
    # exist_ok=True lets the cell be re-run; without it the second run fails
    # with FileExistsError because the folders already exist.
    os.makedirs(path_audio, exist_ok=True)

In [None]:
# Display the list of genre labels.
genres

['blues',
 'classical',
 'country',
 'disco',
 'hiphop',
 'jazz',
 'metal',
 'pop',
 'reggae',
 'rock']

In [None]:
# Column names of the feature table: file name, six summary descriptors,
# twenty MFCC columns and, last, the genre label.
header = 'filename chroma_stft rmse spectral_centroid spectral_bandwidth rolloff zero_crossing_rate'.split()
header += [f'mfcc{i}' for i in range(1, 21)]
header.append('label')

In [None]:
# Build the feature table in one pass: the CSV is opened once, the header row
# is written, and one row per track follows. The previous version reopened the
# file in append mode for every single track.
with open('data-3s.csv', 'w', newline='') as file:
    writer = csv.writer(file)
    writer.writerow(header)
    for g in genres:
        genre_dir = f'/content/genres/{g}'
        for filename in os.listdir(genre_dir):
            songname = f'{genre_dir}/{filename}'
            if not os.path.isfile(songname):
                # Skip stray sub-directories (e.g. '.ipynb_checkpoints').
                continue
            # Analyse only the first 3 seconds of the track, mixed down to mono.
            y, sr = librosa.load(songname, mono=True, duration=3)
            chroma_stft = librosa.feature.chroma_stft(y=y, sr=sr)
            rmse = librosa.feature.rms(y=y)
            spec_cent = librosa.feature.spectral_centroid(y=y, sr=sr)
            spec_bw = librosa.feature.spectral_bandwidth(y=y, sr=sr)
            rolloff = librosa.feature.spectral_rolloff(y=y, sr=sr)
            zcr = librosa.feature.zero_crossing_rate(y)
            mfcc = librosa.feature.mfcc(y=y, sr=sr)
            # One row per track, in header order. Handing the writer a list
            # keeps a file name that contains spaces in a single column; the
            # previous join-then-split approach would have shifted the columns.
            row = [filename, np.mean(chroma_stft), np.mean(rmse), np.mean(spec_cent),
                   np.mean(spec_bw), np.mean(rolloff), np.mean(zcr)]
            row.extend(np.mean(e) for e in mfcc)
            row.append(g)
            writer.writerow(row)

In [None]:
# Load the extracted feature table and display its first rows.
data_3s = pd.read_csv('data-3s.csv')
data_3s.head()

Unnamed: 0,filename,chroma_stft,rmse,spectral_centroid,spectral_bandwidth,rolloff,zero_crossing_rate,mfcc1,mfcc2,mfcc3,...,mfcc12,mfcc13,mfcc14,mfcc15,mfcc16,mfcc17,mfcc18,mfcc19,mfcc20,label
0,blues.00030.au,0.252451,0.129469,796.877132,1363.858304,1241.72044,0.027813,-261.393524,142.396729,11.462423,...,-6.913297,-12.653015,-12.238706,-21.957417,-23.932734,-19.818665,-17.74176,-15.28971,-6.54513,blues
1,blues.00043.au,0.398613,0.12705,2146.989442,2346.179667,4924.229455,0.087361,-106.287834,102.910103,-9.609563,...,6.560434,-9.160453,5.28328,-15.422059,1.94802,-7.752453,2.932549,-4.131612,4.118633,blues
2,blues.00089.au,0.330216,0.057961,677.501002,1136.151689,1216.211877,0.022919,-345.310028,164.461838,27.915159,...,-6.900138,0.515772,-0.058611,-2.355061,3.843762,-1.428511,-8.551285,-4.98343,0.69475,blues
3,blues.00093.au,0.399003,0.056844,640.531249,1022.053919,1190.123573,0.022397,-350.515137,167.412292,19.829763,...,1.817667,-11.212885,-7.739208,0.326726,-3.299458,-7.078539,0.315696,7.07478,2.348529,blues
4,blues.00011.au,0.395926,0.053601,1387.962171,1815.10176,3007.525916,0.044899,-321.723846,123.837578,4.081958,...,0.056975,-5.541474,-4.698399,-2.931809,-3.163541,-0.938552,-0.314044,-0.40441,-4.586336,blues


In [None]:
# Display (rows, columns) of the feature table.
data_3s.shape

(2000, 28)

In [None]:
seed = 12
np.random.seed(seed)

data_shuffle = data_3s.sample(frac=1, random_state=seed).reset_index(drop=True)

In [None]:
data_shuffle = data_shuffle.drop(['filename'],axis=1)
data_shuffle.head()

Unnamed: 0,chroma_stft,rmse,spectral_centroid,spectral_bandwidth,rolloff,zero_crossing_rate,mfcc1,mfcc2,mfcc3,mfcc4,...,mfcc12,mfcc13,mfcc14,mfcc15,mfcc16,mfcc17,mfcc18,mfcc19,mfcc20,label
0,0.25618,0.11515,1015.645596,1736.459418,1781.044358,0.033905,-264.730042,117.711273,37.102875,40.730743,...,-10.589654,-15.629943,-1.5182,-12.896955,-11.877695,-6.26171,-7.029034,-7.43652,-9.023581,blues
1,0.17189,0.131722,853.453653,1354.98108,1072.850436,0.045797,-265.327423,177.978546,26.460379,-13.352622,...,-5.094402,-8.361628,-4.551609,-6.124369,-6.634355,-13.539562,-10.734464,-8.922804,-3.120422,classical
2,0.433044,0.166802,2358.86743,2035.459115,4431.698843,0.166245,-68.454773,91.542267,-33.452648,72.505608,...,10.681966,-14.74017,0.091191,-5.635654,5.097917,-7.171759,-0.506923,-7.664429,3.29503,blues
3,0.242811,0.043694,1005.522854,1296.907222,1685.055965,0.050068,-301.612091,178.697632,-11.299769,30.42975,...,3.728764,-0.705305,4.562327,-3.58993,-9.752854,-5.986495,-1.602933,-2.998392,-8.878064,classical
4,0.286972,0.133271,1057.885607,1464.969731,1694.083346,0.055649,-171.273804,172.718277,-9.714803,-0.288757,...,-6.272744,-10.091816,-2.728145,-6.671346,-2.232099,1.436002,2.904237,-7.057222,-2.60965,classical


In [None]:
X = data_shuffle.iloc[:, :-1]

In [None]:
genre_list = data_shuffle.iloc[:, -1]
encoder = LabelEncoder()
y = encoder.fit_transform(genre_list)
print(y)

[0 1 0 ... 1 1 1]


In [None]:
X_train, df_test_valid_X, y_train, df_test_valid_y = skms.train_test_split(X, y, train_size=0.7, random_state=seed, stratify=y)

X_dev, X_test, y_dev, y_test = skms.train_test_split(df_test_valid_X, df_test_valid_y, train_size=0.66, random_state=seed, stratify=df_test_valid_y)

In [None]:
print(f"Train set has {X_train.shape[0]} records out of {len(data_shuffle)} which is {round(X_train.shape[0]/len(data_shuffle)*100)}%")
print(f"Dev set has {X_dev.shape[0]} records out of {len(data_shuffle)} which is {round(X_dev.shape[0]/len(data_shuffle)*100)}%")
print(f"Test set has {X_test.shape[0]} records out of {len(data_shuffle)} which is {round(X_test.shape[0]/len(data_shuffle)*100)}%")

Train set has 1400 records out of 2000 which is 70%
Dev set has 396 records out of 2000 which is 20%
Test set has 204 records out of 2000 which is 10%


In [None]:
import sklearn.preprocessing as skp

# Learn the scaling statistics from the training split only.
scaler = skp.StandardScaler()
scaler.fit(X_train)

# Apply the same transformation to all three splits, keeping the training
# column names on each resulting frame.
X_train = pd.DataFrame(scaler.transform(X_train), columns=X_train.columns)
X_dev = pd.DataFrame(scaler.transform(X_dev), columns=X_train.columns)
X_test = pd.DataFrame(scaler.transform(X_test), columns=X_train.columns)

In [None]:
import tensorflow as tf
print("TF version:-", tf.__version__)
import keras as k
# Seed TensorFlow's random generator with the seed used for the shuffle and splits.
tf.random.set_seed(seed)

TF version:- 2.14.0


In [None]:
# Validation accuracy above which training is stopped early.
ACCURACY_THRESHOLD = 0.94


class myCallback(k.callbacks.Callback):
    """Stop training once validation accuracy exceeds ACCURACY_THRESHOLD."""

    def on_epoch_end(self, epoch, logs=None):
        """Check the epoch's validation accuracy and stop training if it is high enough.

        Args:
            epoch: Index of the epoch that just finished (unused).
            logs: Metric values for the epoch; may be None or lack
                'val_accuracy' (for example when no validation data is used).
        """
        # `logs=None` avoids a shared mutable default, and the explicit None
        # check avoids comparing None with a float when the metric is missing.
        val_accuracy = (logs or {}).get('val_accuracy')
        if val_accuracy is not None and val_accuracy > ACCURACY_THRESHOLD:
            print("\n\nStopping training as we have reached %2.2f%% accuracy!" %(ACCURACY_THRESHOLD*100))
            self.model.stop_training = True

def trainModel(model, epochs, optimizer, batch_size=128):
    """Compile `model` and train it on the notebook's training split.

    Args:
        model: Keras model to compile and fit.
        epochs: Maximum number of epochs to run.
        optimizer: Optimizer name or instance passed to ``compile``.
        batch_size: Mini-batch size; defaults to 128, the value that was
            previously hard-coded.

    Returns:
        The History object returned by ``model.fit``.

    Training uses the module-level X_train/y_train and validates on
    X_dev/y_dev; a myCallback instance stops it early once the validation
    accuracy threshold is reached.
    """
    model.compile(
        optimizer=optimizer,
        loss='sparse_categorical_crossentropy',
        # Passed as a list: newer Keras releases reject a bare string here.
        metrics=['accuracy'],
    )
    return model.fit(
        X_train, y_train,
        validation_data=(X_dev, y_dev),
        epochs=epochs,
        batch_size=batch_size,
        callbacks=[myCallback()],
    )

def plotHistory(history):
    """Print the best validation accuracy and plot every recorded metric per epoch."""
    metrics_by_epoch = history.history
    best_val_accuracy = max(metrics_by_epoch["val_accuracy"])
    print("Max. Validation Accuracy", best_val_accuracy)
    curves = pd.DataFrame(metrics_by_epoch)
    curves.plot(figsize=(12,6))
    plt.show()

In [None]:
# Fully connected classifier: five ReLU layers of decreasing width, each
# followed by 30 % dropout, and a 10-way softmax output layer.
model_4 = k.models.Sequential([
    # The input width equals the number of feature columns.
    k.layers.Dense(1024, activation='relu', input_shape=(X_train.shape[1],)),
    k.layers.Dropout(0.3),

    k.layers.Dense(512, activation='relu'),
    k.layers.Dropout(0.3),

    k.layers.Dense(256, activation='relu'),
    k.layers.Dropout(0.3),

    k.layers.Dense(128, activation='relu'),
    k.layers.Dropout(0.3),

    k.layers.Dense(64, activation='relu'),
    k.layers.Dropout(0.3),

    # One output unit per genre.
    k.layers.Dense(10, activation='softmax'),
])
print(model_4.summary())
# Train for up to 500 epochs; the callback inside trainModel may stop earlier.
model_4_history = trainModel(model=model_4, epochs=500, optimizer='rmsprop')

Model: "sequential_1"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 dense_5 (Dense)             (None, 1024)              27648     
                                                                 
 dropout_4 (Dropout)         (None, 1024)              0         
                                                                 
 dense_6 (Dense)             (None, 512)               524800    
                                                                 
 dropout_5 (Dropout)         (None, 512)               0         
                                                                 
 dense_7 (Dense)             (None, 256)               131328    
                                                                 
 dropout_6 (Dropout)         (None, 256)               0         
                                                                 
 dense_8 (Dense)             (None, 128)              

In [None]:
# Evaluate the trained network on the test split; the two returned values are
# unpacked as loss and accuracy.
test_loss, test_acc  = model_4.evaluate(X_test, y_test, batch_size=128)
print("The test Loss is :",test_loss)
print("\nThe Best test Accuracy is :",test_acc*100)

The test Loss is : 0.08516893535852432

The Best test Accuracy is : 96.56862616539001


In [None]:
# Predict class scores for the test split and display the index of the
# highest-scoring class for the first test sample.
predictions = model_4.predict(X_test)
np.argmax(predictions[0])



1

In [None]:
def fit_feature(songname, duration=30):
    """Extract the feature vector for a single audio file.

    Args:
        songname: Path of the audio file to analyse.
        duration: Number of seconds to load from the start of the file.
            Defaults to 30, the value this function always used. Note that
            the training table in this notebook is built from 3-second
            clips, so passing ``duration=3`` matches the training features.

    Returns:
        A 1-D array holding the means of chroma, RMS energy, spectral
        centroid, spectral bandwidth, spectral roll-off and zero-crossing
        rate, followed by the mean of each MFCC coefficient, i.e. the same
        order as the feature columns of the CSV.
    """
    y, sr = librosa.load(songname, mono=True, duration=duration)
    descriptors = (
        librosa.feature.chroma_stft(y=y, sr=sr),
        librosa.feature.rms(y=y),
        librosa.feature.spectral_centroid(y=y, sr=sr),
        librosa.feature.spectral_bandwidth(y=y, sr=sr),
        librosa.feature.spectral_rolloff(y=y, sr=sr),
        librosa.feature.zero_crossing_rate(y),
    )
    mfcc = librosa.feature.mfcc(y=y, sr=sr)
    descriptor_means = [np.mean(d) for d in descriptors]
    # One mean per MFCC row, computed in a single call instead of growing the
    # array with np.append on every iteration.
    return np.concatenate([descriptor_means, mfcc.mean(axis=1)])

In [None]:
# Classify one new track with the trained network.
file_path = "/content/better-day-186374.mp3"
feature = fit_feature(file_path)
# Keep the network output in its own name: it used to be assigned to `y`,
# which overwrote the encoded label array used for training.
probabilities = model_4.predict(scaler.transform([feature]))
ind = np.argmax(probabilities)
# Map the class index back through the fitted LabelEncoder instead of
# indexing `genres`, which is only correct while that list happens to be in
# the encoder's class order.
encoder.inverse_transform([ind])[0]





'blues'

In [None]:
# Save the trained network to an .h5 file and show a download link for it.
model_4.save("music_gener_ASI.h5")
FileLink(r'music_gener_ASI.h5')

  saving_api.save_model(


#LSTM


In [None]:
import librosa
import pandas as pd
import numpy as np
import os
import csv
import matplotlib.pyplot as plt
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.model_selection import train_test_split
from keras.models import Sequential
from keras.layers import Dense, Dropout, LSTM
from keras.callbacks import Callback
from sklearn.metrics import classification_report, accuracy_score

In [None]:
# Folder holding one sub-folder of audio per genre. This was the placeholder
# '/path/to/your/data', which did not match the '/content/genres/<genre>'
# location the feature-extraction cell reads from.
general_path = '/content/genres'
genres = 'blues classical country disco hiphop jazz metal pop reggae rock'.split()


In [None]:
# Make sure a folder exists for every genre beneath general_path;
# exist_ok=True makes the cell safe to re-run.
for g in genres:
    path_audio = os.path.join(general_path, g)
    os.makedirs(path_audio, exist_ok=True)


In [None]:
# Column names of the feature table: file name, six summary descriptors,
# twenty MFCC columns and, last, the genre label.
header = 'filename chroma_stft rmse spectral_centroid spectral_bandwidth rolloff zero_crossing_rate'.split()
header += [f'mfcc{i}' for i in range(1, 21)]
header.append('label')

In [None]:
# Start a fresh CSV that contains only the column-name row.
with open('data-3s.csv', 'w', newline='') as file:
    writer = csv.writer(file)
    writer.writerow(header)

In [None]:
# Rebuild the feature table in one pass. The CSV is opened once in 'w' mode and
# the header row is written here, so re-running this cell replaces the table
# instead of appending a duplicate copy of every track.
with open('data-3s.csv', 'w', newline='') as file:
    writer = csv.writer(file)
    writer.writerow(header)
    for g in genres:
        genre_dir = f'/content/genres/{g}'
        for filename in os.listdir(genre_dir):
            songname = f'{genre_dir}/{filename}'
            if not os.path.isfile(songname):
                # Skip stray sub-directories (e.g. '.ipynb_checkpoints').
                continue
            # Analyse only the first 3 seconds of the track, mixed down to mono.
            y, sr = librosa.load(songname, mono=True, duration=3)
            chroma_stft = librosa.feature.chroma_stft(y=y, sr=sr)
            rmse = librosa.feature.rms(y=y)
            spec_cent = librosa.feature.spectral_centroid(y=y, sr=sr)
            spec_bw = librosa.feature.spectral_bandwidth(y=y, sr=sr)
            rolloff = librosa.feature.spectral_rolloff(y=y, sr=sr)
            zcr = librosa.feature.zero_crossing_rate(y)
            mfcc = librosa.feature.mfcc(y=y, sr=sr)
            # One row per track, in header order. Handing the writer a list
            # keeps a file name that contains spaces in a single column; the
            # previous join-then-split approach would have shifted the columns.
            row = [filename, np.mean(chroma_stft), np.mean(rmse), np.mean(spec_cent),
                   np.mean(spec_bw), np.mean(rolloff), np.mean(zcr)]
            row.extend(np.mean(e) for e in mfcc)
            row.append(g)
            writer.writerow(row)

In [None]:
# Load the extracted features; show a preview and the table dimensions.
data_3s = pd.read_csv('data-3s.csv')
print(data_3s.head(), data_3s.shape, sep='\n')

         filename  chroma_stft      rmse  spectral_centroid  \
0  blues.00030.au     0.252451  0.129469         796.877132   
1  blues.00043.au     0.398613  0.127050        2146.989442   
2  blues.00089.au     0.330216  0.057961         677.501002   
3  blues.00093.au     0.399003  0.056844         640.531249   
4  blues.00011.au     0.395926  0.053601        1387.962171   

   spectral_bandwidth      rolloff  zero_crossing_rate       mfcc1  \
0         1363.858304  1241.720440            0.027813 -261.393524   
1         2346.179667  4924.229455            0.087361 -106.287834   
2         1136.151689  1216.211877            0.022919 -345.310028   
3         1022.053919  1190.123573            0.022397 -350.515137   
4         1815.101760  3007.525916            0.044899 -321.723846   

        mfcc2      mfcc3  ...    mfcc12     mfcc13     mfcc14     mfcc15  \
0  142.396729  11.462423  ... -6.913297 -12.653015 -12.238706 -21.957417   
1  102.910103  -9.609563  ...  6.560434  -9.1604

In [None]:
seed = 12
np.random.seed(seed)
data_shuffle = data_3s.sample(frac=1, random_state=seed).reset_index(drop=True)

In [None]:
data_shuffle = data_shuffle.drop(['filename'], axis=1)
print(data_shuffle.head())

   chroma_stft      rmse  spectral_centroid  spectral_bandwidth      rolloff  \
0     0.277971  0.160153        2562.661518         2478.888737  5153.226788   
1     0.456815  0.153437        2209.864606         2729.672694  5100.636080   
2     0.413502  0.086312        4700.819906         3427.468067  8845.508564   
3     0.304375  0.098867        1853.964030         1944.683935  3848.480319   
4     0.340747  0.065988        1958.418703         2156.088175  4275.003380   

   zero_crossing_rate       mfcc1       mfcc2      mfcc3      mfcc4  ...  \
0            0.117075  -72.641045   71.300972 -10.395139  32.301598  ...   
1            0.072269 -116.014336   91.779205  20.279716  30.649773  ...   
2            0.285780 -129.735794   27.791946  27.142660  17.484444  ...   
3            0.082027 -171.452637  117.102180 -22.616510  32.930000  ...   
4            0.089915 -165.906097  122.589996 -19.829643  27.253536  ...   

     mfcc12     mfcc13    mfcc14     mfcc15    mfcc16    mfcc1

In [None]:
X = data_shuffle.iloc[:, :-1]
genre_list = data_shuffle.iloc[:, -1]

In [None]:
encoder = LabelEncoder()

In [None]:
y = encoder.fit_transform(genre_list)
print(y)

[5 8 7 2 3 8 7 8 9 3 5 3 5 5 0 8 1 9 8 1 9 3 1 3 3 1 3 4 4 7 3 9 6 3 6 7 0
 2 6 1 5 9 1 4 7 5 2 0 0 9 1 8 9 5 2 1 6 3 3 3 5 5 4 6 8 4 5 4 5 3 6 1 6 8
 6 0 8 0 5 3 7 7 4 8 3 2 7 1 5 7 9 9 2 5 2 4 8 5 0 1 9 7 7 4 6 8 4 9 8 5 8
 6 5 8 5 2 7 1 2 3 7 0 3 3 9 4 4 3 8 2 2 9 0 9 1 9 4 6 5 0 4 2 9 0 3 1 5 5
 7 9 9 4 4 1 6 3 6 4 4 1 8 2 4 6 7 3 7 0 8 2 3 0 1 9 8 0 3 1 6 4 4 6 3 1 1
 9 8 6 3 8 6 3 7 7 7 9 7 9 0 1 0 9 2 7 7 5 6 4 8 0 4 2 3 2 9 3 7 0 3 9 5 0
 1 4 5 7 1 2 9 2 0 3 8 0 4 5 7 5 4 0 6 1 0 4 6 1 7 6 3 0 9 1 3 2 5 7 9 2 9
 2 2 2 1 5 0 5 9 4 2 5 0 4 9 7 9 9 2 8 4 0 0 0 8 0 2 1 7 5 8 2 7 6 8 2 4 6
 2 2 3 6 8 2 4 9 4 4 1 8 6 5 7 1 0 8 2 9 6 3 5 4 0 7 7 3 5 0 4 5 3 1 1 7 9
 3 5 4 7 2 5 7 0 7 5 9 9 7 9 4 7 3 0 8 2 6 3 0 1 8 3 9 5 9 0 8 1 8 1 9 0 2
 0 9 4 0 9 7 2 0 8 5 4 5 5 3 2 6 2 8 2 8 3 9 8 5 7 4 6 1 5 6 2 7 7 0 8 3 6
 4 3 8 6 7 0 2 0 3 3 6 8 1 3 9 2 5 6 1 0 6 2 4 7 9 4 7 0 9 9 3 9 0 3 6 6 6
 4 8 1 9 2 8 2 9 1 6 0 8 4 3 5 1 9 7 8 7 4 4 1 5 6 2 1 9 3 3 6 7 8 5 1 0 1
 4 7 3 9 8 0 9 5 0 0 0 7 

In [None]:
X_train, df_test_valid_X, y_train, df_test_valid_y = train_test_split(X, y, train_size=0.7, random_state=seed, stratify=y)
X_dev, X_test, y_dev, y_test = train_test_split(df_test_valid_X, df_test_valid_y, train_size=0.66, random_state=seed, stratify=df_test_valid_y)

In [None]:
print(f"Train set has {X_train.shape[0]} records out of {len(data_shuffle)} which is {round(X_train.shape[0]/len(data_shuffle)*100)}%")
print(f"Dev set has {X_dev.shape[0]} records out of {len(data_shuffle)} which is {round(X_dev.shape[0]/len(data_shuffle)*100)}%")
print(f"Test set has {X_test.shape[0]} records out of {len(data_shuffle)} which is {round(X_test.shape[0]/len(data_shuffle)*100)}%")

Train set has 700 records out of 1000 which is 70%
Dev set has 198 records out of 1000 which is 20%
Test set has 102 records out of 1000 which is 10%


In [None]:
scaler = StandardScaler()

In [None]:
X_train = pd.DataFrame(scaler.fit_transform(X_train), columns=X_train.columns)
X_dev = pd.DataFrame(scaler.transform(X_dev), columns=X_dev.columns)
X_test = pd.DataFrame(scaler.transform(X_test), columns=X_test.columns)

In [None]:
X_train = np.expand_dims(X_train.values, axis=1)
X_dev = np.expand_dims(X_dev.values, axis=1)
X_test = np.expand_dims(X_test.values, axis=1)

In [None]:
# Validation accuracy above which training is stopped early.
ACCURACY_THRESHOLD = 0.94


class myCallback(Callback):
    """Stop training once validation accuracy exceeds ACCURACY_THRESHOLD."""

    def on_epoch_end(self, epoch, logs=None):
        """Check the epoch's validation accuracy and stop training if it is high enough.

        Args:
            epoch: Index of the epoch that just finished (unused).
            logs: Metric values for the epoch; may be None or lack
                'val_accuracy' (for example when no validation data is used).
        """
        # `logs=None` avoids a shared mutable default, and the explicit None
        # check avoids comparing None with a float when the metric is missing.
        val_accuracy = (logs or {}).get('val_accuracy')
        if val_accuracy is not None and val_accuracy > ACCURACY_THRESHOLD:
            print("\n\nStopping training as we have reached %2.2f%% accuracy!" %(ACCURACY_THRESHOLD*100))
            self.model.stop_training = True

def trainModel(model, epochs, optimizer, batch_size=128):
    """Compile `model` and train it on the notebook's training split.

    Args:
        model: Keras model to compile and fit.
        epochs: Maximum number of epochs to run.
        optimizer: Optimizer name or instance passed to ``compile``.
        batch_size: Mini-batch size; defaults to 128, the value that was
            previously hard-coded.

    Returns:
        The History object returned by ``model.fit``.

    Training uses the module-level X_train/y_train and validates on
    X_dev/y_dev; a myCallback instance stops it early once the validation
    accuracy threshold is reached.
    """
    model.compile(
        optimizer=optimizer,
        loss='sparse_categorical_crossentropy',
        # Passed as a list: newer Keras releases reject a bare string here.
        metrics=['accuracy'],
    )
    return model.fit(
        X_train, y_train,
        validation_data=(X_dev, y_dev),
        epochs=epochs,
        batch_size=batch_size,
        callbacks=[myCallback()],
    )

def plotHistory(history):
    """Print the best validation accuracy and plot every recorded metric curve.

    Args:
        history: Object whose ``history`` attribute maps metric names to
            per-epoch value lists and includes a ``"val_accuracy"`` entry.
    """
    per_epoch_metrics = history.history
    best_val_accuracy = max(per_epoch_metrics["val_accuracy"])
    print("Max. Validation Accuracy", best_val_accuracy)

    curves = pd.DataFrame(per_epoch_metrics)
    curves.plot(figsize=(12, 6))
    plt.show()

In [None]:
# Sequential network: two stacked LSTM layers, two dense ReLU layers and a
# 10-unit softmax output, with 30% dropout after every hidden layer.
model_lstm = Sequential([
    # input_shape is taken from axes 1 and 2 of X_train; return_sequences=True
    # passes the full output sequence on to the next LSTM layer.
    LSTM(128, input_shape=(X_train.shape[1], X_train.shape[2]), return_sequences=True),
    Dropout(0.3),

    # return_sequences=False: only the final output feeds the dense layers.
    LSTM(64, return_sequences=False),
    Dropout(0.3),

    Dense(128, activation='relu'),
    Dropout(0.3),

    Dense(64, activation='relu'),
    Dropout(0.3),

    # One output unit per class (the genre list has ten entries).
    Dense(10, activation='softmax'),
])

In [None]:
# summary() prints the layer table itself and returns None, so wrapping it in
# print() only appended a stray "None" line after the table.
model_lstm.summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 lstm (LSTM)                 (None, 1, 128)            79360     
                                                                 
 dropout (Dropout)           (None, 1, 128)            0         
                                                                 
 lstm_1 (LSTM)               (None, 64)                49408     
                                                                 
 dropout_1 (Dropout)         (None, 64)                0         
                                                                 
 dense (Dense)               (None, 128)               8320      
                                                                 
 dropout_2 (Dropout)         (None, 128)               0         
                                                                 
 dense_1 (Dense)             (None, 64)                8

In [None]:
# Train for up to 500 epochs with RMSprop; the callback created inside
# trainModel can request an earlier stop.
model_lstm_history = trainModel(model=model_lstm, epochs=500, optimizer='rmsprop')

Epoch 1/500
Epoch 2/500
Epoch 3/500
Epoch 4/500
Epoch 5/500
Epoch 6/500
Epoch 7/500
Epoch 8/500
Epoch 9/500
Epoch 10/500
Epoch 11/500
Epoch 12/500
Epoch 13/500
Epoch 14/500
Epoch 15/500
Epoch 16/500
Epoch 17/500
Epoch 18/500
Epoch 19/500
Epoch 20/500
Epoch 21/500
Epoch 22/500
Epoch 23/500
Epoch 24/500
Epoch 25/500
Epoch 26/500
Epoch 27/500
Epoch 28/500
Epoch 29/500
Epoch 30/500
Epoch 31/500
Epoch 32/500
Epoch 33/500
Epoch 34/500
Epoch 35/500
Epoch 36/500
Epoch 37/500
Epoch 38/500
Epoch 39/500
Epoch 40/500
Epoch 41/500
Epoch 42/500
Epoch 43/500
Epoch 44/500
Epoch 45/500
Epoch 46/500
Epoch 47/500
Epoch 48/500
Epoch 49/500
Epoch 50/500
Epoch 51/500
Epoch 52/500
Epoch 53/500
Epoch 54/500
Epoch 55/500
Epoch 56/500
Epoch 57/500
Epoch 58/500
Epoch 59/500
Epoch 60/500
Epoch 61/500
Epoch 62/500
Epoch 63/500
Epoch 64/500
Epoch 65/500
Epoch 66/500
Epoch 67/500
Epoch 68/500
Epoch 69/500
Epoch 70/500
Epoch 71/500
Epoch 72/500
Epoch 73/500
Epoch 74/500
Epoch 75/500
Epoch 76/500
Epoch 77/500
Epoch 78

In [None]:
# Score the trained model on the held-out test split; accuracy is scaled to a
# percentage for printing.
test_loss, test_acc  = model_lstm.evaluate(X_test, y_test, batch_size=128)
print("The test Loss is :",test_loss)
print("\nThe Best test Accuracy is :",test_acc*100)

The test Loss is : 4.895003318786621

The Best test Accuracy is : 52.941179275512695


In [None]:
# Predict on the whole test split; the bare expression on the last line
# displays the arg-max class index of the first test row.
predictions = model_lstm.predict(X_test)
np.argmax(predictions[0])



4

In [None]:
def fit_feature(songname, duration=3):
    """Extract the per-track feature vector used by the classifiers.

    Args:
        songname: Path of the audio file to load.
        duration: Seconds of audio to read from the start of the file.
            Defaults to 3 so inference uses the same clip length as the
            training features written to ``data-3s.csv``; pass 30 to get the
            previous behavior.

    Returns:
        1-D NumPy array holding the means of chroma STFT, RMS, spectral
        centroid, spectral bandwidth, roll-off and zero-crossing rate,
        followed by one mean per MFCC row (the CSV column order).
    """
    y, sr = librosa.load(songname, mono=True, duration=duration)
    chroma_stft = librosa.feature.chroma_stft(y=y, sr=sr)
    rmse = librosa.feature.rms(y=y)
    spec_cent = librosa.feature.spectral_centroid(y=y, sr=sr)
    spec_bw = librosa.feature.spectral_bandwidth(y=y, sr=sr)
    rolloff = librosa.feature.spectral_rolloff(y=y, sr=sr)
    zcr = librosa.feature.zero_crossing_rate(y)
    mfcc = librosa.feature.mfcc(y=y, sr=sr)

    # Collect every mean in a list and build the array once, instead of
    # reallocating it with np.append for each MFCC row.
    means = [np.mean(chroma_stft), np.mean(rmse), np.mean(spec_cent),
             np.mean(spec_bw), np.mean(rolloff), np.mean(zcr)]
    means.extend(np.mean(e) for e in mfcc)
    return np.array(means)

In [None]:
# Classify one new track: extract its features, scale them with the scaler
# fitted on the training split, add the length-1 axis the LSTM input uses,
# then map the arg-max class index back to a genre name.
new_song_feature = fit_feature('/content/better-day-186374.mp3')
new_song_feature = scaler.transform([new_song_feature])
new_song_feature = np.expand_dims(new_song_feature, axis=1)
prediction = model_lstm.predict(new_song_feature)
predicted_genre = encoder.inverse_transform([np.argmax(prediction)])
print("Predicted Genre:", predicted_genre[0])

Predicted Genre: blues




#SVM-RF

In [None]:
import librosa
import pandas as pd
import numpy as np
import joblib
import os
import csv
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.model_selection import train_test_split
from sklearn import svm
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import StackingClassifier
from sklearn.metrics import classification_report, accuracy_score


In [None]:
# Show what is present under the dataset folder before extracting features.
general_path = '/content'
print(list(os.listdir(f'{general_path}/genres')))

['blues', 'country', 'hiphop', 'jazz', 'disco', 'pop', 'classical', 'reggae', 'rock', 'metal', '.ipynb_checkpoints']


In [None]:
# The ten genre labels; each one is also a sub-folder name under /content/genres.
genres = 'blues classical country disco hiphop jazz metal pop reggae rock'
genres = genres.split()

# exist_ok=True keeps this cell re-runnable: without it os.makedirs raises
# FileExistsError as soon as the folders exist (for example after an earlier
# run of this same cell elsewhere in the notebook).
for g in genres:
    path_audio = os.path.join('/content', f'{g}')
    os.makedirs(path_audio, exist_ok=True)

In [None]:
# CSV column names: file name, six spectral summary features, twenty MFCC
# means (mfcc1..mfcc20) and finally the genre label.
fixed_columns = 'filename chroma_stft rmse spectral_centroid spectral_bandwidth rolloff zero_crossing_rate'.split()
mfcc_columns = [f'mfcc{i}' for i in range(1, 21)]
header = [*fixed_columns, *mfcc_columns, 'label']

In [None]:
# Build data-3s.csv in one pass: write the header, then one row of feature
# means per audio file, computed from the first 3 seconds of each track.
# The file is opened once instead of being reopened for every row.
with open('data-3s.csv', 'w', newline='') as file:
    writer = csv.writer(file)
    writer.writerow(header)

    for g in genres:
        for filename in os.listdir(f'/content/genres/{g}'):
            songname = f'/content/genres/{g}/{filename}'
            y, sr = librosa.load(songname, mono=True, duration=3)
            chroma_stft = librosa.feature.chroma_stft(y=y, sr=sr)
            rmse = librosa.feature.rms(y=y)
            spec_cent = librosa.feature.spectral_centroid(y=y, sr=sr)
            spec_bw = librosa.feature.spectral_bandwidth(y=y, sr=sr)
            rolloff = librosa.feature.spectral_rolloff(y=y, sr=sr)
            zcr = librosa.feature.zero_crossing_rate(y)
            mfcc = librosa.feature.mfcc(y=y, sr=sr)

            # Build the row as a list of fields. The previous version joined
            # everything with spaces and split it again, which shifted the
            # columns for any file name containing whitespace.
            row = [filename, np.mean(chroma_stft), np.mean(rmse), np.mean(spec_cent),
                   np.mean(spec_bw), np.mean(rolloff), np.mean(zcr)]
            row.extend(np.mean(e) for e in mfcc)
            row.append(g)
            writer.writerow(row)

In [None]:
# Load the extracted features back and show the first rows and the table shape.
data_3s = pd.read_csv('data-3s.csv')
print(data_3s.head())
print(data_3s.shape)

         filename  chroma_stft      rmse  spectral_centroid  \
0  blues.00030.au     0.252451  0.129469         796.877132   
1  blues.00043.au     0.398613  0.127050        2146.989442   
2  blues.00089.au     0.330216  0.057961         677.501002   
3  blues.00093.au     0.399003  0.056844         640.531249   
4  blues.00011.au     0.395926  0.053601        1387.962171   

   spectral_bandwidth      rolloff  zero_crossing_rate       mfcc1  \
0         1363.858304  1241.720440            0.027813 -261.393524   
1         2346.179667  4924.229455            0.087361 -106.287834   
2         1136.151689  1216.211877            0.022919 -345.310028   
3         1022.053919  1190.123573            0.022397 -350.515137   
4         1815.101760  3007.525916            0.044899 -321.723846   

        mfcc2      mfcc3  ...    mfcc12     mfcc13     mfcc14     mfcc15  \
0  142.396729  11.462423  ... -6.913297 -12.653015 -12.238706 -21.957417   
1  102.910103  -9.609563  ...  6.560434  -9.1604

In [None]:
# Fix the seed, then shuffle every row reproducibly and renumber the index.
seed = 12
np.random.seed(seed)
data_shuffle = data_3s.sample(frac=1, random_state=seed).reset_index(drop=True)

In [None]:
# The file name is an identifier, not a feature: drop it before modelling.
data_shuffle = data_shuffle.drop(['filename'], axis=1)
print(data_shuffle.head())

   chroma_stft      rmse  spectral_centroid  spectral_bandwidth      rolloff  \
0     0.277971  0.160153        2562.661518         2478.888737  5153.226788   
1     0.456815  0.153437        2209.864606         2729.672694  5100.636080   
2     0.413502  0.086312        4700.819906         3427.468067  8845.508564   
3     0.304375  0.098867        1853.964030         1944.683935  3848.480319   
4     0.340747  0.065988        1958.418703         2156.088175  4275.003380   

   zero_crossing_rate       mfcc1       mfcc2      mfcc3      mfcc4  ...  \
0            0.117075  -72.641045   71.300972 -10.395139  32.301598  ...   
1            0.072269 -116.014336   91.779205  20.279716  30.649773  ...   
2            0.285780 -129.735794   27.791946  27.142660  17.484444  ...   
3            0.082027 -171.452637  117.102180 -22.616510  32.930000  ...   
4            0.089915 -165.906097  122.589996 -19.829643  27.253536  ...   

     mfcc12     mfcc13    mfcc14     mfcc15    mfcc16    mfcc1

In [None]:
# Feature matrix: every column except the last one (the label column).
X = data_shuffle.iloc[:, :-1]

In [None]:
# The last column holds the genre name; LabelEncoder maps each name to an
# integer class id, and the same encoder is used later to map ids back.
genre_list = data_shuffle.iloc[:, -1]
encoder = LabelEncoder()
y = encoder.fit_transform(genre_list)
print(y)

[5 8 7 2 3 8 7 8 9 3 5 3 5 5 0 8 1 9 8 1 9 3 1 3 3 1 3 4 4 7 3 9 6 3 6 7 0
 2 6 1 5 9 1 4 7 5 2 0 0 9 1 8 9 5 2 1 6 3 3 3 5 5 4 6 8 4 5 4 5 3 6 1 6 8
 6 0 8 0 5 3 7 7 4 8 3 2 7 1 5 7 9 9 2 5 2 4 8 5 0 1 9 7 7 4 6 8 4 9 8 5 8
 6 5 8 5 2 7 1 2 3 7 0 3 3 9 4 4 3 8 2 2 9 0 9 1 9 4 6 5 0 4 2 9 0 3 1 5 5
 7 9 9 4 4 1 6 3 6 4 4 1 8 2 4 6 7 3 7 0 8 2 3 0 1 9 8 0 3 1 6 4 4 6 3 1 1
 9 8 6 3 8 6 3 7 7 7 9 7 9 0 1 0 9 2 7 7 5 6 4 8 0 4 2 3 2 9 3 7 0 3 9 5 0
 1 4 5 7 1 2 9 2 0 3 8 0 4 5 7 5 4 0 6 1 0 4 6 1 7 6 3 0 9 1 3 2 5 7 9 2 9
 2 2 2 1 5 0 5 9 4 2 5 0 4 9 7 9 9 2 8 4 0 0 0 8 0 2 1 7 5 8 2 7 6 8 2 4 6
 2 2 3 6 8 2 4 9 4 4 1 8 6 5 7 1 0 8 2 9 6 3 5 4 0 7 7 3 5 0 4 5 3 1 1 7 9
 3 5 4 7 2 5 7 0 7 5 9 9 7 9 4 7 3 0 8 2 6 3 0 1 8 3 9 5 9 0 8 1 8 1 9 0 2
 0 9 4 0 9 7 2 0 8 5 4 5 5 3 2 6 2 8 2 8 3 9 8 5 7 4 6 1 5 6 2 7 7 0 8 3 6
 4 3 8 6 7 0 2 0 3 3 6 8 1 3 9 2 5 6 1 0 6 2 4 7 9 4 7 0 9 9 3 9 0 3 6 6 6
 4 8 1 9 2 8 2 9 1 6 0 8 4 3 5 1 9 7 8 7 4 4 1 5 6 2 1 9 3 3 6 7 8 5 1 0 1
 4 7 3 9 8 0 9 5 0 0 0 7 

In [None]:
# Stage 1: keep 70% of the rows for training, stratified on the label.
X_train, df_test_valid_X, y_train, df_test_valid_y = train_test_split(X, y, train_size=0.7, random_state=seed, stratify=y)
# Stage 2: split the held-out 30% into dev (66%) and test (34%), again stratified.
X_dev, X_test, y_dev, y_test = train_test_split(df_test_valid_X, df_test_valid_y, train_size=0.66, random_state=seed, stratify=df_test_valid_y)

In [None]:
# Report the row count of each split and its rounded share of the full dataset.
print(f"Train set has {X_train.shape[0]} records out of {len(data_shuffle)} which is {round(X_train.shape[0]/len(data_shuffle)*100)}%")
print(f"Dev set has {X_dev.shape[0]} records out of {len(data_shuffle)} which is {round(X_dev.shape[0]/len(data_shuffle)*100)}%")
print(f"Test set has {X_test.shape[0]} records out of {len(data_shuffle)} which is {round(X_test.shape[0]/len(data_shuffle)*100)}%")

Train set has 700 records out of 1000 which is 70%
Dev set has 198 records out of 1000 which is 20%
Test set has 102 records out of 1000 which is 10%


In [None]:
# Feature standardizer; it is fitted on the training split in the following cell.
scaler = StandardScaler()

In [None]:
# Fit the scaler on the training split only, then apply that same fitted
# transform to dev and test. Each result is re-wrapped in a DataFrame so the
# original column names are kept.
X_train = pd.DataFrame(scaler.fit_transform(X_train), columns=X_train.columns)
X_dev = pd.DataFrame(scaler.transform(X_dev), columns=X_dev.columns)
X_test = pd.DataFrame(scaler.transform(X_test), columns=X_test.columns)

In [None]:
# Linear-kernel SVM fitted on the scaled training split; probability=True
# additionally enables class-probability estimates.
svm_model = svm.SVC(kernel='linear', probability=True)
svm_model.fit(X_train, y_train)

In [None]:
# Random forest of 100 trees, seeded for reproducibility.
rf_model = RandomForestClassifier(n_estimators=100, random_state=seed)
rf_model.fit(X_train, y_train)

In [None]:
# Stack the SVM and the random forest under a logistic-regression
# meta-learner, using 5-fold cross-validation inside the stack.
# NOTE(review): StackingClassifier is documented to refit clones of its base
# estimators, so the standalone fits in the two cells above are presumably
# not reused here - confirm against the scikit-learn docs.
stacking_clf = StackingClassifier(
    estimators=[('svm', svm_model), ('rf', rf_model)],
    final_estimator=LogisticRegression(),
    cv=5
)
stacking_clf.fit(X_train, y_train)

In [None]:
# Persist the fitted stack, then score it on the held-out test split.
# Only the classifier is written here; `scaler` and `encoder` are not saved
# by this cell.
joblib.dump(stacking_clf, 'stacking_clf.pkl')
y_pred = stacking_clf.predict(X_test)
print("Classification Report:\n", classification_report(y_test, y_pred))
print("Accuracy Score:", accuracy_score(y_test, y_pred))

Classification Report:
               precision    recall  f1-score   support

           0       0.50      0.80      0.62        10
           1       0.79      1.00      0.88        11
           2       0.25      0.20      0.22        10
           3       0.56      0.50      0.53        10
           4       0.50      0.50      0.50        10
           5       0.38      0.30      0.33        10
           6       0.82      0.82      0.82        11
           7       0.89      0.80      0.84        10
           8       0.40      0.40      0.40        10
           9       0.43      0.30      0.35        10

    accuracy                           0.57       102
   macro avg       0.55      0.56      0.55       102
weighted avg       0.56      0.57      0.55       102

Accuracy Score: 0.5686274509803921


In [None]:
def fit_feature(songname, duration=3):
    """Extract the per-track feature vector used by the classifiers.

    Args:
        songname: Path of the audio file to load.
        duration: Seconds of audio to read from the start of the file.
            Defaults to 3 so inference uses the same clip length as the
            training features written to ``data-3s.csv``; pass 30 to get the
            previous behavior.

    Returns:
        1-D NumPy array holding the means of chroma STFT, RMS, spectral
        centroid, spectral bandwidth, roll-off and zero-crossing rate,
        followed by one mean per MFCC row (the CSV column order).
    """
    y, sr = librosa.load(songname, mono=True, duration=duration)
    chroma_stft = librosa.feature.chroma_stft(y=y, sr=sr)
    rmse = librosa.feature.rms(y=y)
    spec_cent = librosa.feature.spectral_centroid(y=y, sr=sr)
    spec_bw = librosa.feature.spectral_bandwidth(y=y, sr=sr)
    rolloff = librosa.feature.spectral_rolloff(y=y, sr=sr)
    zcr = librosa.feature.zero_crossing_rate(y)
    mfcc = librosa.feature.mfcc(y=y, sr=sr)

    # Collect every mean in a list and build the array once, instead of
    # reallocating it with np.append for each MFCC row.
    means = [np.mean(chroma_stft), np.mean(rmse), np.mean(spec_cent),
             np.mean(spec_bw), np.mean(rolloff), np.mean(zcr)]
    means.extend(np.mean(e) for e in mfcc)
    return np.array(means)

In [None]:
# Classify one new track with the stacked model: extract features, scale
# them with the scaler fitted on the training split, predict, and map the
# class id back to a genre name.
# NOTE: despite its name, new_song_path holds the extracted feature vector,
# not a file path.
new_song_path= fit_feature("/content/better-day-186374.mp3")
new_song_feature = scaler.transform([new_song_path])
prediction = stacking_clf.predict(new_song_feature)
predicted_genre = encoder.inverse_transform(prediction)
print("Predicted Genre:", predicted_genre[0])

Predicted Genre: blues




#SVM-KNNeighbor


In [None]:
import librosa
import pandas as pd
import numpy as np
import os
import csv
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.model_selection import train_test_split
from sklearn import svm
from sklearn.neighbors import KNeighborsClassifier
from sklearn.ensemble import BaggingClassifier
from sklearn.metrics import classification_report, accuracy_score

In [None]:
# Show what is present under the dataset folder before extracting features.
general_path = '/content'
print(list(os.listdir(f'{general_path}/genres')))

['blues', 'country', 'hiphop', 'jazz', 'disco', 'pop', 'classical', 'reggae', 'rock', 'metal', '.ipynb_checkpoints']


In [None]:
# The ten genre labels; each one is also a sub-folder name under /content/genres.
genres = 'blues classical country disco hiphop jazz metal pop reggae rock'
genres = genres.split()

# exist_ok=True keeps this cell re-runnable: without it os.makedirs raises
# FileExistsError as soon as the folders exist (for example after an earlier
# run of this same cell elsewhere in the notebook).
for g in genres:
    path_audio = os.path.join('/content', f'{g}')
    os.makedirs(path_audio, exist_ok=True)

In [None]:
# CSV column names: file name, six spectral summary features, twenty MFCC
# means (mfcc1..mfcc20) and finally the genre label.
fixed_columns = 'filename chroma_stft rmse spectral_centroid spectral_bandwidth rolloff zero_crossing_rate'.split()
mfcc_columns = [f'mfcc{i}' for i in range(1, 21)]
header = [*fixed_columns, *mfcc_columns, 'label']

In [None]:
# Build data-3s.csv in one pass: write the header, then one row of feature
# means per audio file, computed from the first 3 seconds of each track.
# The file is opened once instead of being reopened for every row.
with open('data-3s.csv', 'w', newline='') as file:
    writer = csv.writer(file)
    writer.writerow(header)

    for g in genres:
        for filename in os.listdir(f'/content/genres/{g}'):
            songname = f'/content/genres/{g}/{filename}'
            y, sr = librosa.load(songname, mono=True, duration=3)
            chroma_stft = librosa.feature.chroma_stft(y=y, sr=sr)
            rmse = librosa.feature.rms(y=y)
            spec_cent = librosa.feature.spectral_centroid(y=y, sr=sr)
            spec_bw = librosa.feature.spectral_bandwidth(y=y, sr=sr)
            rolloff = librosa.feature.spectral_rolloff(y=y, sr=sr)
            zcr = librosa.feature.zero_crossing_rate(y)
            mfcc = librosa.feature.mfcc(y=y, sr=sr)

            # Build the row as a list of fields. The previous version joined
            # everything with spaces and split it again, which shifted the
            # columns for any file name containing whitespace.
            row = [filename, np.mean(chroma_stft), np.mean(rmse), np.mean(spec_cent),
                   np.mean(spec_bw), np.mean(rolloff), np.mean(zcr)]
            row.extend(np.mean(e) for e in mfcc)
            row.append(g)
            writer.writerow(row)

In [None]:
# Load the extracted features back and show the first rows and the table shape.
data_3s = pd.read_csv('data-3s.csv')
print(data_3s.head())
print(data_3s.shape)

         filename  chroma_stft      rmse  spectral_centroid  \
0  blues.00030.au     0.252451  0.129469         796.877132   
1  blues.00043.au     0.398613  0.127050        2146.989442   
2  blues.00089.au     0.330216  0.057961         677.501002   
3  blues.00093.au     0.399003  0.056844         640.531249   
4  blues.00011.au     0.395926  0.053601        1387.962171   

   spectral_bandwidth      rolloff  zero_crossing_rate       mfcc1  \
0         1363.858304  1241.720440            0.027813 -261.393524   
1         2346.179667  4924.229455            0.087361 -106.287834   
2         1136.151689  1216.211877            0.022919 -345.310028   
3         1022.053919  1190.123573            0.022397 -350.515137   
4         1815.101760  3007.525916            0.044899 -321.723846   

        mfcc2      mfcc3  ...    mfcc12     mfcc13     mfcc14     mfcc15  \
0  142.396729  11.462423  ... -6.913297 -12.653015 -12.238706 -21.957417   
1  102.910103  -9.609563  ...  6.560434  -9.1604

In [None]:
# Fix the seed, then shuffle every row reproducibly and renumber the index.
seed = 12
np.random.seed(seed)
data_shuffle = data_3s.sample(frac=1, random_state=seed).reset_index(drop=True)

In [None]:
# The file name is an identifier, not a feature: drop it before modelling.
data_shuffle = data_shuffle.drop(['filename'], axis=1)
print(data_shuffle.head())

   chroma_stft      rmse  spectral_centroid  spectral_bandwidth      rolloff  \
0     0.277971  0.160153        2562.661518         2478.888737  5153.226788   
1     0.456815  0.153437        2209.864606         2729.672694  5100.636080   
2     0.413502  0.086312        4700.819906         3427.468067  8845.508564   
3     0.304375  0.098867        1853.964030         1944.683935  3848.480319   
4     0.340747  0.065988        1958.418703         2156.088175  4275.003380   

   zero_crossing_rate       mfcc1       mfcc2      mfcc3      mfcc4  ...  \
0            0.117075  -72.641045   71.300972 -10.395139  32.301598  ...   
1            0.072269 -116.014336   91.779205  20.279716  30.649773  ...   
2            0.285780 -129.735794   27.791946  27.142660  17.484444  ...   
3            0.082027 -171.452637  117.102180 -22.616510  32.930000  ...   
4            0.089915 -165.906097  122.589996 -19.829643  27.253536  ...   

     mfcc12     mfcc13    mfcc14     mfcc15    mfcc16    mfcc1

In [None]:
# Feature matrix: every column except the last one (the label column).
X = data_shuffle.iloc[:, :-1]

In [None]:
# The last column holds the genre name; LabelEncoder maps each name to an
# integer class id, and the same encoder is used later to map ids back.
genre_list = data_shuffle.iloc[:, -1]
encoder = LabelEncoder()
y = encoder.fit_transform(genre_list)
print(y)

[5 8 7 2 3 8 7 8 9 3 5 3 5 5 0 8 1 9 8 1 9 3 1 3 3 1 3 4 4 7 3 9 6 3 6 7 0
 2 6 1 5 9 1 4 7 5 2 0 0 9 1 8 9 5 2 1 6 3 3 3 5 5 4 6 8 4 5 4 5 3 6 1 6 8
 6 0 8 0 5 3 7 7 4 8 3 2 7 1 5 7 9 9 2 5 2 4 8 5 0 1 9 7 7 4 6 8 4 9 8 5 8
 6 5 8 5 2 7 1 2 3 7 0 3 3 9 4 4 3 8 2 2 9 0 9 1 9 4 6 5 0 4 2 9 0 3 1 5 5
 7 9 9 4 4 1 6 3 6 4 4 1 8 2 4 6 7 3 7 0 8 2 3 0 1 9 8 0 3 1 6 4 4 6 3 1 1
 9 8 6 3 8 6 3 7 7 7 9 7 9 0 1 0 9 2 7 7 5 6 4 8 0 4 2 3 2 9 3 7 0 3 9 5 0
 1 4 5 7 1 2 9 2 0 3 8 0 4 5 7 5 4 0 6 1 0 4 6 1 7 6 3 0 9 1 3 2 5 7 9 2 9
 2 2 2 1 5 0 5 9 4 2 5 0 4 9 7 9 9 2 8 4 0 0 0 8 0 2 1 7 5 8 2 7 6 8 2 4 6
 2 2 3 6 8 2 4 9 4 4 1 8 6 5 7 1 0 8 2 9 6 3 5 4 0 7 7 3 5 0 4 5 3 1 1 7 9
 3 5 4 7 2 5 7 0 7 5 9 9 7 9 4 7 3 0 8 2 6 3 0 1 8 3 9 5 9 0 8 1 8 1 9 0 2
 0 9 4 0 9 7 2 0 8 5 4 5 5 3 2 6 2 8 2 8 3 9 8 5 7 4 6 1 5 6 2 7 7 0 8 3 6
 4 3 8 6 7 0 2 0 3 3 6 8 1 3 9 2 5 6 1 0 6 2 4 7 9 4 7 0 9 9 3 9 0 3 6 6 6
 4 8 1 9 2 8 2 9 1 6 0 8 4 3 5 1 9 7 8 7 4 4 1 5 6 2 1 9 3 3 6 7 8 5 1 0 1
 4 7 3 9 8 0 9 5 0 0 0 7 

In [None]:
# Stage 1: keep 70% of the rows for training, stratified on the label.
X_train, df_test_valid_X, y_train, df_test_valid_y = train_test_split(X, y, train_size=0.7, random_state=seed, stratify=y)
# Stage 2: split the held-out 30% into dev (66%) and test (34%), again stratified.
X_dev, X_test, y_dev, y_test = train_test_split(df_test_valid_X, df_test_valid_y, train_size=0.66, random_state=seed, stratify=df_test_valid_y)

In [None]:
# Report the row count of each split and its rounded share of the full dataset.
print(f"Train set has {X_train.shape[0]} records out of {len(data_shuffle)} which is {round(X_train.shape[0]/len(data_shuffle)*100)}%")
print(f"Dev set has {X_dev.shape[0]} records out of {len(data_shuffle)} which is {round(X_dev.shape[0]/len(data_shuffle)*100)}%")
print(f"Test set has {X_test.shape[0]} records out of {len(data_shuffle)} which is {round(X_test.shape[0]/len(data_shuffle)*100)}%")

Train set has 700 records out of 1000 which is 70%
Dev set has 198 records out of 1000 which is 20%
Test set has 102 records out of 1000 which is 10%


In [None]:
# Feature standardizer; it is fitted on the training split in the following cell.
scaler = StandardScaler()

In [None]:
# Fit the scaler on the training split only, then apply that same fitted
# transform to dev and test. Each result is re-wrapped in a DataFrame so the
# original column names are kept.
X_train = pd.DataFrame(scaler.fit_transform(X_train), columns=X_train.columns)
X_dev = pd.DataFrame(scaler.transform(X_dev), columns=X_dev.columns)
X_test = pd.DataFrame(scaler.transform(X_test), columns=X_test.columns)

In [None]:
# Linear-kernel SVM fitted on the scaled training split; probability=True
# additionally enables class-probability estimates.
svm_model = svm.SVC(kernel='linear', probability=True)
svm_model.fit(X_train, y_train)

In [None]:
# k-nearest-neighbours classifier with k=5, fitted on the scaled training split.
knn_model = KNeighborsClassifier(n_neighbors=5)
knn_model.fit(X_train, y_train)

In [None]:
# Kept for reference only: BaggingClassifier wraps a single estimator, so this
# list (and the KNN model in it) is not consumed by the ensemble below.
base_estimators = [('svm', svm_model), ('knn', knn_model)]

# Bag ten SVMs. The estimator is passed positionally because its keyword was
# renamed from `base_estimator` to `estimator` in scikit-learn 1.2 and the old
# name was removed in 1.4; the positional form works on both sides of the rename.
bagging_clf = BaggingClassifier(svm_model, n_estimators=10, random_state=seed)
bagging_clf.fit(X_train, y_train)



In [None]:
# Score the bagged SVM ensemble on the held-out test split.
y_pred = bagging_clf.predict(X_test)
print("Classification Report:\n", classification_report(y_test, y_pred))
print("Accuracy Score:", accuracy_score(y_test, y_pred))

Classification Report:
               precision    recall  f1-score   support

           0       0.40      0.60      0.48        10
           1       0.75      0.82      0.78        11
           2       0.33      0.20      0.25        10
           3       1.00      0.40      0.57        10
           4       0.55      0.60      0.57        10
           5       0.30      0.30      0.30        10
           6       0.67      0.91      0.77        11
           7       0.67      0.80      0.73        10
           8       0.44      0.40      0.42        10
           9       0.38      0.30      0.33        10

    accuracy                           0.54       102
   macro avg       0.55      0.53      0.52       102
weighted avg       0.55      0.54      0.53       102

Accuracy Score: 0.5392156862745098


In [None]:
def fit_feature(songname, duration=3):
    """Extract the per-track feature vector used by the classifiers.

    Args:
        songname: Path of the audio file to load.
        duration: Seconds of audio to read from the start of the file.
            Defaults to 3 so inference uses the same clip length as the
            training features written to ``data-3s.csv``; pass 30 to get the
            previous behavior.

    Returns:
        1-D NumPy array holding the means of chroma STFT, RMS, spectral
        centroid, spectral bandwidth, roll-off and zero-crossing rate,
        followed by one mean per MFCC row (the CSV column order).
    """
    y, sr = librosa.load(songname, mono=True, duration=duration)
    chroma_stft = librosa.feature.chroma_stft(y=y, sr=sr)
    rmse = librosa.feature.rms(y=y)
    spec_cent = librosa.feature.spectral_centroid(y=y, sr=sr)
    spec_bw = librosa.feature.spectral_bandwidth(y=y, sr=sr)
    rolloff = librosa.feature.spectral_rolloff(y=y, sr=sr)
    zcr = librosa.feature.zero_crossing_rate(y)
    mfcc = librosa.feature.mfcc(y=y, sr=sr)

    # Collect every mean in a list and build the array once, instead of
    # reallocating it with np.append for each MFCC row.
    means = [np.mean(chroma_stft), np.mean(rmse), np.mean(spec_cent),
             np.mean(spec_bw), np.mean(rolloff), np.mean(zcr)]
    means.extend(np.mean(e) for e in mfcc)
    return np.array(means)

In [None]:
# Classify one new track with the bagged ensemble: extract features, scale
# them with the scaler fitted on the training split, predict, and map the
# class id back to a genre name.
new_song_feature = fit_feature('/content/better-day-186374.mp3')
new_song_feature = scaler.transform([new_song_feature])
prediction = bagging_clf.predict(new_song_feature)
predicted_genre = encoder.inverse_transform(prediction)
print("Predicted Genre:", predicted_genre[0])

Predicted Genre: blues




#Gradient-descent


**Pre-Processing**
- Find Sonogram, Periodicity Histogram,
Spectrum Histogram and Fluctuation Pattern for
the training audio data

**Feature Extraction**

Window the audio data with a Hamming
window.
Amplitude values of the DFT of the data are
found.
Amplitude values are converted to filter bank
outputs.
Log base 10 for the output is calculated.
Find the cosine transform.
Feature vectors are stored in matrix X

**Feature Reduction**

Calculate the empirical mean of X.
The deviations are calculated from the mean
and the data are stored in the matrix B [M N].
Covariance matrix C is found.
Eigenvectors and eigenvalues of the covariance
matrix C are found.
The eigenvectors and eigenvalues are
rearranged to form the feature vector.
The new data set is derived and the eigenvectors
with the highest eigenvalues are projected into
space.
Put the new dataset in a matrix Y

**Classification**

Select ‘m’ initial "means" randomly from the
data set Y.
‘m’ clusters are created by associating every
observation with the nearest mean.
The centroid of each of the ‘m’ clusters
becomes the new means.
The above steps are repeated until convergence
is reached.

**Training**

Train on the features of the audio files using a
Gradient Descent Adaptive Learning Backpropagation
Network.

**Testing Phase**