In [5]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from glob import glob
import os
import librosa as librosa
import warnings
warnings.filterwarnings('ignore')

In [6]:
path = os.getcwd()
path

'/home/aqeelali7/Documents/Galvanize/Capstone-2-Music-Genre-Classifier/Song-Genre-Classification/src'

In [7]:
p = '/home/aqeelali7/Documents/Galvanize/Capstone-2-Music-Genre-Classifier/'

In [8]:
genres = ['hip-hop', 'classical', 'country', 'electronic', 'metal']

In [9]:
def extract_audio_features(y, sr):
        '''
        This function extracts audio features from an audio file.
                Parameters:
                        id (string): the audio track id 
                        y 
                        sr 
                Returns:
                        audio_features (DataFrame): the extracted audio features
        '''
        # Features to concatenate in the final dictionary
        features = {'chroma_sftf': None, 'rolloff': None, 'zero_crossing_rate': None, 'rmse': None,
                    'flux': None, 'contrast': None, 'flatness': None}

        # Count silence
        if 0 < len(y):
            y_sound, _ = librosa.effects.trim(y)
        features['sample_silence'] = len(y) - len(y_sound)            

        # Using librosa to calculate the features
        features['chroma_sftf'] = np.mean(
            librosa.feature.chroma_stft(y=y, sr=sr))
        features['rolloff'] = np.mean(
            librosa.feature.spectral_rolloff(y, sr=sr))
        features['zero_crossing_rate'] = np.mean(
            librosa.feature.zero_crossing_rate(y))
        features['rmse'] = np.mean(librosa.feature.rms(y))
        features['flux'] = np.mean(librosa.onset.onset_strength(y=y, sr=sr))
        features['contrast'] = np.mean(
            librosa.feature.spectral_contrast(y, sr=sr))
        features['flatness'] = np.mean(librosa.feature.spectral_flatness(y))

        # MFCC treatment
        mfcc = librosa.feature.mfcc(y=y, sr=sr, n_mfcc=20)
        
        for idx, v_mfcc in enumerate(mfcc):
            features['avg_mfcc'] = np.mean(v_mfcc)
            
        

        features['tempo'] = librosa.beat.tempo(y, sr=sr)[0]
        return features

In [10]:

#     in each genre:

#     iterate to next genre



# add the list as the contents of the row of a dataframe

# create list of features for each song
chroma_sftf = []
flux = [] 
rolloff = []
zero_crossing_rate = []
contrast = []
flatness = []
rmse = []
tempo = []
genre = []
mfcc = []
sr = 22050

# loop thru the list of genres
for i in range(len(genres)):

    data_dir = "data/"+genres[i]
    
    new_path = os.path.join(p,data_dir)
    os.chdir(new_path)
    
#   extract audio features using librosa library
#   store extracted audio features in a dataframe
    count = 0
    for track_num in range(len(os.listdir())):

        audio, amplitude = librosa.load((os.listdir()[track_num]))
        audio = np.array(audio)
        amplitude = np.array(amplitude)
        curr_song_feats = extract_audio_features(audio,sr)
        
        # for each song, append data to respective features list
        chroma_sftf.append(curr_song_feats['chroma_sftf'])
        flux.append(curr_song_feats['flux'])
        rolloff.append(curr_song_feats['rolloff'])
        zero_crossing_rate.append(curr_song_feats['zero_crossing_rate'])
        contrast.append(curr_song_feats['contrast'])
        flatness.append(curr_song_feats['flatness'])
        rmse.append(curr_song_feats['rmse'])
        tempo.append(curr_song_feats['tempo'])
        genre.append(genres[i])
        mfcc.append(curr_song_feats['avg_mfcc'])
        
        count += 1
        
        if count%100 == 0:
            print('100 songs done')
        
        
        
    print('done with ', genres[i])
    
        

        


100 songs done
100 songs done
100 songs done
100 songs done
100 songs done
100 songs done
100 songs done
100 songs done
100 songs done
done with  hip-hop
100 songs done
100 songs done
100 songs done
100 songs done
100 songs done
100 songs done
100 songs done
100 songs done
done with  classical
100 songs done
100 songs done
100 songs done
100 songs done
100 songs done
100 songs done
100 songs done
done with  country
100 songs done
100 songs done
100 songs done
100 songs done
100 songs done
100 songs done
100 songs done
100 songs done
100 songs done
done with  electronic
100 songs done
100 songs done
100 songs done
100 songs done
100 songs done
100 songs done
100 songs done
100 songs done
100 songs done
done with  metal


In [24]:
col_feats = list(zip(genre,mfcc,chroma_sftf, flux, rolloff, zero_crossing_rate, contrast,flatness,rmse,tempo))
col_names = ["genre", "mfcc","chroma_sftf", "flux", "rolloff", "zero_crossing_rate", "contrast","flatness","rmse","tempo"]

df = pd.DataFrame(col_feats, columns = col_names)
df.head()

Unnamed: 0,genre,mfcc,chroma_sftf,flux,rolloff,zero_crossing_rate,contrast,flatness,rmse,tempo
0,hip-hop,4.450824,0.431049,1.36542,4318.922711,0.0683,19.013011,0.025153,0.345805,151.999081
1,hip-hop,-0.377781,0.511142,1.667078,4419.694105,0.062677,18.660331,0.022863,0.176869,123.046875
2,hip-hop,0.259693,0.522985,1.359143,5745.967884,0.159102,20.103879,0.05365,0.276045,95.703125
3,hip-hop,0.297143,0.399623,1.74403,4064.054853,0.060242,21.550626,0.02077,0.365921,95.703125
4,hip-hop,-3.304997,0.288411,1.144687,1368.274351,0.039064,21.798417,0.00036,0.072659,117.453835


In [20]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 4376 entries, 0 to 4375
Data columns (total 10 columns):
 #   Column              Non-Null Count  Dtype  
---  ------              --------------  -----  
 0   genre               4376 non-null   object 
 1   mfcc                4376 non-null   float64
 2   chroma_sftf         4376 non-null   float64
 3   flux                4376 non-null   float64
 4   rolloff             4376 non-null   float64
 5   zero_crossing_rate  4376 non-null   float64
 6   contrast            4376 non-null   float64
 7   flatness            4376 non-null   float64
 8   rmse                4376 non-null   float64
 9   tempo               4376 non-null   float64
dtypes: float64(9), object(1)
memory usage: 342.0+ KB


In [32]:
result = df.groupby(['genre'])

In [33]:
for group in result:
    print("\n",group)


 ('classical',           genre       mfcc  chroma_sftf      flux      rolloff  \
916   classical   5.060514     0.231019  0.910790  1291.001294   
917   classical -11.064908     0.246337  1.078530   889.023188   
918   classical  -2.022367     0.207204  0.741518   958.002714   
919   classical  -1.778825     0.323277  1.403619  2468.449497   
920   classical -11.400709     0.189234  0.983833  1388.624937   
...         ...        ...          ...       ...          ...   
1810  classical -11.497581     0.244424  1.062850  1737.852729   
1811  classical -11.767015     0.187028  0.870764   967.994920   
1812  classical   0.282190     0.221288  0.789507  1112.490540   
1813  classical  -8.714202     0.201549  1.042589  1208.099294   
1814  classical   4.089860     0.239119  1.123193  1825.138346   

      zero_crossing_rate   contrast  flatness      rmse       tempo  
916             0.046479  26.395268  0.000320  0.100509  112.347147  
917             0.035983  25.812452  0.000142  0.08

In [46]:
result2 = df.groupby('genre').head(761).reset_index(drop=True)
type(result2)

pandas.core.frame.DataFrame

In [57]:
import pickle
save_model = open("df.MusicFeatures", "wb")
pickle.dump(df, save_model)
save_model.close()

print(os.getcwd())
pat = '/home/aqeelali7/Documents/Galvanize/Capstone-2-Music-Genre-Classifier/Song-Genre-Classification/data'
os.chdir(pat)
with open(pat, 'wb') as f:
    pickle.dump(object, f)
    
new_path = os.path.join(pat,data_dir)
df.to_pickle('original_df.pickle')
result2.to_pickle('MusicFeats.pickle')

/home/aqeelali7/Documents/Galvanize/Capstone-2-Music-Genre-Classifier/data/metal


IsADirectoryError: [Errno 21] Is a directory: '/home/aqeelali7/Documents/Galvanize/Capstone-2-Music-Genre-Classifier/Song-Genre-Classification/data'

In [52]:
df.to_pickle('original_df.pickle')
result2.to_pickle('MusicFeats.pickle')

In [54]:
df2 = pd.read_pickle('my_df.pickle')
df3 = pd.read_pickle('MusicFeats.pickle')
df3

Unnamed: 0,genre,mfcc,chroma_sftf,flux,rolloff,zero_crossing_rate,contrast,flatness,rmse,tempo
0,hip-hop,4.450824,0.431049,1.365420,4318.922711,0.068300,19.013011,0.025153,0.345805,151.999081
1,hip-hop,-0.377781,0.511142,1.667078,4419.694105,0.062677,18.660331,0.022863,0.176869,123.046875
2,hip-hop,0.259693,0.522985,1.359143,5745.967884,0.159102,20.103879,0.053650,0.276045,95.703125
3,hip-hop,0.297143,0.399623,1.744030,4064.054853,0.060242,21.550626,0.020770,0.365921,95.703125
4,hip-hop,-3.304997,0.288411,1.144687,1368.274351,0.039064,21.798417,0.000360,0.072659,117.453835
...,...,...,...,...,...,...,...,...,...,...
3800,metal,0.811577,0.566339,1.299508,4737.379629,0.112808,18.067332,0.039229,0.211604,86.132812
3801,metal,2.362003,0.493252,1.079446,4911.343929,0.085464,18.721121,0.025655,0.273660,99.384014
3802,metal,-0.901563,0.419331,1.319956,5249.888182,0.118459,19.661044,0.038292,0.108418,112.347147
3803,metal,0.775025,0.561058,0.985969,5249.055499,0.099681,18.164477,0.026418,0.331060,123.046875


In [8]:
plt.rc("font", size=14)
from sklearn.model_selection import train_test_split
import seaborn as sns
sns.set(style="white")
sns.set(style="whitegrid", color_codes=True)
%matplotlib inline
import scipy.stats as scs
import scipy
from sklearn.linear_model import LogisticRegression, LinearRegression
import itertools
from sklearn.preprocessing import StandardScaler # data normalization
from sklearn.metrics import precision_score # evaluation metric
from sklearn.metrics import accuracy_score # evaluation metric
from sklearn.metrics import recall_score # evaluation metric
from sklearn.metrics import classification_report # evaluation metric
from sklearn.metrics import confusion_matrix # evaluation metric
from sklearn.metrics import log_loss # evaluation metric
from matplotlib import rcParams # plot size customization
rcParams['figure.figsize'] = (20, 10)

In [9]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1062 entries, 0 to 1061
Data columns (total 10 columns):
 #   Column              Non-Null Count  Dtype  
---  ------              --------------  -----  
 0   genre               1062 non-null   object 
 1   mfcc                1062 non-null   float64
 2   chroma_sftf         1062 non-null   float64
 3   flux                1062 non-null   float64
 4   rolloff             1062 non-null   float64
 5   zero_crossing_rate  1062 non-null   float64
 6   contrast            1062 non-null   float64
 7   flatness            1062 non-null   float64
 8   rmse                1062 non-null   float64
 9   tempo               1062 non-null   float64
dtypes: float64(9), object(1)
memory usage: 83.1+ KB


In [10]:

y = df["genre"]
X = df.drop('genre',axis=1,inplace=False)


In [19]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.25)
lr = LogisticRegression(C = 1000, solver = 'liblinear')
lr.fit(X_train,y_train)
yhat = lr.predict(X_test)
yhat_prob = lr.predict_proba(X_test)

print("Accuracy Score : ", accuracy_score(yhat,y_test))

print("Precision Score : ",precision_score(y_test, yhat, 
                                           pos_label='positive',
                                           average='macro'))
print("Recall Score : ",recall_score(y_test, yhat, 
                                           pos_label='positive',
                                           average='macro'))

Accuracy Score :  0.5300751879699248
Precision Score :  0.5035256693565252
Recall Score :  0.5131779438072188


In [12]:
'''
gen = df.loc[:,"genre"]
dums = pd.get_dummies(gen,drop_first=True)
df.drop('genre',axis=1,inplace=True)
dums.head()
df = pd.concat([df,dums],axis=1)

df.head()
'''

'\ngen = df.loc[:,"genre"]\ndums = pd.get_dummies(gen,drop_first=True)\ndf.drop(\'genre\',axis=1,inplace=True)\ndums.head()\ndf = pd.concat([df,dums],axis=1)\n\ndf.head()\n'

In [67]:
from sklearn.neighbors import KNeighborsClassifier

model = KNeighborsClassifier(n_neighbors=5)

# Train the model using the training sets
model.fit(X_train,y_train)

#Predict Output
y_pred= model.predict(X_test)


In [68]:
#Import scikit-learn metrics module for accuracy calculation
from sklearn import metrics
# Model Accuracy, how often is the classifier correct?
print("Accuracy:",metrics.accuracy_score(y_test, y_pred))

Accuracy: 0.37969924812030076


In [76]:
from sklearn.ensemble import RandomForestClassifier

scores = []
for i in range(100):
    clf = RandomForestClassifier(n_estimators=15)

    # Train the model using the training sets
    clf.fit(X_train,y_train)

    #Predict Output
    y_pred= clf.predict(X_test)
    scores.append(metrics.accuracy_score(y_test, y_pred))

print(np.mean(scores))
                  

0.6079699248120299


In [77]:
print("Accuracy:",metrics.accuracy_score(y_test, y_pred))

Accuracy: 0.5977443609022557


In [60]:
from sklearn.model_selection import KFold
kf = KFold(n_splits=5, shuffle=True)  # almost always use shuffle=True
fold_scores = []

for train, test in kf.split(X):
    model = LogisticRegression()
    model.fit(X.values[train], y.values[train])
    fold_scores.append(model.score(X.values[test], y.values[test]))
    
print(np.mean(fold_scores))

0.4698999025600141


baseline is the probability of randomly sleecting a song...

baseline is about 20%