# Accent recognition #

In [2]:
import pandas as pd
import numpy as np
import librosa
import librosa.display
from keras import layers
from keras.models import Sequential
from sklearn.model_selection import train_test_split
from sklearn import preprocessing
from sklearn.metrics import classification_report
import os
from IPython.display import HTML, display
import time
import warnings
warnings.filterwarnings('ignore')

Reading the data

In [3]:
data = pd.read_csv('./speakers_all.csv', index_col='speakerid')
data.head()

Unnamed: 0_level_0,age,age_onset,birthplace,filename,native_language,sex,country,file_missing?,Unnamed: 9,Unnamed: 10,Unnamed: 11
speakerid,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
788,24.0,12.0,"koussi, senegal",balanta,balanta,male,senegal,True,,,
1953,18.0,10.0,"buea, cameroon",cameroon,cameroon,male,cameroon,True,,,
1037,48.0,8.0,"hong, adamawa, nigeria",fulfulde,fulfulde,male,nigeria,True,,,
1165,42.0,42.0,"port-au-prince, haiti",haitian,haitian,male,haiti,True,,,
1166,40.0,35.0,"port-au-prince, haiti",haitian,haitian,male,haiti,True,,,


Data type of columns

In [4]:
data.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 2172 entries, 788 to 406
Data columns (total 11 columns):
 #   Column           Non-Null Count  Dtype  
---  ------           --------------  -----  
 0   age              2172 non-null   float64
 1   age_onset        2172 non-null   float64
 2   birthplace       2168 non-null   object 
 3   filename         2172 non-null   object 
 4   native_language  2172 non-null   object 
 5   sex              2172 non-null   object 
 6   country          2167 non-null   object 
 7   file_missing?    2172 non-null   bool   
 8   Unnamed: 9       0 non-null      float64
 9   Unnamed: 10      0 non-null      float64
 10  Unnamed: 11      1 non-null      object 
dtypes: bool(1), float64(4), object(6)
memory usage: 188.8+ KB


Dropping columns 8-10 as they are not required 

In [5]:
data.drop(data.columns[8:11], axis=1, inplace=True)
data=data.fillna("NaN")
data.head()

Unnamed: 0_level_0,age,age_onset,birthplace,filename,native_language,sex,country,file_missing?
speakerid,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
788,24.0,12.0,"koussi, senegal",balanta,balanta,male,senegal,True
1953,18.0,10.0,"buea, cameroon",cameroon,cameroon,male,cameroon,True
1037,48.0,8.0,"hong, adamawa, nigeria",fulfulde,fulfulde,male,nigeria,True
1165,42.0,42.0,"port-au-prince, haiti",haitian,haitian,male,haiti,True
1166,40.0,35.0,"port-au-prince, haiti",haitian,haitian,male,haiti,True


In [8]:
from pathlib import Path

dir = "./recordings/"

def feature_engineering(dir, data):
    cnt = 0

    df = pd.DataFrame()
    tmp = {}

    for idx, row in data.iterrows():
        # if file does not exist in the recordings folder, skip it
        if os.path.isfile(dir+row['filename']+'.mp3')==False:
            print('File '+str(row['filename'])+".mp3 doesn't exist")
            # also remove it from the dataframe
            data=data.drop([idx])
            continue
        tmp['filename']=row['filename']
        tmp['country']=row['country']
        y, sr=librosa.load(os.path.join(os.path.abspath(dir),row['filename']+'.mp3'))
        tmp['rms']=np.mean(librosa.feature.rms(y=y))
        tmp['chroma_stft']=np.mean(librosa.feature.chroma_stft(y=y, sr=sr))
        tmp['spec_cent']=np.mean(librosa.feature.spectral_centroid(y=y,sr=sr))
        tmp['spec_bw']=np.mean(librosa.feature.spectral_bandwidth(y=y, sr=sr))
        tmp['rolloff']=np.mean(librosa.feature.spectral_rolloff(y=y,sr=sr))
        tmp['zcr']=np.mean(librosa.feature.zero_crossing_rate(y))
        mfcc=librosa.feature.mfcc(y=y, sr=sr)
        i=0
        for e in mfcc: 
            tmp['mfcc'+str(i)]=np.mean(e)
            i+=1
        df=df.append([tmp])
        print(cnt)
        cnt+=1
    return df

In [9]:
# creating the dataframe
df = feature_engineering(dir, data)

File balanta.mp3 doesn't exist
File cameroon.mp3 doesn't exist
File fulfulde.mp3 doesn't exist
File haitian.mp3 doesn't exist
File haitian.mp3 doesn't exist
File haitian.mp3 doesn't exist
File haitian.mp3 doesn't exist
File haitian.mp3 doesn't exist
File haitian.mp3 doesn't exist
File hawai'i.mp3 doesn't exist
File hawai'i.mp3 doesn't exist
File jamaican.mp3 doesn't exist
File jamaican.mp3 doesn't exist
File jamaican.mp3 doesn't exist
File liberian.mp3 doesn't exist
File liberian.mp3 doesn't exist
File nicaragua.mp3 doesn't exist
File nicaragua.mp3 doesn't exist
File nicaragua.mp3 doesn't exist
File northern.mp3 doesn't exist
File poonchi1.mp3 doesn't exist
File sinhalese2.mp3 doesn't exist
File sinhalese3.mp3 doesn't exist
File sinhalese4.mp3 doesn't exist
File sinhalese5.mp3 doesn't exist
File swiss.mp3 doesn't exist
File swiss.mp3 doesn't exist
File swiss.mp3 doesn't exist
File swiss.mp3 doesn't exist
File swiss.mp3 doesn't exist
File tetun-dili1.mp3 doesn't exist
File tok.mp3 doesn

Saving the processed data to a new csv file inorder to redude time taken to read the data(if done again)

In [10]:
df.to_csv('processedMetadata.csv')

In [47]:
# Reading the dataframe
df = pd.read_csv('processedMetadata.csv')

In [121]:
df.isnull().sum()

Unnamed: 0     0
filename       0
country        5
rms            0
chroma_stft    0
spec_cent      0
spec_bw        0
rolloff        0
zcr            0
mfcc0          0
mfcc1          0
mfcc2          0
mfcc3          0
mfcc4          0
mfcc5          0
mfcc6          0
mfcc7          0
mfcc8          0
mfcc9          0
mfcc10         0
mfcc11         0
mfcc12         0
mfcc13         0
mfcc14         0
mfcc15         0
mfcc16         0
mfcc17         0
mfcc18         0
mfcc19         0
dtype: int64

In [122]:
# removing ones with missing values
df=df.dropna()

In [129]:
# removing the unnamed column
df=df.drop(df.columns[0], axis=1)

In [130]:
df.shape

(2133, 28)

In [131]:
df.head()

Unnamed: 0,filename,country,rms,chroma_stft,spec_cent,spec_bw,rolloff,zcr,mfcc0,mfcc1,...,mfcc10,mfcc11,mfcc12,mfcc13,mfcc14,mfcc15,mfcc16,mfcc17,mfcc18,mfcc19
0,afrikaans1,south africa,0.105244,0.337555,1805.986179,2007.324973,3541.95929,0.088264,-280.80106,99.21216,...,-5.140613,-2.732358,-11.584127,-2.19187,-7.22958,-4.382146,-5.336466,-4.357283,-2.833485,-4.200771
1,afrikaans2,south africa,0.066111,0.424563,2399.975687,2224.914991,4481.421492,0.124069,-247.78566,92.795616,...,-14.21398,-1.349804,-6.842897,-6.751433,-0.419902,1.254955,-5.173705,-3.33727,1.745803,0.012325
2,afrikaans3,south africa,0.083056,0.365872,1672.684768,1699.227508,3102.185185,0.093211,-297.33353,109.30961,...,-2.047613,-6.172817,-9.027949,-9.28883,-0.902618,4.665127,-6.623897,-3.571043,2.065423,-3.365135
3,afrikaans4,south africa,0.093097,0.375393,1729.141216,1872.197782,3355.100945,0.084542,-248.5224,115.01508,...,-12.569201,2.03717,-6.359156,-2.060952,1.62875,1.188446,-4.459551,2.265979,1.125938,0.355211
4,afrikaans5,south africa,0.035734,0.435086,2278.840563,2161.053169,4042.717059,0.140209,-324.74405,103.79608,...,-6.980526,-0.427644,-3.936378,-5.908207,-3.56923,2.869799,-5.051334,3.448889,-1.428357,3.386606


In [134]:
df.iloc[1][0]

'afrikaans2'

In [125]:
features = []
labels = []

In [135]:
for i in range(len(df)):
    # if country value is not present, skip it
    labels.append(df.iloc[i][1])
    feat = []
    for j in range(2, len(df.columns)):
        feat.append(df.iloc[i][j])
    features.append(feat)

In [136]:
features = np.array(features)
labels = np.array(labels)

In [137]:
# performing one-hot encoding on the labels
from sklearn.preprocessing import LabelEncoder, OneHotEncoder

labelencoder_y = LabelEncoder()
labels = labelencoder_y.fit_transform(labels)

onehotencoder = OneHotEncoder(sparse=False)
labels = onehotencoder.fit_transform(labels.reshape(-1, 1))

In [138]:
print(labels.shape)
print(features.shape)

(6153, 176)
(6153, 26)


In [139]:
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score

# split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(features, labels, test_size=0.2, random_state=42)

In [150]:
print((X_train[0]))
print((X_test[0]))

[ 4.38089040e-02  3.09697400e-01  2.99695701e+03  1.95073787e+03
  4.80107243e+03  2.24794163e-01 -3.97860170e+02  4.30768400e+01
  2.21480060e+01  2.85843560e+01  5.55811000e+00 -1.65495660e+01
 -1.42404800e+01 -5.55939500e+00 -1.61858000e+01 -1.66162570e+01
 -5.43696360e+00  3.25516340e+00 -4.77179570e+00  2.56185150e+00
 -1.29721760e+01 -7.45177400e+00 -1.82707890e+00 -8.90464700e+00
 -2.03340270e+00 -8.87920400e+00]
[ 3.02068600e-02  4.37452700e-01  2.75145350e+03  2.44524329e+03
  5.23109800e+03  1.28865074e-01 -3.33952400e+02  7.21960700e+01
 -1.00914450e+00  2.16039920e+01  8.23161700e+00 -5.22393300e+00
 -7.89784700e+00  2.19918200e+00 -2.71585150e+00  6.25784600e+00
 -1.01847250e+01  1.03270210e+01 -6.35358570e+00  5.94352400e+00
  9.35334700e-02 -3.92647000e-01  1.09524680e+00  4.37111200e+00
 -4.26072840e+00  4.71985600e+00]


In [141]:
from sklearn.linear_model import LinearRegression

In [142]:
# creating the model
model = LinearRegression()
model.fit(X_train, y_train)

In [152]:
print(X_train.shape)
print(y_train.shape)
print(X_test.shape)
print(y_test.shape)

(4922, 26)
(4922, 176)
(1231, 26)
(1231, 176)


In [154]:
print(x_train)

           rms  chroma_stft    spec_cent      spec_bw      rolloff       zcr  \
1071  0.066519     0.404329  2149.287909  2252.665215  4244.184723  0.091941   
2120  0.047666     0.366316  2798.287743  2440.780382  5172.293908  0.160409   
192   0.047369     0.284415  2095.503102  1520.196243  3588.843769  0.149398   
678   0.072790     0.310086  1635.722358  1485.048169  3089.903340  0.093678   
486   0.046216     0.430128  2770.943704  2109.368898  4757.390649  0.184088   
...        ...          ...          ...          ...          ...       ...   
1638  0.061455     0.406390  1532.985161  1451.624240  2865.903999  0.094455   
1095  0.072788     0.388573  1740.756714  1911.783724  3267.260215  0.081928   
1130  0.048594     0.343860  1928.389834  1871.960700  3640.409170  0.104213   
1294  0.057904     0.405750  2525.298649  2282.655065  4382.839037  0.140913   
860   0.038936     0.383960  3269.172107  2328.411759  5558.414401  0.217823   

          mfcc0       mfcc1      mfcc2 

In [143]:
ty_pred = model.predict(X_test)

In [144]:
from sklearn.metrics import mean_squared_error

mse = mean_squared_error(y_test, y_pred)
print("Mean squared error: {:.2f}".format(mse))

Mean squared error: 0.01


In [146]:
print(y_test.shape)
print(y_pred.shape)

(1231, 176)
(1231, 177)


In [149]:
print(y_test[0])
print(y_pred[0])

[0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 1.
 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
 0. 0. 0. 0. 0. 0. 0. 0.]
[0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
 0. 0. 0. 0. 0. 0. 0. 0. 

In [145]:
from sklearn.metrics import accuracy_score

y_pred = clf.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)
print("Accuracy:", accuracy)

ValueError: inconsistent shapes

In [98]:
for i in range(len(features)):
    print(features[i], labels[i])

[ 1.05244090e-01  3.37554930e-01  1.80598618e+03  2.00732497e+03
  3.54195929e+03  8.82637919e-02 -2.80801060e+02  9.92121600e+01
  1.62540600e+01  4.01890700e+01  8.69978200e+00 -8.71958100e+00
 -2.72435330e+00 -3.95567580e+00 -1.04244800e+00 -1.09315250e+01
 -5.14061300e+00 -2.73235800e+00 -1.15841270e+01 -2.19187020e+00
 -7.22958040e+00 -4.38214600e+00 -5.33646630e+00 -4.35728300e+00
 -2.83348540e+00 -4.20077130e+00] [0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 1. 0.
 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
 0. 0. 0. 0. 0. 0. 0. 0. 0.]
[ 6.61110000e-02  4.24563470e-01  2.