In [2]:
import zipfile

# Two archives full of audio clips are unpacked here: the main dataset and
# the separate evaluation set.
# './' is the current working directory, so the contents of each archive end
# up next to this notebook (per the original note, the same folder that
# already holds Model_Jazz).
for archive_name in ('Dataset.zip', 'Evaluation_Set.zip'):
    with zipfile.ZipFile(archive_name, 'r') as zip_ref:
        zip_ref.extractall('./')

In [8]:
import librosa

# Request sr=16000 so the clip comes back at our recording sample rate (16 kHz).
# Template: librosa.load("[my_wav_files_directory]/0000.wav", sr=16000)
x, freq = librosa.load("Dataset/0000.wav", sr=16000)

# librosa.load hands back the audio time series (x) and its sample rate
# (freq, 16000 here); number of samples / sample rate = length in seconds.
duration_seconds = len(x) / freq
print("The duration of 0000.wav in seconds:", duration_seconds)

# This cell prints the duration of the first sound file (0000.wav): 5.494 s

The duration of 0000.wav in seconds: 5.494


In [9]:
# librosa.feature.mfcc returns n_mfcc coefficients for each analysis window
# (frame) of the audio time series.
# The signal is passed as y=...: recent librosa releases no longer accept the
# audio as a positional argument.
x_mfcc = librosa.feature.mfcc(y=x, sr=freq, n_mfcc=40)
print(x_mfcc.shape)

# x_mfcc has one row per coefficient (40) and one column per frame, so
# len(x_mfcc) is always 40; it is the number of columns, x_mfcc.shape[1],
# that grows with the duration of the wav file (5-6 seconds here).

(40, 172)


In [7]:
import numpy as np

def feature_extractor_1(test, sr=16000, n_mfcc=20):
    """Build a fixed-length MFCC feature vector for one audio file.

    The file is loaded with librosa at ``sr`` Hz, ``n_mfcc`` MFCCs are
    computed, and every coefficient is summarised by its mean and its
    variance taken along axis 1 of the MFCC matrix (one column per frame).

    Parameters
    ----------
    test : path of the audio file; passed unchanged to ``librosa.load``.
    sr : sample rate requested from ``librosa.load`` (default 16000).
    n_mfcc : number of MFCCs to compute (default 20).

    Returns
    -------
    list
        The ``n_mfcc`` means followed by the ``n_mfcc`` variances
        (40 values with the defaults).
    """
    # Load the audio file with librosa at the requested sample rate
    x, freq = librosa.load(test, sr=sr)

    # The audio has to be given as y=...: recent librosa releases reject it
    # when it is passed positionally.
    mfcc = librosa.feature.mfcc(y=x, sr=freq, n_mfcc=n_mfcc)

    # Mean and variance of each MFCC across all frames
    mean_mfccs = np.mean(mfcc, axis=1)
    var_mfccs = np.var(mfcc, axis=1)

    # Means first, then variances, as one flat feature list
    return list(mean_mfccs) + list(var_mfccs)

In [8]:
feature_extractor_1("Dataset/0000.wav")

[-258.6834,
 67.40751,
 -28.823479,
 -5.7446127,
 -19.878378,
 -2.3387473,
 -10.67504,
 -12.761739,
 -6.636203,
 -3.2882953,
 -6.5215135,
 6.566639,
 4.260296,
 11.507818,
 -2.4033027,
 5.3996015,
 -7.9286613,
 -1.8552961,
 -5.5140543,
 0.70949876,
 16593.23,
 891.0893,
 852.84503,
 654.8466,
 549.3666,
 315.10284,
 137.62134,
 128.09657,
 221.30846,
 142.26477,
 100.25705,
 86.99765,
 105.27958,
 75.27547,
 76.57926,
 117.44688,
 104.93364,
 85.19154,
 123.76393,
 104.782616]

In [9]:
import csv

# Folder that contains the training clips together with their Info.txt index
data_dir = "Dataset/"

# Info.txt is parsed as CSV: the first field of every row is the audio file
# name and the last field is its label (language)
with open(data_dir + "Info.txt", 'r') as file:
    info_rows = list(csv.reader(file))

file_list = [info_row[0] for info_row in info_rows]
label_list = [info_row[-1] for info_row in info_rows]

# Language code -> integer class id
lang_dic = {'EN': 0, 'FR': 1, 'AR': 2, 'JP': 3}

# Extracted MFCC feature vector for every listed file, in Info.txt order
x_data = [feature_extractor_1(data_dir + audio_name) for audio_name in file_list]

# Class id in {0,1,2,3} for {en, fr, ar, jp}, aligned with x_data
y_data = [lang_dic[language] for language in label_list]

In [21]:
import csv

# Folder that contains the evaluation clips together with their Info.txt index
data_dir = "Evaluation_Set/"

# Info.txt is parsed as CSV: the first field of every row is the audio file
# name and the last field is its label (language)
with open(data_dir + "Info.txt", 'r') as file:
    info_rows = list(csv.reader(file))

file_list = [info_row[0] for info_row in info_rows]
label_list = [info_row[-1] for info_row in info_rows]

# Language code -> integer class id
lang_dic = {'EN': 0, 'FR': 1, 'AR': 2, 'JP': 3}

# Extracted MFCC feature vector for every listed file, in Info.txt order
x_test = [feature_extractor_1(data_dir + audio_name) for audio_name in file_list]

# Class id in {0,1,2,3} for {en, fr, ar, jp}, aligned with x_test
y_test = [lang_dic[language] for language in label_list]

In [10]:
x_data

[[-258.6834,
  67.40751,
  -28.823479,
  -5.7446127,
  -19.878378,
  -2.3387473,
  -10.67504,
  -12.761739,
  -6.636203,
  -3.2882953,
  -6.5215135,
  6.566639,
  4.260296,
  11.507818,
  -2.4033027,
  5.3996015,
  -7.9286613,
  -1.8552961,
  -5.5140543,
  0.70949876,
  16593.23,
  891.0893,
  852.84503,
  654.8466,
  549.3666,
  315.10284,
  137.62134,
  128.09657,
  221.30846,
  142.26477,
  100.25705,
  86.99765,
  105.27958,
  75.27547,
  76.57926,
  117.44688,
  104.93364,
  85.19154,
  123.76393,
  104.782616],
 [-203.25566,
  52.142918,
  -35.55732,
  1.7210965,
  -24.60804,
  -9.533248,
  -21.781046,
  -9.336933,
  -8.352583,
  -1.1451396,
  -8.473974,
  7.1310625,
  -1.1384887,
  7.838623,
  -8.56028,
  3.3986382,
  -9.73995,
  -2.6992145,
  -2.4740555,
  3.5295494,
  9020.121,
  1270.7625,
  969.33527,
  506.15643,
  340.49957,
  224.8313,
  141.8196,
  114.84066,
  146.50648,
  116.97948,
  70.17239,
  75.02169,
  80.87344,
  89.41007,
  57.80784,
  48.159554,
  102.33924,
 

In [11]:
y_data

[0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,


In [12]:
import random

# Shuffle features and labels together so that every feature vector keeps
# its own label.
temp_list = list(zip(x_data, y_data))
# A dedicated, seeded generator makes the resulting order reproducible from
# run to run without touching the global random state.
random.Random(0).shuffle(temp_list)
# zip(*...) splits the shuffled pairs back into two tuples: features, labels
x_data, y_data = zip(*temp_list)

In [13]:
x_data

([-381.19476,
  56.838444,
  -5.2914367,
  8.779498,
  -29.77187,
  -15.748202,
  -32.85759,
  -16.120298,
  -13.405227,
  -9.438471,
  -17.741957,
  -8.015152,
  -9.201319,
  -6.4820695,
  -8.169966,
  -7.519759,
  -3.9818597,
  -1.8497701,
  -3.5603118,
  -7.383063,
  11693.53,
  2759.6062,
  1779.8671,
  653.1924,
  598.8573,
  230.29608,
  187.1662,
  195.0919,
  151.93936,
  112.21776,
  99.46823,
  135.92747,
  91.03315,
  41.639843,
  51.79584,
  51.522274,
  80.63597,
  71.529686,
  55.10745,
  49.70192],
 [-430.92072,
  93.25357,
  -28.234789,
  24.039259,
  0.5993592,
  -6.2546372,
  -13.005282,
  -20.204494,
  8.179847,
  -6.978095,
  -1.8419465,
  -7.822628,
  3.6375515,
  -1.800314,
  -2.7946627,
  4.3643885,
  -6.6210613,
  -5.9235206,
  -9.542341,
  -11.778069,
  6117.2275,
  1545.5166,
  1026.1876,
  701.05585,
  316.75342,
  250.61063,
  236.60313,
  249.12433,
  245.83781,
  123.99239,
  58.531025,
  106.66033,
  57.807858,
  76.73808,
  71.86299,
  55.28395,
  48.761

In [14]:
y_data

(0,
 3,
 1,
 3,
 3,
 0,
 2,
 2,
 3,
 2,
 2,
 0,
 2,
 2,
 1,
 0,
 1,
 3,
 0,
 1,
 1,
 3,
 0,
 1,
 0,
 0,
 1,
 3,
 2,
 2,
 1,
 3,
 3,
 0,
 3,
 1,
 0,
 2,
 2,
 0,
 0,
 1,
 3,
 1,
 0,
 2,
 3,
 3,
 1,
 1,
 1,
 2,
 1,
 2,
 0,
 0,
 1,
 0,
 1,
 2,
 3,
 1,
 1,
 1,
 1,
 0,
 2,
 1,
 1,
 3,
 1,
 0,
 3,
 0,
 1,
 2,
 2,
 2,
 3,
 0,
 2,
 1,
 2,
 0,
 3,
 1,
 2,
 1,
 3,
 2,
 0,
 1,
 1,
 2,
 1,
 3,
 2,
 1,
 0,
 2,
 3,
 2,
 1,
 1,
 2,
 3,
 2,
 3,
 3,
 0,
 2,
 3,
 3,
 2,
 0,
 2,
 3,
 1,
 3,
 2,
 2,
 2,
 0,
 0,
 0,
 3,
 3,
 3,
 2,
 0,
 3,
 1,
 2,
 2,
 3,
 2,
 1,
 2,
 0,
 3,
 2,
 0,
 3,
 1,
 2,
 3,
 2,
 0,
 3,
 3,
 0,
 2,
 1,
 2,
 1,
 3,
 0,
 1,
 3,
 1,
 2,
 2,
 3,
 3,
 2,
 3,
 0,
 3,
 1,
 3,
 3,
 0,
 3,
 3,
 0,
 1,
 0,
 1,
 1,
 0,
 1,
 1,
 0,
 2,
 3,
 0,
 3,
 0,
 2,
 1,
 3,
 1,
 1,
 3,
 0,
 1,
 0,
 0,
 0,
 1,
 3,
 2,
 1,
 1,
 1,
 3,
 3,
 2,
 0,
 2,
 2,
 2,
 3,
 1,
 1,
 1,
 2,
 1,
 1,
 3,
 1,
 1,
 3,
 3,
 3,
 3,
 1,
 3,
 3,
 2,
 0,
 2,
 2,
 3,
 0,
 1,
 0,
 3,
 1,
 1,
 2,
 2,
 0,
 2,
 3,
 2,
 3,
 2,
 0,
 0,


In [18]:
from sklearn.ensemble import RandomForestClassifier

# Shallow random forest; random_state is fixed so that re-running the cell
# reproduces the same model and the same score.
clf = RandomForestClassifier(max_depth=2, random_state=0)
clf.fit(x_data, y_data)
# The accuracy below is measured on the very samples used for training
# (train and test set are the same small set), so it is not a measure of
# generalisation.
print("Accuracy", clf.score(x_data, y_data))

Accuracy 0.6129032258064516


In [None]:
from sklearn.ensemble import RandomForestClassifier
from sklearn.datasets import make_classification

# A classifier has to be fitted before it can predict, and predict() takes
# only the feature matrix (labels are what it returns, not an input).
clf = RandomForestClassifier(max_depth=2, random_state=0)
clf.fit(x_data, y_data)
# Predicted class id for every sample in x_data; shown as the cell output
clf.predict(x_data)

In [19]:
from sklearn.model_selection import train_test_split

# Hold out 33% of the samples; random_state is fixed so the split (and every
# score computed from it) is identical on each run.
# NOTE(review): this rebinds x_data/y_data to the training part only and
# overwrites the x_test/y_test that were built from Evaluation_Set/ —
# confirm that is intended.
x_data, x_test, y_data, y_test = train_test_split(
    x_data, y_data, test_size=0.33, shuffle=True, random_state=0
)

In [20]:
from sklearn.ensemble import RandomForestClassifier
from sklearn.datasets import make_classification

# Build a depth-2 forest with a fixed seed and train it on x_data/y_data;
# fit() returns the estimator itself, so construction and fit are chained.
clf = RandomForestClassifier(max_depth=2, random_state=0).fit(x_data, y_data)
# Mean accuracy on x_test/y_test, displayed as the cell output
clf.score(x_test, y_test)

0.5076923076923077