**IMPORT DEPENDENCIES**

In [3]:
# Imported in this cell as well so that it runs on a fresh kernel: the cell sits
# above the main import cell and would otherwise hit a NameError on `tf`.
import tensorflow as tf

# Enable on-demand GPU memory growth for every visible GPU.
gpus = tf.config.list_physical_devices('GPU')
if gpus:
    for gpu in gpus:
        tf.config.experimental.set_memory_growth(gpu, True)
    print(f"Memory growth enabled for {len(gpus)} GPU(s).")

Memory growth enabled for 1 GPU(s).


In [4]:
import tensorflow as tf
import numpy as np
import pandas as pd
import nltk
import tensorflow_io as tfio

In [80]:
import librosa
import os
import matplotlib.pyplot as plt

In [2]:
# Load the speaker metadata CSV from the working directory.
df = pd.read_csv("speakers_all.csv")

In [3]:
# Display the loaded table to inspect its columns.
df

Unnamed: 0,age,age_onset,birthplace,filename,native_language,sex,speakerid,country,file_missing?,Unnamed: 9,Unnamed: 10,Unnamed: 11
0,24.0,12.0,"koussi, senegal",balanta,balanta,male,788,senegal,True,,,
1,18.0,10.0,"buea, cameroon",cameroon,cameroon,male,1953,cameroon,True,,,
2,48.0,8.0,"hong, adamawa, nigeria",fulfulde,fulfulde,male,1037,nigeria,True,,,
3,42.0,42.0,"port-au-prince, haiti",haitian,haitian,male,1165,haiti,True,,,
4,40.0,35.0,"port-au-prince, haiti",haitian,haitian,male,1166,haiti,True,,,
...,...,...,...,...,...,...,...,...,...,...,...,...
2167,46.0,5.0,"lagos, nigeria",yoruba3,yoruba,female,766,nigeria,False,,,
2168,46.0,12.0,"lagos, nigeria",yoruba4,yoruba,male,851,nigeria,False,,,
2169,47.0,2.0,"ibadan, nigeria",yoruba5,yoruba,female,2023,nigeria,False,,,
2170,31.0,1.0,"bethel, alaska, usa",yupik1,yupik,female,571,usa,False,,,


In [4]:
# Keep only the rows whose "file_missing?" flag is False.
df = df.loc[df["file_missing?"].eq(False)]

In [5]:
# Display the table again after dropping the rows flagged as missing.
df

Unnamed: 0,age,age_onset,birthplace,filename,native_language,sex,speakerid,country,file_missing?,Unnamed: 9,Unnamed: 10,Unnamed: 11
32,27.0,9.0,"virginia, south africa",afrikaans1,afrikaans,female,1,south africa,False,,,
33,40.0,5.0,"pretoria, south africa",afrikaans2,afrikaans,male,2,south africa,False,,,
34,43.0,4.0,"pretoria, transvaal, south africa",afrikaans3,afrikaans,male,418,south africa,False,,,
35,26.0,8.0,"pretoria, south africa",afrikaans4,afrikaans,male,1159,south africa,False,,,
36,19.0,6.0,"cape town, south africa",afrikaans5,afrikaans,male,1432,south africa,False,,,
...,...,...,...,...,...,...,...,...,...,...,...,...
2167,46.0,5.0,"lagos, nigeria",yoruba3,yoruba,female,766,nigeria,False,,,
2168,46.0,12.0,"lagos, nigeria",yoruba4,yoruba,male,851,nigeria,False,,,
2169,47.0,2.0,"ibadan, nigeria",yoruba5,yoruba,female,2023,nigeria,False,,,
2170,31.0,1.0,"bethel, alaska, usa",yupik1,yupik,female,571,usa,False,,,


In [6]:
# Subset of rows whose "sex" column equals "male".
men = df.loc[df["sex"].eq("male")]

**DATA PREPROCESSING**

In [7]:
import os
import shutil
def organize_audio_files(dataframe, source_directory):
    """Move each listed recording into a sub-folder named after its language.

    For every row of ``dataframe`` the file ``<filename>.mp3`` is looked up in
    ``source_directory``. When it is present it is moved to
    ``<source_directory>/<native_language>/`` (created if needed); when it is
    absent a warning is printed and the row is skipped.

    Args:
        dataframe: Table providing ``filename`` and ``native_language`` columns.
        source_directory: Folder holding the ``.mp3`` files to organise.

    Raises:
        ValueError: If ``source_directory`` does not exist.
    """
    if not os.path.exists(source_directory):
        raise ValueError(f"Source directory {source_directory} does not exist.")

    for _, record in dataframe.iterrows():
        audio_clip = record['filename'] + ".mp3"
        language = record['native_language']
        origin = os.path.join(source_directory, audio_clip)

        if os.path.exists(origin):
            destination_directory = os.path.join(source_directory, language)
            if not os.path.exists(destination_directory):
                os.makedirs(destination_directory)
            shutil.move(origin, os.path.join(destination_directory, audio_clip))
            print(f"Moved {audio_clip} to {destination_directory}")
        else:
            print(f"Warning: {audio_clip} not found in {source_directory}. Skipping this file.")

In [8]:
# Sort the recordings listed in `men` into per-language folders under Data/recordings.
organize_audio_files(men,fr"Data/recordings")



In [42]:
def replace_mp3_with_wav_in_subdirectories(directory):
    """Rename every ``*.mp3`` file one level below ``directory`` to ``*.wav``.

    NOTE: this only changes the file extension via ``os.rename``; the file
    contents are not re-encoded, so the data on disk is still whatever the
    original file contained.

    Errors are reported with ``print`` and never raised. A failure on one file
    or folder no longer stops the remaining files from being processed, and an
    existing ``.wav`` target is left untouched instead of being overwritten.

    Args:
        directory: Folder whose immediate sub-folders are scanned.

    Returns:
        The number of files that were renamed.
    """
    renamed = 0
    try:
        subdirs = sorted(os.listdir(directory))
    except OSError as e:
        print(f"An error occurred: {e}")
        return renamed

    for subdir in subdirs:
        subdir_path = os.path.join(directory, subdir)
        if not os.path.isdir(subdir_path):
            continue
        try:
            filenames = sorted(os.listdir(subdir_path))
        except OSError as e:
            print(f"An error occurred: {e}")
            continue

        for filename in filenames:
            old_file = os.path.join(subdir_path, filename)
            stem, extension = os.path.splitext(filename)
            if extension.lower() != ".mp3" or not os.path.isfile(old_file):
                continue
            new_file = os.path.join(subdir_path, stem + ".wav")
            if os.path.exists(new_file):
                # os.rename would silently replace the target on POSIX.
                print(f"Skipping '{old_file}': '{new_file}' already exists")
                continue
            print(f"Renaming '{old_file}' to '{new_file}'")
            try:
                os.rename(old_file, new_file)
            except OSError as e:
                print(f"An error occurred: {e}")
                continue
            renamed += 1
    return renamed
# Apply the .mp3 -> .wav rename to every sub-folder of Data/recordings.
directory = r'Data/recordings'
replace_mp3_with_wav_in_subdirectories(directory)

Renaming 'Data/recordings\afrikaans\afrikaans2.mp3' to 'Data/recordings\afrikaans\afrikaans2.wav'
Renaming 'Data/recordings\afrikaans\afrikaans3.mp3' to 'Data/recordings\afrikaans\afrikaans3.wav'
Renaming 'Data/recordings\afrikaans\afrikaans4.mp3' to 'Data/recordings\afrikaans\afrikaans4.wav'
Renaming 'Data/recordings\afrikaans\afrikaans5.mp3' to 'Data/recordings\afrikaans\afrikaans5.wav'
Renaming 'Data/recordings\agni\agni1.mp3' to 'Data/recordings\agni\agni1.wav'
Renaming 'Data/recordings\akan\akan1.mp3' to 'Data/recordings\akan\akan1.wav'
Renaming 'Data/recordings\albanian\albanian1.mp3' to 'Data/recordings\albanian\albanian1.wav'
Renaming 'Data/recordings\albanian\albanian2.mp3' to 'Data/recordings\albanian\albanian2.wav'
Renaming 'Data/recordings\albanian\albanian3.mp3' to 'Data/recordings\albanian\albanian3.wav'
Renaming 'Data/recordings\albanian\albanian4.mp3' to 'Data/recordings\albanian\albanian4.wav'
Renaming 'Data/recordings\albanian\albanian5.mp3' to 'Data/recordings\albani

**CREATE LABELS AND RESAMPLE**

In [6]:
def labels(root="Data/recordings"):
    """Collect recording paths and integer class ids, one class per sub-folder.

    Each immediate sub-folder of ``root`` is one class. Folders are visited in
    sorted order and numbered from 0, so the ids are reproducible between runs
    and fall in ``[0, number_of_folders)`` as required by
    ``sparse_categorical_crossentropy``. Entries of ``root`` that are not
    directories are ignored.

    Args:
        root: Folder containing one sub-folder per language. Defaults to the
            notebook's ``Data/recordings`` directory.

    Returns:
        ``(X, Y)``: ``X`` is a list of ``"<root>/<folder>/<file>"`` paths and
        ``Y`` the matching list of integer class ids.
    """
    X = []
    Y = []
    languages = sorted(
        name for name in os.listdir(root)
        if os.path.isdir(os.path.join(root, name))
    )
    for class_id, language in enumerate(languages):
        for clip in sorted(os.listdir(f"{root}/{language}")):
            X.append(f"{root}/{language}/{clip}")
            Y.append(class_id)
    return X, Y

In [7]:
# X: recording paths, Y: the integer class id assigned to each path.
X,Y = labels()

In [8]:
# Convert the path and label lists to NumPy arrays.
X = np.array(X)
Y = np.array(Y)

In [81]:
def wave(filename, target_sample_rate=16000):
    """Load an audio file and return its samples at ``target_sample_rate``.

    The file is first read at its own sample rate (``sr=None``). It is only
    resampled, with a message printed, when that rate differs from the target.

    Args:
        filename: Path of the audio file to read.
        target_sample_rate: Sample rate, in Hz, of the returned signal.

    Returns:
        The audio samples as returned by librosa.
    """
    samples, native_rate = librosa.load(filename, sr=None)
    if native_rate == target_sample_rate:
        return samples
    samples = librosa.resample(samples, orig_sr=native_rate, target_sr=target_sample_rate)
    print(f"Resampled audio to {target_sample_rate} Hz.")
    return samples

In [10]:
# Decode (and, if needed, resample) every recording listed in X.
# The device string is normalised to "/CPU:0", the form used when the model is built;
# the previous "CPU/:0" had the slash misplaced.
with tf.device("/CPU:0"):
    X_main = [wave(str(path)) for path in X]

Resampled audio to 16000 Hz.
Resampled audio to 16000 Hz.
Resampled audio to 16000 Hz.
Resampled audio to 16000 Hz.
Resampled audio to 16000 Hz.
Resampled audio to 16000 Hz.
Resampled audio to 16000 Hz.
Resampled audio to 16000 Hz.
Resampled audio to 16000 Hz.
Resampled audio to 16000 Hz.
Resampled audio to 16000 Hz.
Resampled audio to 16000 Hz.
Resampled audio to 16000 Hz.
Resampled audio to 16000 Hz.
Resampled audio to 16000 Hz.
Resampled audio to 16000 Hz.
Resampled audio to 16000 Hz.
Resampled audio to 16000 Hz.
Resampled audio to 16000 Hz.
Resampled audio to 16000 Hz.
Resampled audio to 16000 Hz.
Resampled audio to 16000 Hz.
Resampled audio to 16000 Hz.
Resampled audio to 16000 Hz.
Resampled audio to 16000 Hz.
Resampled audio to 16000 Hz.
Resampled audio to 16000 Hz.
Resampled audio to 16000 Hz.
Resampled audio to 16000 Hz.
Resampled audio to 16000 Hz.
Resampled audio to 16000 Hz.
Resampled audio to 16000 Hz.
Resampled audio to 16000 Hz.
Resampled audio to 16000 Hz.
Resampled audi

In [11]:
# Force every clip to exactly 450000 samples: longer clips are cut at the end,
# shorter ones get zeros prepended.
for idx, clip in enumerate(X_main):
    clip = clip[:450000]
    missing = 450000 - tf.shape(clip)[0]
    leading_zeros = tf.zeros([missing], dtype=tf.float32)
    X_main[idx] = tf.concat([leading_zeros, clip], 0)

In [12]:
def preprocess(wav,label):
    """Turn a waveform into a magnitude spectrogram with a trailing channel axis.

    Args:
        wav: Audio samples to transform.
        label: Passed through unchanged so the function can be used in
            ``Dataset.map`` on ``(wav, label)`` pairs.

    Returns:
        ``(spectrogram, label)``, where ``spectrogram`` is the absolute value of
        the STFT (320-sample frames, hop of 32) with a new axis inserted at
        position 2.
    """
    stft = tf.signal.stft(wav, frame_length=320, frame_step=32)
    magnitude = tf.abs(stft)
    return tf.expand_dims(magnitude, axis=2), label

**CREATE DATA PIPELINE**

In [13]:
# Wrap the padded waveforms and their labels in a tf.data pipeline.
# The device string is normalised to "/CPU:0", the form used when the model is built;
# the previous "CPU/:0" had the slash misplaced.
with tf.device("/CPU:0"):
    dataset = tf.data.Dataset.from_tensor_slices((X_main,Y))

In [14]:
# Replace each (waveform, label) element with (magnitude spectrogram, label).
dataset = dataset.map(preprocess)

In [15]:
from tensorflow.keras.applications import ResNet50
from tensorflow.keras.preprocessing.image import img_to_array, array_to_img

In [37]:
def process_spectrogram(spectrogram,label):
    """Resize a spectrogram to 224x224 and repeat its last axis three times.

    Args:
        spectrogram: Image-like tensor accepted by ``tf.image.resize``.
        label: Passed through unchanged.

    Returns:
        ``(image, label)``, where ``image`` is the resized spectrogram with its
        last axis repeated 3 times. This matches the ``(224, 224, 3)`` input
        declared for the ResNet50 backbone when the spectrogram has a single
        channel.
    """
    resized = tf.image.resize(spectrogram, [224, 224])
    three_channel = tf.repeat(resized, repeats=3, axis=-1)
    return three_channel, label

In [39]:
# Resize every spectrogram to the backbone's input size, in parallel.
dataset = dataset.map(
    process_spectrogram,
    num_parallel_calls=tf.data.experimental.AUTOTUNE,
)

In [42]:
dataset = dataset.cache()
# Shuffle ONCE, with a fixed seed, over the whole dataset:
# - reshuffle_each_iteration=False keeps the element order identical on every pass, so
#   the take()/skip() train/validation split made from this dataset always contains the
#   same samples (the default reshuffles per epoch and mixes the two splits).
# - a buffer covering all of X_main gives a uniform shuffle of the class-ordered data.
dataset = dataset.shuffle(len(X_main), seed=42, reshuffle_each_iteration=False)
dataset = dataset.batch(16)
dataset = dataset.prefetch(2)

In [43]:
# Split by batch count: the first 55 batches are used for training, the next 14 for validation.
# NOTE(review): take()/skip() follow the iteration order of `dataset`, so the two splits
# only stay disjoint across epochs if that order is identical on every pass — confirm the
# upstream shuffle does not reshuffle each iteration.
train = dataset.take(55)
test = dataset.skip(55).take(14)

**CREATE CNN MODEL**

In [44]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, Dense, Flatten,Input

In [45]:
# ResNet50 with ImageNet weights and without its classification head, for 224x224x3 inputs.
base_model = ResNet50(weights='imagenet', include_top=False, input_shape=(224, 224, 3))

In [46]:
# Freeze the backbone so that its weights are not updated during training.
base_model.trainable = False

In [50]:
# Classifier: ResNet50 backbone -> flatten -> 161-way softmax.
with tf.device('/CPU:0'):
    model = Sequential([
        base_model,
        Flatten(),
        Dense(161, activation='softmax'),
    ])

In [51]:
# Labels are integer class ids, hence the sparse variant of categorical cross-entropy.
model.compile(optimizer = "adam",loss = "sparse_categorical_crossentropy",metrics = ["accuracy"])

In [52]:
# Print the layers and parameter counts.
model.summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 resnet50 (Functional)       (None, 7, 7, 2048)        23587712  
                                                                 
 flatten (Flatten)           (None, 100352)            0         
                                                                 
 dense (Dense)               (None, 161)               16156833  
                                                                 
Total params: 39,744,545
Trainable params: 16,156,833
Non-trainable params: 23,587,712
_________________________________________________________________


In [49]:
from tensorflow.keras.callbacks import EarlyStopping
# Stop when val_loss has not improved for 5 epochs and restore the best weights seen.
earlystopping = EarlyStopping(patience  =5,monitor ="val_loss", restore_best_weights=True)

In [53]:
# Train for up to 20 epochs, validating on `test` and stopping early via `earlystopping`.
# `workers` / `use_multiprocessing` were dropped: they only apply to generator or
# keras.utils.Sequence inputs (not tf.data datasets) and newer Keras versions reject them.
hist = model.fit(train, epochs=20, validation_data=test, callbacks=[earlystopping])

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


**TEST MODEL**

In [78]:
def predict(filepath):
    """Predict and print the native language for one audio file.

    The clip goes through the same steps as the training data: ``wave`` ->
    trim/left-pad to 450000 samples -> ``preprocess`` -> ``process_spectrogram``.
    Using ``process_spectrogram`` here (instead of a PIL round-trip that
    rescales values and resizes differently) keeps the model input consistent
    with what it was trained on.

    The language name is the folder name of the first training path in ``X``
    whose label in ``Y`` equals the predicted class.

    Args:
        filepath: Path of the audio file to classify.

    Returns:
        The predicted language name, or ``None`` when no training example
        carries the predicted class id.
    """
    wav = wave(filepath)
    wav = wav[:450000]
    zero_padding = tf.zeros([450000] - tf.shape(wav), dtype=tf.float32)
    wav = tf.concat([zero_padding, wav], 0)

    spectrogram, _ = preprocess(wav, 0)
    image, _ = process_spectrogram(spectrogram, 0)
    # The model expects a batch axis in front.
    probabilities = model.predict(tf.expand_dims(image, axis=0))
    pred = int(np.argmax(probabilities))

    matches = np.flatnonzero(np.asarray(Y) == pred)
    if matches.size == 0:
        print(f"Predicted Native Language: unknown (class id {pred})")
        return None

    # Paths look like "<root>/<language>/<file>", so the parent folder is the language.
    language = os.path.basename(os.path.dirname(str(X[matches[0]])))
    print("Predicted Native Language:", language)
    return language
    

In [85]:
# Classify one example recording.
predict("test (mp3cut.net).wav")

Resampled audio to 16000 Hz.
Predicted Native Language: english
