# Accent Localizer

Read README.md for problem description.

### Imports

In [1]:
import os
from datetime import datetime

import pandas as pd
import librosa
import librosa.display
import matplotlib.pyplot as plt
import numpy as np
import sklearn

### Sample some data

In [2]:
# df = pd.read_csv(os.path.join('__dataset', 'validated_regions.tsv'), sep='\t')
# df.head(10)

### Get the spectrogram of an audio file

In [3]:
def get_spectrogram(path, sampling_rate = 48000, display = True):
    # Load an audio file as a floating point time series.
    audio , _ = librosa.load(path, sr=sampling_rate)

    # Short-time Fourier transform (STFT).
    stft = abs(librosa.stft(audio))

    # Convert an amplitude spectrogram to dB-scaled spectrogram.
    spectrogram = librosa.amplitude_to_db(stft)

    if display:
        plt.figure(figsize=(9, 3))
        librosa.display.specshow(spectrogram, sr=sampling_rate, x_axis='time', y_axis='log')
        plt.colorbar()
    return spectrogram


# _ = get_spectrogram(os.path.join('__dataset', 'clips', df.iloc[0]['path']))

### Extract features using mfcc

In [4]:
def extract_feature(path, sampling_rate = 48000):
    features = []
    audio, _ = librosa.load(path, sr=sampling_rate)

    spectral_centroid = np.mean(librosa.feature.spectral_centroid(y=audio, sr=sampling_rate))
    spectral_bandwidth = np.mean(librosa.feature.spectral_bandwidth(y=audio, sr=sampling_rate))
    spectral_rolloff = np.mean(librosa.feature.spectral_rolloff(y=audio, sr=sampling_rate))
    features.append(spectral_centroid)
    features.append(spectral_bandwidth)
    features.append(spectral_rolloff)
    
    mfcc = librosa.feature.mfcc(y=audio, sr=sampling_rate)
    for el in mfcc:
        features.append(np.mean(el))
    
    return np.asarray(features, dtype=float)

# features = extract_feature(os.path.join('__dataset', 'clips', df.iloc[0]['path']))
# print(features)
# print(features.shape)

### Create a new features file

In [5]:
# Create a new file named feature_regions.tsv

def create_header():
    header = ['path', 'region', 'spectral_centroid', 'spectral_bandwidth', 'spectral_rolloff']
    for i in range(1, 21):
        header.append(f'mfcc{i}')
    return header

# Check if the file exists
if False and not os.path.exists(os.path.join('__dataset', 'feature_regions.tsv')):
    header = create_header()
    # Save the header to the new file
    with open(os.path.join('__dataset', 'feature_regions.tsv'), 'w') as f:
        f.write('\t'.join(header) + '\n')

### Add the new features to the dataset

In [7]:
if False:
    # Read the validated_regions.tsv file
    df_val = pd.read_csv(os.path.join('__dataset', 'validated_regions.tsv'), sep='\t')

    # Read the feature_regions.tsv file
    df_feat = pd.read_csv(os.path.join('__dataset', 'feature_regions.tsv'), sep='\t')

    # Check the last row of the feature_regions.tsv file where we left off
    # Locate the same row in the validated_regions.tsv file
    # Start from the next row
    start = 0
    if len(df_feat) > 0:
        last_row = df_feat.iloc[-1]
        for i, row in df_val.iterrows():
            if row['path'] == last_row['path'] and row['region'] == last_row['region']:
                start = i + 1
                break

    # Extract features for each row in the validated_regions.tsv file
    now = datetime.now()
    try:
        for i, row in df_val.iloc[start:].iterrows():
            features = list()
            features.append(row['path'])
            features.append(row['region'])
            features.extend(extract_feature(os.path.join('__dataset', 'clips', row['path'])))

            # Convert the list to a pandas Series
            features_series = pd.Series(features, index=df_feat.columns)
            # Add this feature row to the feature_regions.tsv file
            df_feat = pd.concat([df_feat, features_series.to_frame().T], ignore_index=True)

    except KeyboardInterrupt as e:
        print("Interrupted by User, saving the progress...")

    finally:
        # Save the extracted features to the feature_regions.tsv file
        if not df_feat.empty:
            df_feat.to_csv(os.path.join('__dataset', 'feature_regions.tsv'), sep='\t', index=False)
        print(f'Processed {i - start} rows (Currently at {i}) in {datetime.now() - now} seconds.')



  df_val = pd.read_csv(os.path.join('__dataset', 'validated_regions.tsv'), sep='\t')


Processed -1 rows (Currently at 981895) in 0:00:20.408512 seconds.


### Encode labels

In [10]:
from sklearn.preprocessing import LabelEncoder, StandardScaler

data = pd.read_csv(os.path.join('__dataset', 'feature_regions.tsv'), sep='\t')

def get_labels(data):
    labels = data.iloc[:, 1]
    encoder = LabelEncoder()
    labels = encoder.fit_transform(labels)
    return labels, encoder

y, encoder = get_labels(data)
labels = encoder.classes_
#print("labels: ", labels)

labels:  ['AUS' 'CAB' 'CEL' 'EAF' 'EAU' 'ENG' 'ETA' 'EU' 'FR' 'GER' 'HIS' 'IN' 'ME'
 'SEA' 'US' 'WAF' 'ZA']


### Scaling features

In [11]:
def scale_features(data):
    scaler = StandardScaler()
    scaled_data = scaler.fit_transform(np.array(data.iloc[:, 2:], dtype=float))

        
    return scaled_data, scaler

x, scaler = scale_features(data)

labels:  ['AUS' 'CAB' 'CEL' 'EAF' 'EAU' 'ENG' 'ETA' 'EU' 'FR' 'GER' 'HIS' 'IN' 'ME'
 'SEA' 'US' 'WAF' 'ZA']


### Train Test Validation Split

In [13]:
from sklearn.model_selection import train_test_split
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.2, random_state=42, stratify=y)
x_test, x_val, y_test, y_val = train_test_split(x_test, y_test, test_size=0.5, random_state=42, stratify=y_test)

print(x_train.shape, x_test.shape, x_val.shape)

(785516, 23) (98190, 23) (98190, 23)


### Classification Model!

In [21]:
from keras.models import Sequential
from keras.layers import Conv1D, MaxPooling1D, Flatten, Dense

model = Sequential()
model.add(Conv1D(128, kernel_size=2, activation='relu', input_shape=(x_train.shape[1], 1)))
model.add(MaxPooling1D(pool_size=1))
model.add(Conv1D(256, kernel_size=2, activation='relu'))
model.add(MaxPooling1D(pool_size=1))
model.add(Conv1D(512, kernel_size=2, activation='relu'))
model.add(MaxPooling1D(pool_size=1))
model.add(Flatten())
model.add(Dense(1024, activation='relu'))
model.add(Dense(512, activation='relu'))
model.add(Dense(256, activation='relu'))
model.add(Dense(len(labels), activation='softmax'))

model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])
print(model.summary())

  super().__init__(


None


### Start Training

In [None]:
epochs = 50
batch_size = 32
history = model.fit(x_train.reshape(x_train.shape[0], x_train.shape[1], 1), y_train, epochs=epochs, batch_size=batch_size, validation_data=(x_val.reshape(x_val.shape[0], x_val.shape[1], 1), y_val))