# Title Goes Here
Names names names

In [146]:
# Imports
from os import path
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
import librosa
from matplotlib import pyplot as plt
import matplotlib.ticker as ticker

In [147]:
# Load index file
# root_path is the dataset root; the per-clip index CSV lives directly
# inside it.
root_path: str = '/home/jorb/voice-data/en'
index_path: str = path.join(
    root_path, 'resampled_validated_with_durations.csv')

# Raise explicitly instead of asserting: asserts are stripped when
# Python runs with -O, and this message names the missing file.
if not path.exists(index_path):
    raise FileNotFoundError(f'Index file not found: {index_path}')

index_df: pd.DataFrame = pd.read_csv(index_path)

In [148]:
# Resource fn def

def mp3_to_png(filename: str, overwrite: bool = False) -> str:
    '''
    Renders the mel spectrogram of the given mp3 file to a PNG
    image saved at `<filename>.png`, unless that image already
    exists. Either way, returns the path of the image.
    :param filename: The mp3 file to convert
    :param overwrite: If true, never fetches from cache
    :returns: The filepath of the spectrogram image
    '''

    out: str = filename + '.png'
    if path.exists(out) and not overwrite:
        return out

    # Imported here, after the cache check, so a cache hit needs
    # neither module. `import librosa` alone does not load the
    # `display` submodule on librosa releases without lazy loading,
    # so it is imported explicitly before use.
    import os
    import librosa.display

    X, sample_rate = librosa.load(filename, res_type='kaiser_fast')
    S = librosa.feature.melspectrogram(y=X, sr=sample_rate)

    # Draw on a dedicated figure rather than pyplot's implicit
    # "current" figure, and always close it so figures do not pile
    # up when this is called in a long loop.
    fig = plt.figure(dpi=64)
    try:
        # Axes fill the whole canvas, so no margins end up in the image.
        ax = fig.add_axes((0, 0, 1, 1))

        librosa.display.specshow(
            librosa.power_to_db(S, ref=np.max), x_axis='time',
            y_axis='mel', fmin=50, fmax=280, cmap='gray', ax=ax)

        # Save under a temporary name and rename afterwards: an
        # interrupted save must not leave a truncated file at `out`,
        # because the cache check above would accept it next time.
        partial: str = out + '.tmp'
        fig.savefig(partial, format='png')
        os.replace(partial, out)
    finally:
        plt.close(fig)

    return out


In [149]:
# Image-ify all of the things
# This cell will take a long time: At least an hour
# If cancelled, progress will be saved: mp3_to_png returns early for
# clips whose image file already exists.

n: int = len(index_df)
# Enumerate the 'path' column directly: iterrows() yields the index
# *label* (not the position) and builds a full Series per row, so the
# progress figure would be wrong for any non-default index.
for i, clip_name in enumerate(index_df['path']):
    filename: str = path.join(root_path, 'clips', clip_name)
    percent_done: float = round(100.0 * (i / n), 2)
    print(f'{percent_done}%\tdone...')
    mp3_to_png(filename)


0.0%	done...
0.02%	done...
0.03%	done...
0.05%	done...
0.06%	done...
0.08%	done...
0.1%	done...
0.11%	done...
0.13%	done...
0.14%	done...
0.16%	done...
0.17%	done...
0.19%	done...
0.21%	done...
0.22%	done...
0.24%	done...
0.25%	done...
0.27%	done...
0.29%	done...
0.3%	done...
0.32%	done...
0.33%	done...
0.35%	done...
0.37%	done...
0.38%	done...
0.4%	done...
0.41%	done...
0.43%	done...
0.45%	done...
0.46%	done...
0.48%	done...
0.49%	done...
0.51%	done...
0.52%	done...
0.54%	done...
0.56%	done...
0.57%	done...
0.59%	done...
0.6%	done...
0.62%	done...
0.64%	done...
0.65%	done...
0.67%	done...
0.68%	done...
0.7%	done...
0.72%	done...
0.73%	done...
0.75%	done...
0.76%	done...
0.78%	done...
0.8%	done...
0.81%	done...
0.83%	done...
0.84%	done...
0.86%	done...
0.87%	done...
0.89%	done...
0.91%	done...
0.92%	done...
0.94%	done...
0.95%	done...
0.97%	done...
0.99%	done...
1.0%	done...
1.02%	done...
1.03%	done...
1.05%	done...
1.07%	done...
1.08%	done...
1.1%	done...
1.11%	done...
1.13%	done...
1

In [150]:
# Prepare dataset


def _png_path_for(clip_name):
    '''
    Builds the spectrogram image path for one entry of the index
    file's 'path' column.
    :param clip_name: The clip's value from the 'path' column
    :returns: `<root_path>/clips/<clip_name>.png`
    '''
    return path.join(root_path, 'clips', f'{clip_name}.png')


# NOTE: X holds image file paths (strs), not image data!
X = index_df['path'].map(_png_path_for)
# Labels come straight from the index file's 'gender' column.
y = index_df['gender']

In [None]:
# Train/test split
# 20% of the rows are held out for testing; the fixed random_state
# makes the split the same on every run.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=1234,
)

# Print every training image path, one per line.
for image_path in X_train:
    print(image_path)

/home/jorb/voice-data/en/clips/common_voice_en_9793.mp3.png
/home/jorb/voice-data/en/clips/common_voice_en_35281.mp3.png
/home/jorb/voice-data/en/clips/common_voice_en_557410.mp3.png
/home/jorb/voice-data/en/clips/common_voice_en_20967.mp3.png
/home/jorb/voice-data/en/clips/common_voice_en_9092.mp3.png
/home/jorb/voice-data/en/clips/common_voice_en_31066762.mp3.png
/home/jorb/voice-data/en/clips/common_voice_en_32987475.mp3.png
/home/jorb/voice-data/en/clips/common_voice_en_44650.mp3.png
/home/jorb/voice-data/en/clips/common_voice_en_37153410.mp3.png
/home/jorb/voice-data/en/clips/common_voice_en_38352401.mp3.png
/home/jorb/voice-data/en/clips/common_voice_en_32534478.mp3.png
/home/jorb/voice-data/en/clips/common_voice_en_13327.mp3.png
/home/jorb/voice-data/en/clips/common_voice_en_19020.mp3.png
/home/jorb/voice-data/en/clips/common_voice_en_36498460.mp3.png
/home/jorb/voice-data/en/clips/common_voice_en_42022.mp3.png
/home/jorb/voice-data/en/clips/common_voice_en_96259.mp3.png
/home/j

In [None]:
# Model construction