In [None]:
import librosa
import librosa.display
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import plotly.express as px
import tensorflow as tf
from tqdm import tqdm

In [None]:
# def balance_data(df):
#     age_counts = df['age'].value_counts()
#     min_count = age_counts.min()
#     target_count_per_age = {age: min_count for age in age_counts.index}
#     new_data = pd.DataFrame(columns=df.columns)
#     for age, count in target_count_per_age.items():
#         age_data = df[df['age'] == age]
#         male_data = age_data[age_data['gender'] == 0].head(count // 2)
#         female_data = age_data[age_data['gender'] == 1].head(count // 2)
#         new_data = pd.concat([new_data, male_data, female_data])
#     return new_data

def balance_data(df):
    """Down-sample gender 0 so that genders 0 and 1 have the same number of rows.

    Only gender 0 is ever reduced; when it is not the larger class, or when
    gender 1 is absent (there is nothing to balance against), the frame is
    returned unchanged.

    Args:
        df: DataFrame with a numeric ``gender`` column (0 / 1) and a numeric
            ``age`` column.

    Returns:
        Either ``df`` itself (no balancing needed) or a new DataFrame holding
        every gender-1 row plus an equally sized sample of gender-0 rows.
        Rows with any other gender value are not part of the balanced result.
    """
    gender_counts = df['gender'].value_counts()
    # .get() instead of [] so a missing class does not raise KeyError.
    gender_0_count = gender_counts.get(0, 0)
    gender_1_count = gender_counts.get(1, 0)
    if gender_1_count == 0 or gender_0_count <= gender_1_count:
        return df

    gender_0_df = df[df['gender'] == 0]
    # Keep exactly as many gender-0 rows as there are gender-1 rows (the
    # previous code kept the *excess* count, which left the classes unbalanced).
    # Sampling is weighted by age, so rows with larger age values are more
    # likely to be kept.
    sampled_gender_0_df = gender_0_df.sample(n=gender_1_count, weights=gender_0_df['age'], replace=False)
    return pd.concat([df[df['gender'] == 1], sampled_gender_0_df])


def clean_csv(data, files_folder, is_train):
    """Turn a raw metadata frame into numeric labels plus full file paths.

    Steps: drop rows without an age or gender label, drop the unused metadata
    columns, map the age-bucket and gender strings to numbers, prefix each
    filename with ``files_folder``, optionally balance the genders (training
    only) and finally remove rows whose gender is ``'other'``.

    Args:
        data: DataFrame with at least the columns ``filename``, ``age``,
            ``gender``, ``up_votes``, ``down_votes``, ``accent``, ``duration``
            and ``text``.
        files_folder: Folder path prepended (with ``/``) to every filename.
        is_train: When true, ``balance_data`` is applied to the cleaned frame.

    Returns:
        A new DataFrame with integer ``age`` and ``gender`` columns and no
        missing labels.
    """
    data = data.dropna(subset=['age', 'gender']).drop(
        columns=['up_votes', 'down_votes', 'accent', 'duration', 'text'])
    # "fourties" is spelled as it appears in the dataset labels; do not correct it.
    data["age"] = data["age"].map(
        {"teens": 10, "twenties": 20, "thirties": 30, "fourties": 40, "fifties": 50, "sixties": 60, "seventies": 70,
         "eighties": 80})
    data['filename'] = files_folder + '/' + data['filename']
    data['gender'] = data['gender'].map({'male': 0, 'female': 1, 'other': 2})
    # Labels missing from the dictionaries above are mapped to NaN. Drop those
    # rows so NaN targets never reach training, and restore integer dtypes.
    data = data.dropna(subset=['age', 'gender']).astype({'age': 'int64', 'gender': 'int64'})
    if is_train:
        data = balance_data(data)
    return data[data['gender'] != 2]


def open_csv(folder_path, is_train):
    """Load ``<folder_path>.csv``, clean it, and report its label distribution.

    The cleaned frame's shape and its gender / age value counts are printed,
    and a bar chart of row counts per (age, gender) pair is displayed.

    Args:
        folder_path: Path of the audio folder; the metadata file is expected
            at the same path with a ``.csv`` suffix.
        is_train: Forwarded to ``clean_csv``.

    Returns:
        The cleaned DataFrame.
    """
    frame = clean_csv(pd.read_csv(folder_path + ".csv"), folder_path, is_train)

    gender_counts = frame['gender'].value_counts()
    age_counts = frame['age'].value_counts()
    print(f"Dataframe: {frame.shape} \n{gender_counts}{age_counts}")

    # One row per (age, gender) pair with the number of files in that group.
    per_group = frame.groupby(['age', 'gender']).agg({'filename': 'count'}).reset_index()
    figure = px.bar(per_group, x="age", y="filename", color="gender", text_auto=True)
    figure.update_xaxes(categoryorder="total descending").show()
    return frame

In [None]:
# Dataset root; each split is a folder with a same-named ".csv" next to it.
root_path = "/kaggle/input/common-voice/"

# Only the training split is gender-balanced (is_train=True).
train_df = open_csv(f"{root_path}cv-valid-train", is_train=True)
test_df = open_csv(f"{root_path}cv-valid-test", is_train=False)

In [None]:
# Number of examples per batch.
batch_size = 256
# Audio sampling rate used to size the clips.
sample_rate = 22050
# Fixed clip length in samples: five times the sampling rate
# (i.e. five seconds of audio, if sample_rate is in samples per second).
target_size = int(sample_rate * 5)

In [None]:
def random_crop(data, center_crop=False, size=None):
    """Return ``data`` cropped or zero-padded to exactly ``size`` samples.

    Args:
        data: 1-D array of samples.
        center_crop: When the input is longer than ``size``, take the central
            window instead of a randomly positioned one.
        size: Desired output length. Defaults to the module-level
            ``target_size`` when omitted.

    Returns:
        ``data`` itself when it already has the requested length; otherwise a
        zero-padded copy (padding split between both ends, the extra sample
        going to the left) or a ``size``-long slice of it.
    """
    if size is None:
        size = target_size
    N = data.shape[0]
    if N == size:
        return data
    if N < size:
        tot_pads = size - N
        left_pads = (tot_pads + 1) // 2  # ceil(tot_pads / 2)
        right_pads = tot_pads // 2
        return np.pad(data, (left_pads, right_pads), mode='constant')
    if center_crop:
        from_ = (N - size) // 2
    else:
        # randint's upper bound is exclusive; +1 makes the last valid start
        # offset (N - size) reachable so the tail of the clip can be selected.
        from_ = np.random.randint(0, N - size + 1)
    return data[from_:from_ + size]


def normalize(features):
    """Standardize ``features`` to zero mean and unit variance along axis 0.

    Positions whose standard deviation along axis 0 is exactly zero are
    divided by 1 instead, so they come out as 0 rather than NaN/inf.

    Args:
        features: Numeric array.

    Returns:
        Array of the same shape as ``features``.
    """
    mean = np.mean(features, axis=0)
    std = np.std(features, axis=0)
    # Guard against division by zero; ones_like keeps the input dtype.
    safe_std = np.where(std == 0, np.ones_like(std), std)
    return (features - mean) / safe_std


def load_and_preprocess(df, center_crop):
    """Load every audio file listed in ``df`` and convert it to normalized MFCCs.

    Each file is loaded, cropped or padded to the fixed clip length by
    ``random_crop``, converted to MFCC features and standardized with
    ``normalize``.

    Args:
        df: DataFrame whose ``filename`` column holds audio file paths.
        center_crop: Forwarded to ``random_crop`` (central instead of random
            window for long clips).

    Returns:
        A list with one feature array per row of ``df``, in row order.
    """
    result = []
    for file in tqdm(df['filename'].values):
        # Load at the notebook's `sample_rate` explicitly, so the clip length
        # (`target_size`, derived from `sample_rate`) and the audio cannot
        # drift apart if the constant is changed.
        wf, sr = librosa.load(file, sr=sample_rate)
        wf = random_crop(wf, center_crop)
        # Add other augmentations
        # NOTE(review): librosa documents the MFCC output as (n_mfcc, frames);
        # downstream code treats the first axis as the sequence axis - confirm
        # that this layout is intended.
        mfcc = librosa.feature.mfcc(y=wf, sr=sr)
        result.append(normalize(mfcc))
    return result

In [None]:
def create_dataset(dataframe, batch_size=batch_size, center_crop=False, shuffle=True):
    """Build an endlessly repeating, batched ``tf.data`` pipeline from ``dataframe``.

    Every element has the form ``(features, (gender, age))``. All features
    are computed eagerly, once, through ``load_and_preprocess`` before the
    dataset is created.

    Args:
        dataframe: Cleaned metadata with ``filename``, ``gender`` and ``age``.
        batch_size: Number of examples per batch.
        center_crop: Forwarded to ``load_and_preprocess``.
        shuffle: Shuffle with a buffer covering the whole dataset.

    Returns:
        A cached, optionally shuffled, repeated, batched and prefetched
        ``tf.data.Dataset``.
    """
    features = load_and_preprocess(dataframe, center_crop)
    gender_labels = dataframe['gender'].values.astype(int)
    age_labels = dataframe['age'].values.astype(float)

    pipeline = tf.data.Dataset.from_tensor_slices((features, (gender_labels, age_labels))).cache()
    if shuffle:
        # Buffer as large as the dataset itself.
        pipeline = pipeline.shuffle(buffer_size=pipeline.cardinality())
    # repeat() makes the pipeline infinite; callers bound it with step counts.
    return pipeline.repeat().batch(batch_size).prefetch(buffer_size=tf.data.AUTOTUNE)

In [None]:
# Training pipeline: randomly positioned crops, shuffled.
train_ds = create_dataset(train_df, center_crop=False, shuffle=True)
# Evaluation pipeline: deterministic central crops, kept in dataframe order.
test_ds = create_dataset(test_df, shuffle=False, center_crop=True)

In [None]:
# Pull a single batch, laid out as (features, (gender, age)), for a sanity check.
ds = next(test_ds.take(1).as_numpy_iterator())
# Pick a random example from the batch that was actually returned.
index = np.random.randint(0, len(ds[0]))
# The pipeline yields MFCC matrices rather than raw waveforms, so render the
# example as a spectrogram-style image instead of a waveform plot.
librosa.display.specshow(ds[0][index], sr=sample_rate, x_axis='time')
print(f"Age: {ds[1][1][index]} | Gender: {'Male' if ds[1][0][index] == 0 else 'Female'}")

# Per-example feature shape, used as the model's input shape.
inp_shape = ds[0][0].shape

In [None]:
# Batches needed to cover each split once (ceiling division); required because
# the datasets repeat forever and Keras must be told where an epoch ends.
train_steps = -(-len(train_df) // batch_size)
test_steps = -(-len(test_df) // batch_size)

In [None]:
class CustomCRNN:
    """Factory for a two-output Conv1D + GRU Keras model (gender and age)."""

    def __CustomCRNN(self, input_shape):
        """Build the shared feature extractor.

        Three Conv1D (kernel size 3) -> BatchNormalization -> ReLU stages with
        32, 64 and 128 filters, followed by a 128-unit GRU.
        """
        inputs = tf.keras.Input(input_shape)
        hidden = inputs
        for n_filters in (32, 64, 128):
            hidden = tf.keras.layers.Conv1D(n_filters, 3, strides=1)(hidden)
            hidden = tf.keras.layers.BatchNormalization(axis=-1)(hidden)
            hidden = tf.keras.layers.Activation('relu')(hidden)

        hidden = tf.keras.layers.GRU(128)(hidden)
        return tf.keras.Model(inputs=inputs, outputs=hidden, name='CustomCRNN-FeatureExtractor')

    def create_model(self, input_shape):
        """Return the full model: feature extractor plus two single-unit heads.

        Args:
            input_shape: Shape of one input example (without the batch axis).

        Returns:
            A ``tf.keras`` model whose outputs are, in order,
            ``gender_output`` (sigmoid) and ``age_output`` (linear).
        """
        backbone = self.__CustomCRNN(input_shape=input_shape)
        flat = tf.keras.layers.Flatten()(backbone.output)

        heads = [
            # Gender output
            tf.keras.layers.Dense(1, activation='sigmoid', name='gender_output')(flat),
            # Age output
            tf.keras.layers.Dense(1, activation='linear', name='age_output')(flat),
        ]
        return tf.keras.models.Model(inputs=backbone.input, outputs=heads, name='CustomCRNN')

In [None]:
customcrnn = CustomCRNN().create_model(input_shape=inp_shape)

# Losses, weights and metrics are listed in model-output order:
# first gender_output, then age_output.
customcrnn.compile(
    optimizer=tf.keras.optimizers.Adam(learning_rate=1e-4),
    loss=[
        tf.keras.losses.BinaryCrossentropy(from_logits=False),
        tf.keras.losses.MeanSquaredLogarithmicError(),
    ],
    loss_weights=[0.5, 0.5],
    metrics=[
        ['accuracy'],
        [tf.keras.metrics.MeanAbsoluteError()],
    ],
)

customcrnn.summary()

In [None]:
# Both datasets repeat forever, so the step counts define where each
# training / validation pass ends.
customcrnn_history = customcrnn.fit(
    train_ds,
    epochs=30,
    steps_per_epoch=train_steps,
    validation_data=test_ds,
    validation_steps=test_steps,
    verbose='auto',
)

In [None]:
# Total loss per epoch, training vs. validation.
plt.plot(customcrnn_history.history['loss'], label='train')
plt.plot(customcrnn_history.history['val_loss'], label='validation')
plt.xlabel('epoch')
plt.ylabel('loss')
plt.title('Total loss')
plt.legend()
plt.show()

In [None]:
# Accuracy of the gender head per epoch, training vs. validation.
plt.plot(customcrnn_history.history['gender_output_accuracy'], label='train')
plt.plot(customcrnn_history.history['val_gender_output_accuracy'], label='validation')
plt.xlabel('epoch')
plt.ylabel('accuracy')
plt.title('Gender output accuracy')
plt.legend()
plt.show()

In [None]:
# Mean absolute error of the age head per epoch, training vs. validation.
plt.plot(customcrnn_history.history['age_output_mean_absolute_error'], label='train')
plt.plot(customcrnn_history.history['val_age_output_mean_absolute_error'], label='validation')
plt.xlabel('epoch')
plt.ylabel('mean absolute error')
plt.title('Age output MAE')
plt.legend()
plt.show()

In [None]:
# Persist the trained model; the format is inferred from the ".keras" suffix,
# so the deprecated `save_format` argument is no longer passed.
customcrnn.save('/kaggle/working/customcrnn.keras', overwrite=True)

In [None]:
# Reload the model from the file written by the save cell above.
new_model = tf.keras.models.load_model('/kaggle/working/customcrnn.keras')

In [None]:
# Age predictions (second model output) for the test split.
# `batch_size` is not passed because the dataset is already batched.
# test_ds repeats, so `test_steps` full batches wrap around past the end of
# the split; the slice drops those duplicated trailing predictions so there
# is exactly one prediction per row of test_df.
new_model.predict(test_ds,
                  verbose='auto',
                  steps=test_steps)[1][:len(test_df)]