# IMPORTING LIBRARIES

In [100]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

import mlflow
import mlflow.sklearn

import sys
import json
import pickle
import glob 
import os
import warnings

In [101]:
import librosa

import tensorflow as tf
import keras
from keras import regularizers
from keras.preprocessing import sequence
from keras.preprocessing.text import Tokenizer
from keras.preprocessing.sequence import pad_sequences
from keras.models import Sequential, Model, model_from_json
from keras.layers import Dense, Embedding, LSTM
from keras.layers import Input, Flatten, Dropout, Activation, BatchNormalization
from keras.layers import Conv1D, MaxPooling1D, AveragePooling1D
from keras.utils import np_utils
from tensorflow.keras.utils import to_categorical
from keras.callbacks import ModelCheckpoint

from sklearn.metrics import confusion_matrix,accuracy_score, classification_report
from sklearn.model_selection import train_test_split, cross_val_score
from sklearn.preprocessing import LabelEncoder
from sklearn.ensemble import RandomForestClassifier
from sklearn.tree import DecisionTreeClassifier

In [102]:
warnings.filterwarnings(action="ignore")

np.random.seed(42)

%matplotlib inline

In [116]:
# # mlflow server --backend-store-uri mlruns/ --default-artifact-root mlruns/ --host 0.0.0.0 --port 5000
# remote_server_uri = "http://0.0.0.0:5000"
# mlflow.set_tracking_uri(remote_server_uri)
# # exp_name = "audio_emotion"
# # mlflow.set_experiment(exp_name)

# READING DATASET

In [104]:
def load_data_SAVEE(
    SAVEE = "data/raw/SAVEE"
):
    dir_list = sorted(os.listdir(SAVEE))

    emotion=[]
    path = []
    for i in dir_list:
        if i[-8:-6]=='_a':
            emotion.append('male_angry')
        elif i[-8:-6]=='_d':
            emotion.append('male_disgust')
        elif i[-8:-6]=='_f':
            emotion.append('male_fear')
        elif i[-8:-6]=='_h':
            emotion.append('male_happy')
        elif i[-8:-6]=='_n':
            emotion.append('male_neutral')
        elif i[-8:-6]=='sa':
            emotion.append('male_sad')
        elif i[-8:-6]=='su':
            emotion.append('male_surprise')
        else:
            emotion.append('male_error') 
        path.append(SAVEE + i)

    SAVEE_df = pd.DataFrame(emotion, columns = ['labels'])
    SAVEE_df['source'] = 'SAVEE'
    SAVEE_df = pd.concat([SAVEE_df, pd.DataFrame(path, columns = ['path'])], axis = 1)
    
    mlflow.log_artifact("SAVEE_nrows", len(SAVEE_df))

    return SAVEE_df

In [105]:
def load_data_RAVDESS(
    RAVDESS = "data/raw/RAVDESS"
):
    dir_list = sorted(os.listdir(RAVDESS))

    emotion = []
    gender = []
    path = []
    for i in dir_list:
        fname = os.listdir(RAVDESS + i)
        for f in fname:
            part = f.split('.')[0].split('-')
            emotion.append(int(part[2]))
            temp = int(part[6])
            if temp%2 == 0:
                temp = "female"
            else:
                temp = "male"
            gender.append(temp)
            path.append(RAVDESS + i + '/' + f)

        
    RAVDESS_df = pd.DataFrame(emotion)
    RAVDESS_df = RAVDESS_df.replace(
        {1:'neutral', 2:'neutral', 3:'happy', 4:'sad', 5:'angry', 6:'fear', 7:'disgust', 8:'surprise'}
    )
    RAVDESS_df = pd.concat([pd.DataFrame(gender),RAVDESS_df],axis=1)
    RAVDESS_df.columns = ['gender','emotion']
    RAVDESS_df['labels'] =RAVDESS_df.gender + '_' + RAVDESS_df.emotion
    RAVDESS_df['source'] = 'RAVDESS'  
    RAVDESS_df = pd.concat([RAVDESS_df,pd.DataFrame(path, columns = ['path'])],axis=1)
    RAVDESS_df = RAVDESS_df.drop(['gender', 'emotion'], axis=1)
    
    mlflow.log_artifact("RAVDESS_nrows", len(RAVDESS_df))
    
    return RAVDESS_df

In [106]:
def load_data_TESS(
    TESS = "data/raw/TESS"
):
    dir_list=sorted(os.listdir(TESS))
    path = []
    emotion = []

    for i in dir_list:
        fname = os.listdir(TESS + i)
        for f in fname:
            if i == 'OAF_angry' or i == 'YAF_angry':
                emotion.append('female_angry')
            elif i == 'OAF_disgust' or i == 'YAF_disgust':
                emotion.append('female_disgust')
            elif i == 'OAF_Fear' or i == 'YAF_fear':
                emotion.append('female_fear')
            elif i == 'OAF_happy' or i == 'YAF_happy':
                emotion.append('female_happy')
            elif i == 'OAF_neutral' or i == 'YAF_neutral':
                emotion.append('female_neutral')                                
            elif i == 'OAF_Pleasant_surprise' or i == 'YAF_pleasant_surprised':
                emotion.append('female_surprise')               
            elif i == 'OAF_Sad' or i == 'YAF_sad':
                emotion.append('female_sad')
            else:
                emotion.append('Unknown')
            path.append(TESS + i + "/" + f)

    TESS_df = pd.DataFrame(emotion, columns = ['labels'])
    TESS_df['source'] = 'TESS'
    TESS_df = pd.concat([TESS_df,pd.DataFrame(path, columns = ['path'])],axis=1)
    
    mlflow.log_artifact("TESS_nrows", len(TESS_df))
    
    return TESS_df

In [107]:
def load_data_CREMA(
    CREMA = "data/raw/CREMA"
):
    dir_list = sorted(os.listdir(CREMA))

    gender = []
    emotion = []
    path = []
    female = [
        1002,1003,1004,1006,1007,1008,1009,1010,1012,1013,1018,1020,
        1021,1024,1025,1028,1029,1030,1037,1043,1046,1047,1049,1052,
        1053,1054,1055,1056,1058,1060,1061,1063,1072,1073,1074,1075,
        1076,1078,1079,1082,1084,1089,1091
    ]

    for i in dir_list: 
        part = i.split('_')
        if int(part[0]) in female:
            temp = 'female'
        else:
            temp = 'male'
        gender.append(temp)
        if part[2] == 'SAD' and temp == 'male':
            emotion.append('male_sad')
        elif part[2] == 'ANG' and temp == 'male':
            emotion.append('male_angry')
        elif part[2] == 'DIS' and temp == 'male':
            emotion.append('male_disgust')
        elif part[2] == 'FEA' and temp == 'male':
            emotion.append('male_fear')
        elif part[2] == 'HAP' and temp == 'male':
            emotion.append('male_happy')
        elif part[2] == 'NEU' and temp == 'male':
            emotion.append('male_neutral')
        elif part[2] == 'SAD' and temp == 'female':
            emotion.append('female_sad')
        elif part[2] == 'ANG' and temp == 'female':
            emotion.append('female_angry')
        elif part[2] == 'DIS' and temp == 'female':
            emotion.append('female_disgust')
        elif part[2] == 'FEA' and temp == 'female':
            emotion.append('female_fear')
        elif part[2] == 'HAP' and temp == 'female':
            emotion.append('female_happy')
        elif part[2] == 'NEU' and temp == 'female':
            emotion.append('female_neutral')
        else:
            emotion.append('Unknown')
        path.append(CREMA + i)
    
    CREMA_df = pd.DataFrame(emotion, columns = ['labels'])
    CREMA_df['source'] = 'CREMA'
    CREMA_df = pd.concat([CREMA_df,pd.DataFrame(path, columns = ['path'])],axis=1)
    
    mlflow.log_artifact("CREMA_nrows", len(CREMA_df))
    
    return CREMA_df

In [108]:
def load_data():
    with mlflow.start_run(run_name= "Data_loading", nested=True) as child_run_load:
        SAVEE_df = load_data_SAVEE()
        RAVDESS_df = load_data_RAVDESS()
        TESS_df = load_data_TESS()
        CREMA_df = load_data_CREMA()
        
        ref = pd.concat(
            [SAVEE_df, RAVDESS_df, TESS_df, CREMA_df],
            axis = 0
        )
        
    return ref

# FEATURE EXTRACTION

In [109]:
def feature_extraction(ref):
    with mlflow.start_run(run_name= "Feature_extraction", nested=True) as child_run_load:
        df = pd.DataFrame(columns=['feature'])
        
        counter=0
        for index,path in enumerate(ref.path):
            X, sample_rate = librosa.load(path, res_type='kaiser_fast',duration=2.5,sr=44100,offset=0.5)
            sample_rate = np.array(sample_rate)
            mfccs = np.mean(librosa.feature.mfcc(y=X, sr=sample_rate, n_mfcc=13),axis=0)
            df.loc[counter] = [mfccs]
            counter=counter+1   
    
        ref.reset_index(inplace=True, drop=True)
        df = pd.concat([ref,pd.DataFrame(df['feature'].values.tolist())],axis=1)
        df=df.fillna(0)
        
        mlflow.log_artifact("Sample_Rate", sample_rate)
        mlflow.log_artifact("df_nrows", len(df))
        mlflow.log_artifact("df_ncols", df.shape[-1])
        
        return df

# TRAIN TEST SPLIT

In [110]:
def df_split(df):
    with mlflow.start_run(run_name= "Data_split", nested=True) as child_run_load:
        X_train, X_test, y_train, y_test = train_test_split(
            df.drop(
                ['path','labels','source'],axis=1
            ),
            df.labels,
            test_size=0.2,
            shuffle=True,
            random_state=42
        )
        mlflow.log_artifact("train_nrows", len(X_train))
        mlflow.log_artifact("test_nrows", len(X_test))

    return X_train, X_test, y_train, y_test

# NORMALIZATION

In [111]:
def normalization(X_train, X_test):
    mean = np.mean(X_train, axis=0)
    std = np.std(X_train, axis=0)

    X_train = (X_train - mean)/std
    X_test = (X_test - mean)/std
    return X_train, X_test

# MODELLING

### BASELINE TEST

In [112]:
def eval_matrics(model, train_x, train_y):
    #function to get r2 score using cross_val_score
    scores = cross_val_score(model, train_x, train_y, 
                            scoring= "r2", cv= 10)
    return scores.mean()

In [113]:
def baseline_test(X_train, y_train):
    models = {
        "Linear_reg": LinearRegression(),
        "Decision_tree": DecisionTreeRegressor(),
        "Random_forest": RandomForestRegressor(
            max_features="log2",
            max_depth=10,
            max_leaf_nodes=100,
            min_samples_leaf=3,
            min_samples_split=20,
            n_estimators=22000,
            random_state=42
        )
    }
    with mlflow.start_run(run_name= "Basic_model", nested=True) as child_run_basic:
        for model in models:
            mlflow.log_metric(
                f"{model}_R2_Score", 
                eval_matrics(models[model], X_train, y_train)
            )

In [114]:
def seq_model(X_train, X_test, y_train, y_test):
    X_train = np.array(X_train)
    y_train = np.array(y_train)
    X_test = np.array(X_test)
    y_test = np.array(y_test)

    # one hot encode the target 
    lb = LabelEncoder()
    y_train = np_utils.to_categorical(lb.fit_transform(y_train))
    y_test = np_utils.to_categorical(lb.fit_transform(y_test))
    
    with open("data_description.txt", 'w') as f:
        f.write(f"{lb}")
    print(lb.classes_)
    
    X_train = np.expand_dims(X_train, axis=2)
    X_test = np.expand_dims(X_test, axis=2)
    
    
    # New model
    model = Sequential()
    model.add(Conv1D(256, 8, padding='same',input_shape=(X_train.shape[1],1)))  # X_train.shape[1] = No. of Columns
    model.add(Activation('relu'))
    model.add(Conv1D(256, 8, padding='same'))
    model.add(BatchNormalization())
    model.add(Activation('relu'))
    model.add(Dropout(0.25))
    model.add(MaxPooling1D(pool_size=(8)))
    model.add(Conv1D(128, 8, padding='same'))
    model.add(Activation('relu'))
    model.add(Conv1D(128, 8, padding='same'))
    model.add(Activation('relu'))
    model.add(Conv1D(128, 8, padding='same'))
    model.add(Activation('relu'))
    model.add(Conv1D(128, 8, padding='same'))
    model.add(BatchNormalization())
    model.add(Activation('relu'))
    model.add(Dropout(0.25))
    model.add(MaxPooling1D(pool_size=(8)))
    model.add(Conv1D(64, 8, padding='same'))
    model.add(Activation('relu'))
    model.add(Conv1D(64, 8, padding='same'))
    model.add(Activation('relu'))
    model.add(Flatten())
    model.add(Dense(14))
    model.add(Activation('softmax'))

    opt = tf.keras.optimizers.RMSprop(lr=0.00001, decay=1e-6)
    
    with open("model_summary.txt", 'w') as f:
        f.write(f"""
        {model.summary()}
        """)
    
    
    with mlflow.start_run(run_name= "Seq_model", nested= True):
        mlflow.log_artifact("train_ncols", X_train.shape[-1])
        mlflow.log_artifact("data_description.txt")
        mlflow.log_artifact("model_summary.txt")
        
    model.compile(loss='categorical_crossentropy', optimizer=opt,metrics=['accuracy'])
    model_history=model.fit(X_train, y_train, batch_size=16, epochs=80, validation_data=(X_test, y_test))
    
    return model

# MAIN

In [117]:
def main():
    remote_server_uri = "http://0.0.0.0:5000"
    mlflow.set_tracking_uri(remote_server_uri)
    
    with mlflow.start_run(run_name= "ML_LIFECYCLE") as parent_run:
        ref = load_data()
        df = feature_extraction(ref)
        X_train, X_test, y_train, y_test = df_split(df)
        X_train, X_test = normalization(X_train, X_test)
        baseline_test(X_train, y_train)
        model = seq_model(X_train, X_test, y_train, y_test)

        mlflow.sklearn.log_model(final_model, "model")

In [118]:
main()

MlflowException: API request to http://0.0.0.0:5000/api/2.0/mlflow/runs/create failed with exception HTTPConnectionPool(host='0.0.0.0', port=5000): Max retries exceeded with url: /api/2.0/mlflow/runs/create (Caused by NewConnectionError('<urllib3.connection.HTTPConnection object at 0x0000022C9FE0B070>: Failed to establish a new connection: [WinError 10049] The requested address is not valid in its context'))