In [5]:
import pandas as pd
import sklearn
from sklearn.model_selection import train_test_split
from sklearn.metrics import roc_auc_score
from interpret.glassbox import ExplainableBoostingClassifier
from interpret import show

In [30]:
def mergeDF(file1,file2,on=["Path"]):
    #Merging two dataframes utility function
    df = pd.read_csv(file1)
    df2 = pd.read_csv(file2)

    df3 = df.merge(df2, how="left",on=on)
    print(df3.head())
    return df3

In [36]:
def createGlassbox(file,excludeCols=['path'],splits_exist =False, split_prefix="", label_col="emotion"):
    if(type(file)==str):
        df = pd.read_csv(file,index_col=False)
        df.reset_index(drop=True, inplace=True)
    else:
        df = file
    
    df = df.drop(columns=excludeCols)

    unique_subjects = df['ActorID'].unique()
    # Step 2: Shuffle and split subjects into train, test, and validation sets
    if(splits_exist):
        val_df=pd.read_csv(split_prefix+"_test_df.csv")
        purt_df=pd.read_csv(split_prefix+"_val_df.csv")
        train_df=pd.read_csv(split_prefix+"_train_df.csv")
        train_subjects=train_df["ActorID"].unique()
        val1 = purt_df["ActorID"].unique()
        val2 = val_df["ActorID"].unique()
        temp_subject = val1 + val2
    train_subjects, temp_subjects = train_test_split(unique_subjects, test_size=0.3, random_state=42)

    # Step 3: Create DataFrames for each split based on the subject IDs
    train_df = df[df['ActorID'].isin(train_subjects)]
    test_df = df[df['ActorID'].isin(temp_subjects)]


    y_train = train_df[label_col]
    y_test = test_df[label_col]


    x_train=train_df.drop(columns=[label_col,"ActorID"])
    x_test=test_df.drop(columns=[label_col,"ActorID"])

    ebm = ExplainableBoostingClassifier()
    ebm.fit(x_train, y_train)

    auc = roc_auc_score(y_test, ebm.predict_proba(x_test),multi_class='ovr')
    print(("AUC: {:.3f}".format(auc)))
    
    show(ebm.explain_global())
    return ebm


<h2>CREMA EBMs</h2>


In [6]:
createGlassbox("data/opensmileDF_crema.csv",  excludeCols = ["path","Unnamed: 0"],splits_exist =True, split_prefix="data/crema")

  warn(


AUC: 0.835


In [31]:
toExclude = ["path"]
createGlassbox("data/crema_avd.csv",  excludeCols = toExclude, splits_exist =True, split_prefix="data/crema")





AUC: 0.751


In [14]:
toExclude = ["path","Age","Sex","Race","Ethnicity"]
createGlassbox("data/crema_opensmile_avd.csv",  excludeCols = toExclude,splits_exist =True, split_prefix="data/crema")





AUC: 0.842


<h2>EMODB EMBs</h2>

In [23]:
createGlassbox("data/opensmileDF_emodb.csv",  excludeCols = ["Path","Filename","Sex","Age"],label_col="Emotion")





AUC: 0.951


In [37]:
createGlassbox("data/emodb_avd.csv",  excludeCols = ["Path"],label_col="Emotion")





AUC: 0.902


In [39]:
emodb_merge=mergeDF("data/opensmileDF_emodb.csv","data/emodb_avd.csv",on=["Path","ActorID","Emotion"])
createGlassbox(emodb_merge,  excludeCols = ["Path","Unnamed: 0"],label_col="Emotion")

   Unnamed: 0  ActorID    Emotion  \
0           0       15  happiness   
1           1       13  happiness   
2           2        9      anger   
3           3       15      anger   
4           4       14      anger   

                                                Path     Filename     Sex  \
0  /kaggle/input/berlin-database-of-emotional-spe...  15a04Fd.wav    male   
1  /kaggle/input/berlin-database-of-emotional-spe...  13b02Fb.wav  female   
2  /kaggle/input/berlin-database-of-emotional-spe...  09b01Wb.wav  female   
3  /kaggle/input/berlin-database-of-emotional-spe...  15a01Wa.wav    male   
4  /kaggle/input/berlin-database-of-emotional-spe...  14a05Wa.wav  female   

   Age  F0semitoneFrom27.5Hz_sma3nz_amean  \
0   25                          33.635345   
1   32                          37.171260   
2   21                          40.538235   
3   25                          38.678070   
4   35                          40.611958   

   F0semitoneFrom27.5Hz_sma3nz_stddevNorm  





AUC: 0.962


<h2>BEASC EBMs</h2>

In [40]:
createGlassbox("data/opensmileDF_beasc.csv",  excludeCols = ["Sentence","Repetition","Gender","Path"],label_col="Emotion")





AUC: 0.852


In [41]:
createGlassbox("data/beasc_avd.csv",  excludeCols = ["Path"],label_col="Emotion")

AUC: 0.723






In [42]:
createGlassbox("data/beasc_opensmile_avd.csv",  excludeCols = ["Gender","Path"],label_col="Emotion")





AUC: 0.858
