In [2]:
import pandas as pd
import sklearn
from sklearn.model_selection import train_test_split
from sklearn.metrics import roc_auc_score
from interpret.glassbox import ExplainableBoostingClassifier
from interpret import show

In [14]:
def createGlassbox(filename, excludeCols=('path',), testSize=0.3, randomState=42):
    """Train an Explainable Boosting Classifier on a feature CSV and print its AUC.

    Rows are split into train and test sets by ``ActorID`` so that no actor
    contributes rows to both sets. The model predicts the ``emotion`` column
    from every remaining column, the test-set ROC AUC is printed, and the
    model's global explanation is rendered with ``interpret.show``.

    Parameters
    ----------
    filename : str or path-like
        CSV file to load. After ``excludeCols`` are dropped it must still
        contain the ``ActorID`` and ``emotion`` columns.
    excludeCols : str or iterable of str, optional
        Column(s) removed before training. Every name must exist in the file.
    testSize : float or int, optional
        Forwarded to ``train_test_split`` as ``test_size``. It applies to the
        unique actors, not to the rows.
    randomState : int or None, optional
        Seed for the actor split.

    Returns
    -------
    ExplainableBoostingClassifier
        The fitted model.

    Raises
    ------
    KeyError
        If a name in ``excludeCols`` is not a column of the file, or if
        ``ActorID`` / ``emotion`` are missing after the exclusions.
    """
    # Normalise to a list: pandas treats a bare tuple as one (MultiIndex) label.
    if isinstance(excludeCols, str):
        dropCols = [excludeCols]
    else:
        dropCols = list(excludeCols)

    # index_col=False keeps pandas from using the first column as the index.
    df = pd.read_csv(filename, index_col=False).drop(columns=dropCols)

    missing = [col for col in ("ActorID", "emotion") if col not in df.columns]
    if missing:
        raise KeyError(
            "{}: required column(s) missing after exclusions: {}".format(filename, missing)
        )

    # Split on actors rather than rows so no actor appears in both sets.
    train_subjects, test_subjects = train_test_split(
        df["ActorID"].unique(), test_size=testSize, random_state=randomState
    )
    train_df = df[df["ActorID"].isin(train_subjects)]
    test_df = df[df["ActorID"].isin(test_subjects)]

    y_train = train_df["emotion"]
    y_test = test_df["emotion"]

    # ActorID only drives the split; it is not used as a feature.
    x_train = train_df.drop(columns=["emotion", "ActorID"])
    x_test = test_df.drop(columns=["emotion", "ActorID"])

    ebm = ExplainableBoostingClassifier()
    ebm.fit(x_train, y_train)

    proba = ebm.predict_proba(x_test)
    if len(ebm.classes_) == 2:
        # Binary target: roc_auc_score wants 1-D scores for the greater label.
        auc = roc_auc_score(y_test, proba[:, 1])
    else:
        # labels ties the probability columns to the classes seen in training.
        auc = roc_auc_score(y_test, proba, multi_class='ovr', labels=ebm.classes_)
    print(("AUC: {:.3f}".format(auc)))

    show(ebm.explain_global())
    return ebm


In [24]:
# openSMILE functionals only; the stray "Unnamed: 0" column is dropped along with the path.
createGlassbox(
    "data/opensmileDF_functional.csv",
    excludeCols=["path", "Unnamed: 0"],
)





AUC: 0.835


In [15]:
# AVD features only; relies on the default exclusion of the "path" column.
createGlassbox(filename="data/crema_avd.csv")


A worker stopped while some jobs were given to the executor. This can be caused by a too short worker timeout or by a memory leak.



AUC: 0.751






In [20]:
# Combine the openSMILE and AVD features into a single CSV.

df1 = pd.read_csv("data/crema_avd.csv")
df2 = pd.read_csv("data/opensmileDF_functional.csv")

# Left join: every AVD row is kept; rows with no openSMILE match get NaN features.
df3 = df1.merge(df2, how="left", on=["ActorID", "emotion", "path"])
df3 = df3.drop(columns="Unnamed: 0")
df3.to_csv("data/crema_opensmile_avd.csv", index=False)

# Preview goes last so the notebook actually renders it.
df3.head()

In [23]:
# Columns removed before training on the combined openSMILE + AVD table.
toExclude = [
    "path",
    "Age",
    "Sex",
    "Race",
    "Ethnicity",
]
createGlassbox("data/crema_opensmile_avd.csv", excludeCols=toExclude)





AUC: 0.844
