In [1]:
!pip install interpret

Collecting interpret
  Downloading interpret-0.6.1-py3-none-any.whl.metadata (1.1 kB)
Collecting interpret-core==0.6.1 (from interpret-core[dash,debug,linear,notebook,plotly,sensitivity,shap]==0.6.1->interpret)
  Downloading interpret_core-0.6.1-py3-none-any.whl.metadata (2.8 kB)
Collecting dash>=1.0.0 (from interpret-core[dash,debug,linear,notebook,plotly,sensitivity,shap]==0.6.1->interpret)
  Downloading dash-2.17.1-py3-none-any.whl.metadata (10 kB)
Collecting dash-core-components>=1.0.0 (from interpret-core[dash,debug,linear,notebook,plotly,sensitivity,shap]==0.6.1->interpret)
  Downloading dash_core_components-2.0.0-py3-none-any.whl.metadata (2.9 kB)
Collecting dash-html-components>=1.0.0 (from interpret-core[dash,debug,linear,notebook,plotly,sensitivity,shap]==0.6.1->interpret)
  Downloading dash_html_components-2.0.0-py3-none-any.whl.metadata (3.8 kB)
Collecting dash-table>=4.1.0 (from interpret-core[dash,debug,linear,notebook,plotly,sensitivity,shap]==0.6.1->interpret)
  Downloa

In [2]:
import os
# If the process was launched from Kaggle's working directory, move into the
# attached dataset folder — presumably so the relative "data/..." paths used
# by the later cells resolve (confirm the dataset layout).
if os.getenv('PWD') == '/kaggle/working':
    os.chdir('/kaggle/input/cusser-data')

In [3]:
import pandas as pd
import sklearn
from sklearn.model_selection import train_test_split
from sklearn.metrics import roc_auc_score
from interpret.glassbox import ExplainableBoostingClassifier
from interpret import show

In [4]:
def createGlassbox(filename, excludeCols=['path'], targetCol="emotion",
                   groupCol="ActorID", testSize=0.3, randomState=42):
    """Fit an Explainable Boosting Machine on a feature CSV and report test AUC.

    Rows are assigned to the train or test set by their ``groupCol`` value, so
    all rows sharing a group ID end up on the same side of the split. After
    fitting, the test ROC AUC is printed and the model's global explanation is
    rendered with ``interpret.show``.

    Parameters
    ----------
    filename : str
        Path of the CSV file to load.
    excludeCols : str or list of str
        Column(s) dropped right after loading, before the split.
    targetCol : str
        Name of the label column (default ``"emotion"``).
    groupCol : str
        Name of the column whose unique values are split into train/test
        groups (default ``"ActorID"``). It is not used as a feature.
    testSize : float or int
        Forwarded to ``train_test_split`` as ``test_size``; applied to the
        unique group IDs, not to individual rows.
    randomState : int
        Seed forwarded to ``train_test_split`` for a reproducible split.

    Returns
    -------
    ExplainableBoostingClassifier
        The fitted model.
    """
    # excludeCols is only read, never mutated, so the list default is harmless.
    # read_csv already yields a fresh 0..n-1 index, so no reset_index is needed.
    df = pd.read_csv(filename, index_col=False).drop(columns=excludeCols)

    # Split the unique group IDs (not the rows) into train and test groups so
    # that no group contributes samples to both sets.
    unique_subjects = df[groupCol].unique()
    train_subjects, test_subjects = train_test_split(
        unique_subjects, test_size=testSize, random_state=randomState
    )

    # Materialise the row-level split from the group-level split.
    train_df = df[df[groupCol].isin(train_subjects)]
    test_df = df[df[groupCol].isin(test_subjects)]

    y_train = train_df[targetCol]
    y_test = test_df[targetCol]

    # Neither the label nor the grouping key may be used as a feature.
    nonFeatureCols = [targetCol, groupCol]
    x_train = train_df.drop(columns=nonFeatureCols)
    x_test = test_df.drop(columns=nonFeatureCols)

    ebm = ExplainableBoostingClassifier()
    ebm.fit(x_train, y_train)

    proba = ebm.predict_proba(x_test)
    if proba.shape[1] == 2:
        # Binary target: roc_auc_score expects a 1-D score for the class with
        # the greater label, not the full two-column probability matrix.
        auc = roc_auc_score(y_test, proba[:, 1])
    else:
        # Multiclass target: one-vs-rest AUC over the full probability matrix.
        auc = roc_auc_score(y_test, proba, multi_class='ovr')
    print("AUC: {:.3f}".format(auc))

    show(ebm.explain_global())
    return ebm


In [None]:
# Fit on the openSMILE functionals table, dropping the file path and the
# "Unnamed: 0" column (presumably a saved index column — confirm).
createGlassbox(
    "data/opensmileDF_functional.csv",
    excludeCols=["path", "Unnamed: 0"],
)

In [None]:
# Fit on the AVD table using the default exclusions (only "path" is dropped).
createGlassbox(filename="data/crema_avd.csv")

In [None]:
# Combine the AVD and openSMILE feature tables into a single CSV.

df1 = pd.read_csv("data/crema_avd.csv")
df2 = pd.read_csv("data/opensmileDF_functional.csv")

# Left join: every row of the AVD table is kept, and the openSMILE columns are
# attached where ActorID, emotion and path all match.
df3 = df1.merge(df2, how="left", on=["ActorID", "emotion", "path"])
# "Unnamed: 0" is presumably a stray index column saved with one of the CSVs
# — confirm which file it comes from.
df3 = df3.drop(columns="Unnamed: 0")

# NOTE(review): an earlier cell changes the working directory to
# /kaggle/input/cusser-data when running on Kaggle; input datasets are normally
# mounted read-only there, so confirm this write succeeds in that environment.
df3.to_csv("data/crema_opensmile_avd.csv", index=False)

# Preview last so the notebook actually renders it; a bare expression in the
# middle of a cell is evaluated and then discarded.
df3.head()

In [None]:
# Fit on the combined table, leaving out the file path and the
# Age / Sex / Race / Ethnicity columns.
toExclude = [
    "path",
    "Age",
    "Sex",
    "Race",
    "Ethnicity",
]
createGlassbox("data/crema_opensmile_avd.csv", excludeCols=toExclude)