In [1]:
import pandas as pd
import numpy as np
import os
from sklearn.model_selection import train_test_split
import shutil

cwd = os.getcwd()
df = pd.read_csv(cwd + "/data_csv/use_feature_data_embedded.csv")

In [2]:
df

Unnamed: 0,image,emotion,d_first_eyebrow_reye,d_first_eyebrow_leye,circum_mouth,d_bottom_mouth_chin
0,facial-expressions_2868585k.jpg,7,28.460499,29.068884,122.200822,27.073973
1,facial-expressions_2868582k.jpg,3,24.698178,24.839485,121.277925,31.064449
2,Aaron_Peirsol_0003.jpg,4,23.769729,25.495098,131.053001,44.553339
3,Aaron_Sorkin_0002.jpg,4,25.553865,21.954498,176.923136,37.121422
4,Abdel_Nasser_Assidi_0002.jpg,5,25.709920,27.658633,94.492867,51.662365
...,...,...,...,...,...,...
24068,SURPRISE/surprise (971).png,7,50.219518,47.095647,132.779479,45.398238
24069,SURPRISE/surprise (978).jpg,7,39.293765,37.483330,100.903171,25.079872
24070,SURPRISE/surprise (981).png,7,30.000000,25.079872,162.066050,21.931712
24071,SURPRISE/surprise (983).jpg,7,43.566042,43.011626,118.447685,22.203603


In [None]:
# create graph folder
dir_img = cwd + '/graph'
if os.path.exists(dir_img):
    shutil.rmtree(dir_img)
os.makedirs(dir_img)

dictionary = ['ANGER', 'CONTEMPT', 'DISGUST', 'FEAR', 'HAPPINESS',  'NEUTRAL', 'SADNESS', 'SURPRISE']
for emo in dictionary:
    os.makedirs(dir_img + "/" + emo)

# create matrix folder
dir_img = cwd + '/matrix'
if os.path.exists(dir_img):
    shutil.rmtree(dir_img)
os.makedirs(dir_img)

dictionary = ['ANGER', 'CONTEMPT', 'DISGUST', 'FEAR', 'HAPPINESS',  'NEUTRAL', 'SADNESS', 'SURPRISE']
for emo in dictionary:
    os.makedirs(dir_img + "/" + emo)

In [5]:
from sklearn import svm
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.model_selection import StratifiedKFold
from sklearn.metrics import RocCurveDisplay
from sklearn.metrics import auc
from sklearn.metrics import accuracy_score
from sklearn.metrics import log_loss
from sklearn.metrics import classification_report
from sklearn.metrics import plot_confusion_matrix
from sklearn.metrics import confusion_matrix

# create model
clf = svm.SVC(kernel='linear', C=1, probability=True)

dictionary = ['ANGER', 'CONTEMPT', 'DISGUST', 'FEAR', 'HAPPINESS',  'NEUTRAL', 'SADNESS', 'SURPRISE']

for emo in range(8):
    print(dictionary[emo])

    emo_feature = []
    emo_target = []

    for i in range(len(df['emotion'])):
        if df['emotion'][i] == emo:
            emo_target.append(1)
        else:
            emo_target.append(0)
            
        data = []
        for j in range(2, len(df.columns)):
            data.append(df._get_value(i, j, takeable = True))
        emo_feature.append(data)

    emo_feature = np.array(emo_feature)
    emo_target = np.array(emo_target)

    # import data
    X = emo_feature
    y = emo_target

    # add noisy features
    random_state = np.random.RandomState(0)

    # train test split
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.1, random_state=random_state)

    history = []
    tprs = []
    aucs = []
    mean_fpr = np.linspace(0, 1, 100)
    fig, ax = plt.subplots()

    # Run classifier with cross-validation and plot ROC curves
    cv = StratifiedKFold(n_splits=5, shuffle=True)
    for i, (train, val) in enumerate(cv.split(X_train, y_train)):
        X_train, X_val = X[train], X[val]
        y_train, y_val = y[train], y[val]

        # fit model
        clf.fit(X_train, y_train)

        # predict
        predict = clf.predict(X_val)
        clf_probs = clf.predict_proba(X_val)

        # store model report in history list
        history.append(classification_report(y_val, predict))

        # get loss and accuracy
        acc = accuracy_score(y_val, predict)
        loss = log_loss(y_val, clf_probs)
        print(f'====================Fold {i}====================', '\n')
        print(f"accuracy_score : {acc}")
        print(f"log_loss : {loss}\n")

        # plot ROC curve
        viz = RocCurveDisplay.from_estimator(clf, X_val, y_val, name="ROC fold {}".format(i), alpha=0.3, lw=1, ax=ax)
        interp_tpr = np.interp(mean_fpr, viz.fpr, viz.tpr)
        interp_tpr[0] = 0.0
        tprs.append(interp_tpr)
        aucs.append(viz.roc_auc)

    # middle line
    ax.plot([0, 1], [0, 1], 'k--')

    # mean line
    mean_tpr = np.mean(tprs, axis=0)
    mean_tpr[-1] = 1.0
    mean_auc = auc(mean_fpr, mean_tpr)
    std_auc = np.std(aucs)
    ax.plot(
        mean_fpr,
        mean_tpr,
        color="b",
        label=r"Mean ROC (AUC = %0.2f $\pm$ %0.2f)" % (mean_auc, std_auc),
        lw=2,
        alpha=0.8,
    )

    # std
    std_tpr = np.std(tprs, axis=0)
    tprs_upper = np.minimum(mean_tpr + std_tpr, 1)
    tprs_lower = np.maximum(mean_tpr - std_tpr, 0)
    ax.fill_between(
        mean_fpr,
        tprs_lower,
        tprs_upper,
        color="grey",
        alpha=0.2,
        label=r"$\pm$ 1 std. dev.",
    )

    ax.set(xlim=[-0.05, 1.05],
            ylim=[-0.05, 1.05],
            title="Receiver operating characteristic")
    ax.legend(loc="lower right")
    plt.xlabel("False Positive Rate")
    plt.ylabel("True Positive Rate")
    plt.savefig('graph/' + dictionary[emo] + '/svm_linear_emb.jpg')
    plt.show()

    # for i in range(len(history)):
        # print(f'=========================Fold {i+1}=========================')
        # print(history[i])

    predict = clf.predict(X_test)
    tn, fp, fn, tp = confusion_matrix(y_test, predict).ravel()
    sensitivity = tp / (tp+fn)
    specificity = tn / (tn+fp)
    print(f"specificity: {specificity}\n")
    print(f"sensitivity: {sensitivity}\n")
    print(f"accuracy : {accuracy_score(y_test, predict)}\n")

    # report
    print(classification_report(y_test, predict))
    print(accuracy_score(y_test, predict))

    # plot cofusion matrix
    plot_confusion_matrix(clf, X_test, y_test, cmap='YlOrRd')
    plt.savefig('matrix/' + dictionary[emo] + '/svm_linear_emb.jpg')

    print('\n\n')

ANGER


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))



accuracy_score : 0.8887606738979922
log_loss : 0.32152557136865095



In [None]:
from sklearn import svm
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.model_selection import StratifiedKFold
from sklearn.metrics import RocCurveDisplay
from sklearn.metrics import auc
from sklearn.metrics import accuracy_score
from sklearn.metrics import log_loss
from sklearn.metrics import classification_report
from sklearn.metrics import plot_confusion_matrix
from sklearn.metrics import confusion_matrix

# create model
clf = svm.SVC(kernel='poly', C=1, probability=True)

dictionary = ['ANGER', 'CONTEMPT', 'DISGUST', 'FEAR', 'HAPPINESS',  'NEUTRAL', 'SADNESS', 'SURPRISE']

for emo in range(8):
    print(dictionary[emo])

    emo_feature = []
    emo_target = []

    for i in range(len(df['emotion'])):
        if df['emotion'][i] == emo:
            emo_target.append(1)
        else:
            emo_target.append(0)
            
        data = []
        for j in range(2, len(df.columns)):
            data.append(df._get_value(i, j, takeable = True))
        emo_feature.append(data)
        
    emo_feature = np.array(emo_feature)
    emo_target = np.array(emo_target)

    # import data
    X = emo_feature
    y = emo_target

    # add noisy features
    random_state = np.random.RandomState(0)

    # train test split
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.1, random_state=random_state)

    history = []
    tprs = []
    aucs = []
    mean_fpr = np.linspace(0, 1, 100)
    fig, ax = plt.subplots()

    # Run classifier with cross-validation and plot ROC curves
    cv = StratifiedKFold(n_splits=5, shuffle=True)
    for i, (train, val) in enumerate(cv.split(X_train, y_train)):
        X_train, X_val = X[train], X[val]
        y_train, y_val = y[train], y[val]

        # fit model
        clf.fit(X_train, y_train)

        # predict
        predict = clf.predict(X_val)
        clf_probs = clf.predict_proba(X_val)

        # store model report in history list
        history.append(classification_report(y_val, predict))

        # get loss and accuracy
        acc = accuracy_score(y_val, predict)
        loss = log_loss(y_val, clf_probs)
        print(f'====================Fold {i}====================', '\n')
        print(f"accuracy_score : {acc}")
        print(f"log_loss : {loss}\n")

        # plot ROC curve
        viz = RocCurveDisplay.from_estimator(clf, X_val, y_val, name="ROC fold {}".format(i), alpha=0.3, lw=1, ax=ax)
        interp_tpr = np.interp(mean_fpr, viz.fpr, viz.tpr)
        interp_tpr[0] = 0.0
        tprs.append(interp_tpr)
        aucs.append(viz.roc_auc)

    # middle line
    ax.plot([0, 1], [0, 1], 'k--')

    # mean line
    mean_tpr = np.mean(tprs, axis=0)
    mean_tpr[-1] = 1.0
    mean_auc = auc(mean_fpr, mean_tpr)
    std_auc = np.std(aucs)
    ax.plot(
        mean_fpr,
        mean_tpr,
        color="b",
        label=r"Mean ROC (AUC = %0.2f $\pm$ %0.2f)" % (mean_auc, std_auc),
        lw=2,
        alpha=0.8,
    )

    # std
    std_tpr = np.std(tprs, axis=0)
    tprs_upper = np.minimum(mean_tpr + std_tpr, 1)
    tprs_lower = np.maximum(mean_tpr - std_tpr, 0)
    ax.fill_between(
        mean_fpr,
        tprs_lower,
        tprs_upper,
        color="grey",
        alpha=0.2,
        label=r"$\pm$ 1 std. dev.",
    )

    ax.set(xlim=[-0.05, 1.05],
            ylim=[-0.05, 1.05],
            title="Receiver operating characteristic")
    ax.legend(loc="lower right")
    plt.xlabel("False Positive Rate")
    plt.ylabel("True Positive Rate")
    plt.savefig('graph/' + dictionary[emo] + '/svm_poly_emb.jpg')
    plt.show()

    # for i in range(len(history)):
        # print(f'=========================Fold {i+1}=========================')
        # print(history[i])

    predict = clf.predict(X_test)
    tn, fp, fn, tp = confusion_matrix(y_test, predict).ravel()
    sensitivity = tp / (tp+fn)
    specificity = tn / (tn+fp)
    print(f"specificity: {specificity}\n")
    print(f"sensitivity: {sensitivity}\n")
    print(f"accuracy : {accuracy_score(y_test, predict)}\n")

    # report
    print(classification_report(y_test, predict))
    print(accuracy_score(y_test, predict))

    # plot cofusion matrix
    plot_confusion_matrix(clf, X_test, y_test, cmap='YlOrRd')
    plt.savefig('matrix/' + dictionary[emo] + '/svm_poly_emb.jpg')

In [None]:
from sklearn import svm
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.model_selection import StratifiedKFold
from sklearn.metrics import RocCurveDisplay
from sklearn.metrics import auc
from sklearn.metrics import accuracy_score
from sklearn.metrics import log_loss
from sklearn.metrics import classification_report
from sklearn.metrics import plot_confusion_matrix
from sklearn.metrics import confusion_matrix

# create model
clf = svm.SVC(kernel='rbf', C=1, probability=True)

dictionary = ['ANGER', 'CONTEMPT', 'DISGUST', 'FEAR', 'HAPPINESS',  'NEUTRAL', 'SADNESS', 'SURPRISE']

for emo in range(8):
    print(dictionary[emo])

    emo_feature = []
    emo_target = []

    for i in range(len(df['emotion'])):
        if df['emotion'][i] == emo:
            emo_target.append(1)
        else:
            emo_target.append(0)
            
        data = []
        for j in range(2, len(df.columns)):
            data.append(df._get_value(i, j, takeable = True))
        emo_feature.append(data)

    emo_feature = np.array(emo_feature)
    emo_target = np.array(emo_target)

    # import data
    X = emo_feature
    y = emo_target

    # add noisy features
    random_state = np.random.RandomState(0)

    # train test split
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.1, random_state=random_state)

    history = []
    tprs = []
    aucs = []
    mean_fpr = np.linspace(0, 1, 100)
    fig, ax = plt.subplots()

    # Run classifier with cross-validation and plot ROC curves
    cv = StratifiedKFold(n_splits=5, shuffle=True)
    for i, (train, val) in enumerate(cv.split(X_train, y_train)):
        X_train, X_val = X[train], X[val]
        y_train, y_val = y[train], y[val]

        # fit model
        clf.fit(X_train, y_train)

        # predict
        predict = clf.predict(X_val)
        clf_probs = clf.predict_proba(X_val)

        # store model report in history list
        history.append(classification_report(y_val, predict))

        # get loss and accuracy
        acc = accuracy_score(y_val, predict)
        loss = log_loss(y_val, clf_probs)
        print(f'====================Fold {i}====================', '\n')
        print(f"accuracy_score : {acc}")
        print(f"log_loss : {loss}\n")

        # plot ROC curve
        viz = RocCurveDisplay.from_estimator(clf, X_val, y_val, name="ROC fold {}".format(i), alpha=0.3, lw=1, ax=ax)
        interp_tpr = np.interp(mean_fpr, viz.fpr, viz.tpr)
        interp_tpr[0] = 0.0
        tprs.append(interp_tpr)
        aucs.append(viz.roc_auc)

    # middle line
    ax.plot([0, 1], [0, 1], 'k--')

    # mean line
    mean_tpr = np.mean(tprs, axis=0)
    mean_tpr[-1] = 1.0
    mean_auc = auc(mean_fpr, mean_tpr)
    std_auc = np.std(aucs)
    ax.plot(
        mean_fpr,
        mean_tpr,
        color="b",
        label=r"Mean ROC (AUC = %0.2f $\pm$ %0.2f)" % (mean_auc, std_auc),
        lw=2,
        alpha=0.8,
    )

    # std
    std_tpr = np.std(tprs, axis=0)
    tprs_upper = np.minimum(mean_tpr + std_tpr, 1)
    tprs_lower = np.maximum(mean_tpr - std_tpr, 0)
    ax.fill_between(
        mean_fpr,
        tprs_lower,
        tprs_upper,
        color="grey",
        alpha=0.2,
        label=r"$\pm$ 1 std. dev.",
    )

    ax.set(xlim=[-0.05, 1.05],
            ylim=[-0.05, 1.05],
            title="Receiver operating characteristic")
    ax.legend(loc="lower right")
    plt.xlabel("False Positive Rate")
    plt.ylabel("True Positive Rate")
    plt.savefig('graph/' + dictionary[emo] + '/svm_rbf_emb.jpg')
    plt.show()

    # for i in range(len(history)):
        # print(f'=========================Fold {i+1}=========================')
        # print(history[i])

    predict = clf.predict(X_test)
    tn, fp, fn, tp = confusion_matrix(y_test, predict).ravel()
    sensitivity = tp / (tp+fn)
    specificity = tn / (tn+fp)
    print(f"specificity: {specificity}\n")
    print(f"sensitivity: {sensitivity}\n")
    print(f"accuracy : {accuracy_score(y_test, predict)}\n")

    # report
    print(classification_report(y_test, predict))
    print(accuracy_score(y_test, predict))

    # plot cofusion matrix
    plot_confusion_matrix(clf, X_test, y_test, cmap='YlOrRd')
    plt.savefig('matrix/' + dictionary[emo] + '/svm_rbf_emb.jpg')

In [None]:
from sklearn.linear_model import LogisticRegression
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.model_selection import StratifiedKFold
from sklearn.metrics import RocCurveDisplay
from sklearn.metrics import auc
from sklearn.metrics import accuracy_score
from sklearn.metrics import log_loss
from sklearn.metrics import classification_report
from sklearn.metrics import plot_confusion_matrix
from sklearn.metrics import confusion_matrix

# create model
clf = LogisticRegression()

dictionary = ['ANGER', 'CONTEMPT', 'DISGUST', 'FEAR', 'HAPPINESS',  'NEUTRAL', 'SADNESS', 'SURPRISE']

for emo in range(8):
    print(dictionary[emo])

    emo_feature = []
    emo_target = []

    for i in range(len(df['emotion'])):
        if df['emotion'][i] == emo:
            emo_target.append(1)
        else:
            emo_target.append(0)
            
        data = []
        for j in range(2, len(df.columns)):
            data.append(df._get_value(i, j, takeable = True))
        emo_feature.append(data)
    
    emo_feature = np.array(emo_feature)
    emo_target = np.array(emo_target)

    # import data
    X = emo_feature
    y = emo_target

    # add noisy features
    random_state = np.random.RandomState(0)

    # train test split
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.1, random_state=random_state)

    history = []
    tprs = []
    aucs = []
    mean_fpr = np.linspace(0, 1, 100)
    fig, ax = plt.subplots()

    # Run classifier with cross-validation and plot ROC curves
    cv = StratifiedKFold(n_splits=5, shuffle=True)
    for i, (train, val) in enumerate(cv.split(X_train, y_train)):
        X_train, X_val = X[train], X[val]
        y_train, y_val = y[train], y[val]

        # fit model
        clf.fit(X_train, y_train)

        # predict
        predict = clf.predict(X_val)
        clf_probs = clf.predict_proba(X_val)

        # store model report in history list
        history.append(classification_report(y_val, predict))

        # get loss and accuracy
        acc = accuracy_score(y_val, predict)
        loss = log_loss(y_val, clf_probs)
        print(f'====================Fold {i}====================', '\n')
        print(f"accuracy_score : {acc}")
        print(f"log_loss : {loss}\n")

        # plot ROC curve
        viz = RocCurveDisplay.from_estimator(clf, X_val, y_val, name="ROC fold {}".format(i), alpha=0.3, lw=1, ax=ax)
        interp_tpr = np.interp(mean_fpr, viz.fpr, viz.tpr)
        interp_tpr[0] = 0.0
        tprs.append(interp_tpr)
        aucs.append(viz.roc_auc)

    # middle line
    ax.plot([0, 1], [0, 1], 'k--')

    # mean line
    mean_tpr = np.mean(tprs, axis=0)
    mean_tpr[-1] = 1.0
    mean_auc = auc(mean_fpr, mean_tpr)
    std_auc = np.std(aucs)
    ax.plot(
        mean_fpr,
        mean_tpr,
        color="b",
        label=r"Mean ROC (AUC = %0.2f $\pm$ %0.2f)" % (mean_auc, std_auc),
        lw=2,
        alpha=0.8,
    )

    # std
    std_tpr = np.std(tprs, axis=0)
    tprs_upper = np.minimum(mean_tpr + std_tpr, 1)
    tprs_lower = np.maximum(mean_tpr - std_tpr, 0)
    ax.fill_between(
        mean_fpr,
        tprs_lower,
        tprs_upper,
        color="grey",
        alpha=0.2,
        label=r"$\pm$ 1 std. dev.",
    )

    ax.set(xlim=[-0.05, 1.05],
            ylim=[-0.05, 1.05],
            title="Receiver operating characteristic")
    ax.legend(loc="lower right")
    plt.xlabel("False Positive Rate")
    plt.ylabel("True Positive Rate")
    plt.savefig('graph/' + dictionary[emo] + '/logreg_emb.jpg')
    plt.show()

    # for i in range(len(history)):
        # print(f'=========================Fold {i+1}=========================')
        # print(history[i])

    predict = clf.predict(X_test)
    tn, fp, fn, tp = confusion_matrix(y_test, predict).ravel()
    sensitivity = tp / (tp+fn)
    specificity = tn / (tn+fp)
    print(f"specificity: {specificity}\n")
    print(f"sensitivity: {sensitivity}\n")
    print(f"accuracy : {accuracy_score(y_test, predict)}\n")

    # report
    print(classification_report(y_test, predict))
    print(accuracy_score(y_test, predict))

    # plot cofusion matrix
    plot_confusion_matrix(clf, X_test, y_test, cmap='YlOrRd')
    plt.savefig('matrix/' + dictionary[emo] + '/logreg_emb.jpg')

In [None]:
from sklearn.ensemble import RandomForestClassifier
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.model_selection import StratifiedKFold
from sklearn.metrics import RocCurveDisplay
from sklearn.metrics import auc
from sklearn.metrics import accuracy_score
from sklearn.metrics import log_loss
from sklearn.metrics import classification_report
from sklearn.metrics import plot_confusion_matrix
from sklearn.metrics import confusion_matrix

# create model
clf = RandomForestClassifier(n_estimators=40)

dictionary = ['ANGER', 'CONTEMPT', 'DISGUST', 'FEAR', 'HAPPINESS',  'NEUTRAL', 'SADNESS', 'SURPRISE']

for emo in range(8):
    print(dictionary[emo])

    emo_feature = []
    emo_target = []

    for i in range(len(df['emotion'])):
        if df['emotion'][i] == emo:
            emo_target.append(1)
        else:
            emo_target.append(0)
            
        data = []
        for j in range(2, len(df.columns)):
            data.append(df._get_value(i, j, takeable = True))
        emo_feature.append(data)
    
    emo_feature = np.array(emo_feature)
    emo_target = np.array(emo_target)

    # import data
    X = emo_feature
    y = emo_target

    # add noisy features
    random_state = np.random.RandomState(0)

    # train test split
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.1, random_state=random_state)

    history = []
    tprs = []
    aucs = []
    mean_fpr = np.linspace(0, 1, 100)
    fig, ax = plt.subplots()

    # Run classifier with cross-validation and plot ROC curves
    cv = StratifiedKFold(n_splits=5, shuffle=True)
    for i, (train, val) in enumerate(cv.split(X_train, y_train)):
        X_train, X_val = X[train], X[val]
        y_train, y_val = y[train], y[val]

        # fit model
        clf.fit(X_train, y_train)

        # predict
        predict = clf.predict(X_val)
        clf_probs = clf.predict_proba(X_val)

        # store model report in history list
        history.append(classification_report(y_val, predict))

        # get loss and accuracy
        acc = accuracy_score(y_val, predict)
        loss = log_loss(y_val, clf_probs)
        print(f'====================Fold {i}====================', '\n')
        print(f"accuracy_score : {acc}")
        print(f"log_loss : {loss}\n")

        # plot ROC curve
        viz = RocCurveDisplay.from_estimator(clf, X_val, y_val, name="ROC fold {}".format(i), alpha=0.3, lw=1, ax=ax)
        interp_tpr = np.interp(mean_fpr, viz.fpr, viz.tpr)
        interp_tpr[0] = 0.0
        tprs.append(interp_tpr)
        aucs.append(viz.roc_auc)

    # middle line
    ax.plot([0, 1], [0, 1], 'k--')

    # mean line
    mean_tpr = np.mean(tprs, axis=0)
    mean_tpr[-1] = 1.0
    mean_auc = auc(mean_fpr, mean_tpr)
    std_auc = np.std(aucs)
    ax.plot(
        mean_fpr,
        mean_tpr,
        color="b",
        label=r"Mean ROC (AUC = %0.2f $\pm$ %0.2f)" % (mean_auc, std_auc),
        lw=2,
        alpha=0.8,
    )

    # std
    std_tpr = np.std(tprs, axis=0)
    tprs_upper = np.minimum(mean_tpr + std_tpr, 1)
    tprs_lower = np.maximum(mean_tpr - std_tpr, 0)
    ax.fill_between(
        mean_fpr,
        tprs_lower,
        tprs_upper,
        color="grey",
        alpha=0.2,
        label=r"$\pm$ 1 std. dev.",
    )

    ax.set(xlim=[-0.05, 1.05],
            ylim=[-0.05, 1.05],
            title="Receiver operating characteristic")
    ax.legend(loc="lower right")
    plt.xlabel("False Positive Rate")
    plt.ylabel("True Positive Rate")
    plt.savefig('graph/' + dictionary[emo] + '/rf_emb.jpg')
    plt.show()

    # for i in range(len(history)):
        # print(f'=========================Fold {i+1}=========================')
        # print(history[i])

    predict = clf.predict(X_test)
    tn, fp, fn, tp = confusion_matrix(y_test, predict).ravel()
    sensitivity = tp / (tp+fn)
    specificity = tn / (tn+fp)
    print(f"specificity: {specificity}\n")
    print(f"sensitivity: {sensitivity}\n")
    print(f"accuracy : {accuracy_score(y_test, predict)}\n")

    # report
    print(classification_report(y_test, predict))
    print(accuracy_score(y_test, predict))

    # plot cofusion matrix
    plot_confusion_matrix(clf, X_test, y_test, cmap='YlOrRd')
    plt.savefig('matrix/' + dictionary[emo] + '/rf_emb.jpg')

In [None]:
from sklearn.naive_bayes import GaussianNB
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.model_selection import StratifiedKFold
from sklearn.metrics import RocCurveDisplay
from sklearn.metrics import auc
from sklearn.metrics import accuracy_score
from sklearn.metrics import log_loss
from sklearn.metrics import classification_report
from sklearn.metrics import plot_confusion_matrix
from sklearn.metrics import confusion_matrix

# create model
clf = GaussianNB()

dictionary = ['ANGER', 'CONTEMPT', 'DISGUST', 'FEAR', 'HAPPINESS',  'NEUTRAL', 'SADNESS', 'SURPRISE']

for emo in range(8):
    print(dictionary[emo])

    emo_feature = []
    emo_target = []

    for i in range(len(df['emotion'])):
        if df['emotion'][i] == emo:
            emo_target.append(1)
        else:
            emo_target.append(0)
            
        data = []
        for j in range(2, len(df.columns)):
            data.append(df._get_value(i, j, takeable = True))
        emo_feature.append(data)

    emo_feature = np.array(emo_feature)
    emo_target = np.array(emo_target)

    # import data
    X = emo_feature
    y = emo_target

    # add noisy features
    random_state = np.random.RandomState(0)

    # train test split
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.1, random_state=random_state)

    history = []
    tprs = []
    aucs = []
    mean_fpr = np.linspace(0, 1, 100)
    fig, ax = plt.subplots()

    # Run classifier with cross-validation and plot ROC curves
    cv = StratifiedKFold(n_splits=5, shuffle=True)
    for i, (train, val) in enumerate(cv.split(X_train, y_train)):
        X_train, X_val = X[train], X[val]
        y_train, y_val = y[train], y[val]

        # fit model
        clf.fit(X_train, y_train)

        # predict
        predict = clf.predict(X_val)
        clf_probs = clf.predict_proba(X_val)

        # store model report in history list
        history.append(classification_report(y_val, predict))

        # get loss and accuracy
        acc = accuracy_score(y_val, predict)
        loss = log_loss(y_val, clf_probs)
        print(f'====================Fold {i}====================', '\n')
        print(f"accuracy_score : {acc}")
        print(f"log_loss : {loss}\n")

        # plot ROC curve
        viz = RocCurveDisplay.from_estimator(clf, X_val, y_val, name="ROC fold {}".format(i), alpha=0.3, lw=1, ax=ax)
        interp_tpr = np.interp(mean_fpr, viz.fpr, viz.tpr)
        interp_tpr[0] = 0.0
        tprs.append(interp_tpr)
        aucs.append(viz.roc_auc)

    # middle line
    ax.plot([0, 1], [0, 1], 'k--')

    # mean line
    mean_tpr = np.mean(tprs, axis=0)
    mean_tpr[-1] = 1.0
    mean_auc = auc(mean_fpr, mean_tpr)
    std_auc = np.std(aucs)
    ax.plot(
        mean_fpr,
        mean_tpr,
        color="b",
        label=r"Mean ROC (AUC = %0.2f $\pm$ %0.2f)" % (mean_auc, std_auc),
        lw=2,
        alpha=0.8,
    )

    # std
    std_tpr = np.std(tprs, axis=0)
    tprs_upper = np.minimum(mean_tpr + std_tpr, 1)
    tprs_lower = np.maximum(mean_tpr - std_tpr, 0)
    ax.fill_between(
        mean_fpr,
        tprs_lower,
        tprs_upper,
        color="grey",
        alpha=0.2,
        label=r"$\pm$ 1 std. dev.",
    )

    ax.set(xlim=[-0.05, 1.05],
            ylim=[-0.05, 1.05],
            title="Receiver operating characteristic")
    ax.legend(loc="lower right")
    plt.xlabel("False Positive Rate")
    plt.ylabel("True Positive Rate")
    plt.savefig('graph/' + dictionary[emo] + '/nb_emb.jpg')
    plt.show()

    # for i in range(len(history)):
        # print(f'=========================Fold {i+1}=========================')
        # print(history[i])

    predict = clf.predict(X_test)
    tn, fp, fn, tp = confusion_matrix(y_test, predict).ravel()
    sensitivity = tp / (tp+fn)
    specificity = tn / (tn+fp)
    print(f"specificity: {specificity}\n")
    print(f"sensitivity: {sensitivity}\n")
    print(f"accuracy : {accuracy_score(y_test, predict)}\n")

    # report
    print(classification_report(y_test, predict))
    print(accuracy_score(y_test, predict))

    # plot cofusion matrix
    plot_confusion_matrix(clf, X_test, y_test, cmap='YlOrRd')
    plt.savefig('matrix/' + dictionary[emo] + '/nb_emb.jpg')