In [1]:
import pandas as pd

### Import results from the different models

In [2]:
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [3]:
nn_dutch = pd.read_csv("/content/drive/MyDrive/Thesis/MLAlgorithms/NNModels/resultsNNDutch.csv", index_col=0)
nn_english = pd.read_csv("/content/drive/MyDrive/Thesis/MLAlgorithms/NNModels/resultsNNEnglish.csv", index_col=0)
log_reg_dutch = pd.read_csv("/content/drive/MyDrive/Thesis/MLAlgorithms/Logistic Regression/resultsLRDutch.csv", index_col=0)
log_reg_english = pd.read_csv("/content/drive/MyDrive/Thesis/MLAlgorithms/Logistic Regression/resultsLREnglish.csv", index_col=0)
xgboost_dutch = pd.read_csv("/content/drive/MyDrive/Thesis/MLAlgorithms/XGBoost/resultsXGBoostDutch.csv", index_col=0)
xgboost_english = pd.read_csv("/content/drive/MyDrive/Thesis/MLAlgorithms/XGBoost/resultsXGBoostEnglish.csv", index_col=0)

In [4]:
list_of_models_eng = [{"Name":"Logistic Regression", "Data":log_reg_english},
                      {"Name":"XGBoost", "Data":xgboost_english},
                      {"Name":"Neural Network", "Data":nn_english}]

list_of_models_dut = [{"Name":"Logistic Regression", "Data":log_reg_dutch},
                      {"Name":"XGBoost", "Data":xgboost_dutch},
                      {"Name":"Neural Network", "Data":nn_dutch}]

In [24]:
nn_dutch

Unnamed: 0,Truth,Pred,Prob
0,False,True,0.696598
1,False,True,0.772015
2,True,False,0.330871
3,True,True,0.758651
4,True,True,0.834620
...,...,...,...
579,True,True,0.659767
580,True,False,0.422824
581,False,True,0.652479
582,True,True,0.625935


### English Models

In [6]:
import plotly.express as px
from sklearn.metrics import roc_curve, roc_auc_score, precision_score, fbeta_score, accuracy_score, recall_score
import plotly.graph_objects as go

In [6]:
fig_eng = go.Figure()
fig_eng.add_shape(
    type='line', line=dict(dash='dash'),
    x0=0, x1=1, y0=0, y1=1
)

In [7]:
for model in list_of_models_eng:
    fpr, tpr, _ = roc_curve(model["Data"]["Truth"], model["Data"]["Prob"])
    auc_score = roc_auc_score(model["Data"]["Truth"], model["Data"]["Prob"])
    name = f"{model['Name']} (AUC={auc_score:.2f})"
    fig_eng.add_trace(go.Scatter(x=fpr, y=tpr, name=name, mode='lines'))

In [8]:
fig_eng.update_layout(
    template="plotly_white",
    title_text='ROC Curves English Models',
    title_font_size=30,
    legend = dict(font = dict(size = 18)),
    xaxis_title='False Positive Rate',
    yaxis_title='True Positive Rate',
    width=1200, height=800,
    font=dict(
        family="Helvetica",
    )
)
fig_eng.update_xaxes(title_font=dict(size=22))
fig_eng.update_yaxes(title_font=dict(size=22))
fig_eng.show()

In [9]:
list_fbeta = []
list_accuracy = []
list_precision = []
list_recall = []
list_name = []
for model in list_of_models_eng:
  fbt = fbeta_score(model["Data"]["Truth"], model["Data"]["Pred"], beta=2)
  acc = accuracy_score(model["Data"]["Truth"], model["Data"]["Pred"])
  pre = precision_score(model["Data"]["Truth"], model["Data"]["Pred"])
  rec = recall_score(model["Data"]["Truth"], model["Data"]["Pred"])
  list_accuracy.append(acc)
  list_fbeta.append(fbt)
  list_recall.append(rec)
  list_precision.append(pre)
  list_name.append(model["Name"])

In [10]:
df = pd.DataFrame({"Name":list_name, "F-Beta":list_fbeta, "Accuracy":list_accuracy, "Precision":list_precision, "Recall":list_recall})

In [11]:
fig = go.Figure()
fig.add_trace(
    go.Bar(name='F-Beta',
           x=df['Name'],
           y=df['F-Beta'],
           text=round(df['F-Beta'],2),
           textposition='auto'))

fig.add_trace(
    go.Bar(name='Accuracy',
           x=df['Name'],
           y=df['Accuracy'],
           text=round(df['Accuracy'],2),
           textposition='auto',))

fig.add_trace(
    go.Bar(name='Precision',
           x=df['Name'],
           y=df['Precision'],
           text=round(df['Precision'],2),
           textposition='auto',))

fig.add_trace(
    go.Bar(name='Recall',
           x=df['Name'],
           y=df['Recall'],
           text=round(df['Recall'],2),
           textposition='auto',))

fig.update_layout(
    template="plotly_white",
    title_text='Metrics English Models',
    title_font_size=30,
    legend = dict(font = dict(size = 18)),
    width=1200, height=600,
    font=dict(
        family="Helvetica",
    )
)
fig.update_xaxes(title_font=dict(size=22))
fig.update_yaxes(title_font=dict(size=22))
fig.show()

### Dutch Models

In [12]:
fig_dut = go.Figure()
fig_dut.add_shape(
    type='line', line=dict(dash='dash'),
    x0=0, x1=1, y0=0, y1=1
)

In [13]:
for model in list_of_models_dut:
    fpr, tpr, _ = roc_curve(model["Data"]["Truth"], model["Data"]["Prob"])
    auc_score = roc_auc_score(model["Data"]["Truth"], model["Data"]["Prob"])
    name = f"{model['Name']} (AUC={auc_score:.2f})"
    fig_dut.add_trace(go.Scatter(x=fpr, y=tpr, name=name, mode='lines'))

In [14]:
fig_dut.update_layout(
    template="plotly_white",
    title_text='ROC Curves Dutch Models',
    title_font_size=30,
    legend = dict(font = dict(size = 18)),
    xaxis_title='False Positive Rate',
    yaxis_title='True Positive Rate',
    width=1200, height=800,
    font=dict(
        family="Helvetica",
    )
)
fig_dut.update_xaxes(title_font=dict(size=22))
fig_dut.update_yaxes(title_font=dict(size=22))
fig_dut.show()

In [7]:
list_fbeta = []
list_accuracy = []
list_precision = []
list_recall = []
list_name = []
for model in list_of_models_dut:
  fbt = fbeta_score(model["Data"]["Truth"], model["Data"]["Pred"], beta=2)
  acc = accuracy_score(model["Data"]["Truth"], model["Data"]["Pred"])
  pre = precision_score(model["Data"]["Truth"], model["Data"]["Pred"])
  rec = recall_score(model["Data"]["Truth"], model["Data"]["Pred"])
  list_accuracy.append(acc)
  list_fbeta.append(fbt)
  list_recall.append(rec)
  list_precision.append(pre)
  list_name.append(model["Name"])

In [8]:
df = pd.DataFrame({"Name":list_name, "F-Beta":list_fbeta, "Accuracy":list_accuracy, "Precision":list_precision, "Recall":list_recall})

In [9]:
fig = go.Figure()
fig.add_trace(
    go.Bar(name='F-Beta',
           x=df['Name'],
           y=df['F-Beta'],
           text=round(df['F-Beta'],2),
           textposition='auto'))

fig.add_trace(
    go.Bar(name='Accuracy',
           x=df['Name'],
           y=df['Accuracy'],
           text=round(df['Accuracy'],2),
           textposition='auto',))
fig.add_trace(
    go.Bar(name='Precision',
           x=df['Name'],
           y=df['Precision'],
           text=round(df['Precision'],2),
           textposition='auto',))

fig.add_trace(
    go.Bar(name='Recall',
           x=df['Name'],
           y=df['Recall'],
           text=round(df['Recall'],2),
           textposition='auto',))


fig.update_layout(
    template="plotly_white",
    title_text='Metrics Dutch Models',
    title_font_size=30,
    legend = dict(font = dict(size = 18)),
    width=1200, height=600,
    font=dict(
        family="Helvetica",
    )
)
fig.update_xaxes(title_font=dict(size=22))
fig.update_yaxes(title_font=dict(size=22))
fig.show()

In [18]:
  q

NameError: ignored