In [None]:
from ipywidgets import interact, widgets

# Input Subtask
# Show the menu and read the chosen subtask number from standard input.
# Only 1, 2 and 3 are handled by the `TASK_NUMBER == ...` branches used in
# the rest of the notebook, so any other integer is rejected here instead of
# failing later with an unrelated NameError.
print("1: Toxic Comment Classification\n2: Engaging Comment Classification\n3: Fact Claiming Comment Classification\nChoose Subtask:")
_task_choice = int(input())  # raises ValueError on non-integer input
if _task_choice not in (1, 2, 3):
    raise ValueError(f"Subtask must be 1, 2 or 3, got {_task_choice}")
TASK_NUMBER = _task_choice

In [None]:
!pip install -q ernie

import pandas as pd
import numpy as np
import nltk
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report, accuracy_score, precision_score, recall_score, f1_score, cohen_kappa_score, roc_auc_score, confusion_matrix, plot_roc_curve
import matplotlib.pyplot as plt
from ernie import SentenceClassifier, Models, clean_autosave

def evaluation_result(true, predicted):
    """Print a metric summary comparing predicted labels against true labels.

    Printed, in order: accuracy as a percentage, accuracy, macro-averaged F1,
    ROC AUC, precision, recall, the confusion matrix, the classification
    report and a single-space line.

    Args:
        true: ground-truth labels, passed unchanged to the sklearn metrics.
        predicted: predicted labels, passed unchanged to the sklearn metrics.

    Returns:
        None. All results are written to standard output.
    """
    accuracy = accuracy_score(true, predicted)
    print(f"Accuracy Score: {round(accuracy * 100,2)} %\n")

    # Each metric is evaluated lazily, right before its line is printed, so
    # the lines printed before a failing metric still appear.
    scalar_metrics = (
        ('Accuracy', lambda: accuracy),
        ('F1 score', lambda: f1_score(true, predicted, average='macro')),
        ('ROC AUC', lambda: roc_auc_score(true, predicted)),
        ('Precision', lambda: precision_score(true, predicted)),
        ('Recall', lambda: recall_score(true, predicted)),
    )
    for label, compute in scalar_metrics:
        print('%s: %f' % (label, compute()))

    # Tabular summaries.
    print(confusion_matrix(true, predicted))
    print(classification_report(true, predicted))
    print(" ")

In [None]:
# Training data: where the English translation is missing, fall back to the
# text in 'comment_text', then convert every entry to a numpy string.
df = pd.read_csv('GermEval21_Translated_final.csv')
df['English_Google'] = (
    df['English_Google']
    .fillna(df['comment_text'])
    .apply(lambda text: np.str_(text))
)

# Test data: same clean-up, but the fallback column is named 'c_text' here.
test_df = pd.read_csv('GermEval21_Test_Translated.csv')
test_df['English_Google'] = (
    test_df['English_Google']
    .fillna(test_df['c_text'])
    .apply(lambda text: np.str_(text))
)

# Labels for the final test set.
label_df = pd.read_csv('truth.csv')

In [None]:
# Split training dataset
# Label column used in the training CSV and in truth.csv for each subtask.
TASK_LABEL_COLUMNS = {
    1: "Sub1_Toxic",
    2: "Sub2_Engaging",
    3: "Sub3_FactClaiming",
}
if TASK_NUMBER not in TASK_LABEL_COLUMNS:
    # Fail here rather than with a NameError on `trainn` further down.
    raise ValueError(f"TASK_NUMBER must be 1, 2 or 3, got {TASK_NUMBER!r}")
label_column = TASK_LABEL_COLUMNS[TASK_NUMBER]

# English (Google-translated) text plus the task label; the columns are
# renamed to '0' (text) and '1' (label), the names the rest of the notebook
# indexes by.
df_new = df.filter(["English_Google", label_column]).rename(
    columns={'English_Google': '0', label_column: '1'}
)
trainn, testn = train_test_split(
    df_new, test_size=0.1, random_state=101, stratify=df_new['1']
)

# Original-language text ('comment_text') plus the same task label. The split
# uses the same test_size, random_state and stratification labels as the
# English one, so both are expected to select the same rows.
df_ger = df.filter(["comment_text", label_column]).rename(
    columns={'comment_text': '0', label_column: '1'}
)
traing, testg = train_test_split(
    df_ger, test_size=0.1, random_state=101, stratify=df_ger['1']
)

# rename truth dataset
# NOTE: this overwrites label_df with a frame that only has the column '1',
# so re-running this cell without re-reading truth.csv will no longer find
# the task column.
label_df = label_df.filter([label_column]).rename(columns={label_column: '1'})
    
# BERT Base Uncased (epochs=3)
print('='*50, 'BERT Base Uncased (epochs=3)', '-'*50, " ", sep='\n')

# Fine-tune on the English training split.
classifier = SentenceClassifier(model_name=Models.BertBaseUncased, max_length=128)
classifier.load_dataset(trainn, validation_split=0.1, stratify=trainn['1'])
classifier.fine_tune(epochs=3)

# Held-out part of the training data: round each prediction, then take the
# index of its largest entry as the predicted label.
bertb_eval = [np.round(classifier.predict_one(text), decimals=0) for text in testn['0']]
true = testn['1'].tolist()
predicted = [np.argmax(scores) for scores in bertb_eval]
evaluation_result(true, predicted)

# Final test set, scored against the truth labels. `bertb` keeps the rounded
# predictions because the export cell reads them.
bertb = [np.round(classifier.predict_one(text), decimals=0) for text in test_df['English_Google']]
true = label_df['1'].tolist()
predicted = [np.argmax(scores) for scores in bertb]
evaluation_result(true, predicted)

clean_autosave()
print(" ")

In [None]:
# mBERT Base Cased (epochs=3)
print('='*50, 'mBERT Base Cased (epochs=3)', '-'*50, " ", sep='\n')

# Fine-tune the multilingual model on the English training split.
classifier = SentenceClassifier(model_name='bert-base-multilingual-cased', max_length=128)
classifier.load_dataset(trainn, validation_split=0.1, stratify=trainn['1'])
classifier.fine_tune(epochs=3)

# Held-out part of the training data: round each prediction, then take the
# index of its largest entry as the predicted label.
mbertc_eval = [np.round(classifier.predict_one(text), decimals=0) for text in testn['0']]
true = testn['1'].tolist()
predicted = [np.argmax(scores) for scores in mbertc_eval]
evaluation_result(true, predicted)

# Final test set, scored against the truth labels. `mbertc` keeps the rounded
# predictions because the export cell reads them.
mbertc = [np.round(classifier.predict_one(text), decimals=0) for text in test_df['English_Google']]
true = label_df['1'].tolist()
predicted = [np.argmax(scores) for scores in mbertc]
evaluation_result(true, predicted)

clean_autosave()
print(" ")

In [None]:
# DBMDZ GermanBERT Base Cased (epochs=3)
print('='*50)
print('DBMDZ GermanBERT Base Cased (epochs=3)')
print('-'*50)
print(" ")

# Fine-tune on the German training split. The stratification labels come from
# the same frame that is being loaded (traing), so this run does not depend on
# the English split staying row-aligned with the German one.
classifier = SentenceClassifier(model_name='dbmdz/bert-base-german-cased', max_length=128)
classifier.load_dataset(traing, validation_split=0.1, stratify=traing['1'])
classifier.fine_tune(epochs=3)

# Held-out part of the German training data. Ground truth is read from testg,
# the frame whose texts are being predicted.
germanbert_eval = [np.round(classifier.predict_one(each), decimals=0) for each in testg['0']]
true = testg['1'].tolist()
predicted = [np.argmax(arr) for arr in germanbert_eval]
evaluation_result(true, predicted)

# Final test set (text in 'c_text'), scored against the truth labels.
# `germanbert` keeps the rounded predictions because the export cell reads them.
germanbert = [np.round(classifier.predict_one(each), decimals=0) for each in test_df['c_text']]
true = label_df['1'].tolist()
predicted = [np.argmax(arr) for arr in germanbert]
evaluation_result(true, predicted)

clean_autosave()
print(" ")

In [None]:
# Deepset GermanBERT Base Cased (epochs=3)
print('='*50)
print('Deepset GermanBERT Base Cased (epochs=3)')
print('-'*50)
print(" ")

# Fine-tune on the German training split. The stratification labels come from
# the same frame that is being loaded (traing), so this run does not depend on
# the English split staying row-aligned with the German one.
classifier = SentenceClassifier(model_name='bert-base-german-cased', max_length=128)
classifier.load_dataset(traing, validation_split=0.1, stratify=traing['1'])
classifier.fine_tune(epochs=3)

# Held-out part of the German training data. Ground truth is read from testg,
# the frame whose texts are being predicted.
rlgermanbert_eval = [np.round(classifier.predict_one(each), decimals=0) for each in testg['0']]
true = testg['1'].tolist()
predicted = [np.argmax(arr) for arr in rlgermanbert_eval]
evaluation_result(true, predicted)

# Final test set (text in 'c_text'), scored against the truth labels.
# `rlgermanbert` keeps the rounded predictions because the export cell reads them.
rlgermanbert = [np.round(classifier.predict_one(each), decimals=0) for each in test_df['c_text']]
true = label_df['1'].tolist()
predicted = [np.argmax(arr) for arr in rlgermanbert]
evaluation_result(true, predicted)

clean_autosave()
print(" ")

In [None]:
# mBERT Base Cased on the German text (epochs=3)
# The header names the German text so this run's output can be told apart
# from the mBERT run that is trained on the English translation.
print('='*50)
print('mBERT Base Cased, German text (epochs=3)')
print('-'*50)
print(" ")

# Fine-tune on the German training split. The stratification labels come from
# the same frame that is being loaded (traing), so this run does not depend on
# the English split staying row-aligned with the German one.
classifier = SentenceClassifier(model_name='bert-base-multilingual-cased', max_length=128)
classifier.load_dataset(traing, validation_split=0.1, stratify=traing['1'])
classifier.fine_tune(epochs=3)

# Held-out part of the German training data. Ground truth is read from testg,
# the frame whose texts are being predicted.
mbertcg_eval = [np.round(classifier.predict_one(each), decimals=0) for each in testg['0']]
true = testg['1'].tolist()
predicted = [np.argmax(arr) for arr in mbertcg_eval]
evaluation_result(true, predicted)

# Final test set (text in 'c_text'), scored against the truth labels.
# `mbertcg` keeps the rounded predictions because the export cell reads them.
mbertcg = [np.round(classifier.predict_one(each), decimals=0) for each in test_df['c_text']]
true = label_df['1'].tolist()
predicted = [np.argmax(arr) for arr in mbertcg]
evaluation_result(true, predicted)

clean_autosave()
print(" ")

In [None]:
# For every model, keep the element at index 1 of each rounded test-set
# prediction as an int (presumably the positive-class flag; confirm against
# the classifier's output order).
bertb_r = [int(scores[1]) for scores in bertb]
mbertc_r = [int(scores[1]) for scores in mbertc]
germanbert_r = [int(scores[1]) for scores in germanbert]
rlgermanbert_r = [int(scores[1]) for scores in rlgermanbert]
mbertcg_r = [int(scores[1]) for scores in mbertcg]

# Each list goes in as one row; transposing turns the rows into columns.
data = pd.DataFrame([
    test_df['comment_id'].tolist(),
    bertb_r,
    mbertc_r,
    germanbert_r,
    rlgermanbert_r,
    mbertcg_r,
]).transpose()
data.columns = ['comment_id', 'bertb', 'mbertc', 'germanbert', 'rlgermanbert', 'mbertcg']

# One output file per subtask; nothing is written for any other TASK_NUMBER.
csv_name = {1: 'task1_E.csv', 2: 'task2_E.csv', 3: 'task3_E.csv'}.get(TASK_NUMBER)
if csv_name is not None:
    data.to_csv(csv_name, index=False)

display(data)