In [None]:
!pip install -q simpletransformers emoji transformers --upgrade

from tqdm.notebook import tqdm
import pandas as pd
import numpy as np
import nltk
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report, accuracy_score, precision_score, recall_score, f1_score, roc_auc_score, confusion_matrix, plot_roc_curve
import matplotlib.pyplot as plt

from simpletransformers.classification import ClassificationModel, ClassificationArgs
from ipywidgets import interact, widgets
# Input Subtask
# Ask which subtask to run; the later cells branch on TASK_NUMBER.
print("1: Toxic Comment Classification\n2: Engaging Comment Classification\n3: Fact Claiming Comment Classification\nChoose Subtask:")
TASK_NUMBER = int(input())
# Reject unknown subtasks here: otherwise no branch of the later if/elif chains
# runs and the failure only shows up as a NameError several cells further down.
if TASK_NUMBER not in (1, 2, 3):
    raise ValueError(f"TASK_NUMBER must be 1, 2 or 3, got {TASK_NUMBER}")

In [None]:
def _read_translated_csv(path, original_text_col):
    """Read a CSV and make its 'English_Google' column gap-free text.

    Args:
        path: location of the CSV file to read.
        original_text_col: name of the column whose value replaces a missing
            'English_Google' entry on the same row.

    Returns:
        The loaded DataFrame, with every 'English_Google' value cast via np.str_.
    """
    frame = pd.read_csv(path)
    # Fill missing translations from the original text, then force every cell to a string.
    frame['English_Google'] = frame['English_Google'].fillna(frame[original_text_col]).map(np.str_)
    return frame


# load training dataset
df = _read_translated_csv('GermEval21_Translated_final.csv', 'comment_text')

# load test dataset (its original-text column is named 'c_text')
test_df = _read_translated_csv('GermEval21_Test_Translated.csv', 'c_text')

# load final test labels
label_df = pd.read_csv('truth.csv')

In [None]:
# split training dataset
# Subtask number -> (banner title, name of the label column for that subtask).
SUBTASKS = {
    1: ("Subtask 1: Toxic Comment Classification", "Sub1_Toxic"),
    2: ("Subtask 2: Engaging Comment Classification", "Sub2_Engaging"),
    3: ("Subtask 3: Fact Claiming Comment Classification", "Sub3_FactClaiming"),
}


def _text_label_frame(frame, text_col, label_col):
    """Keep only text_col and label_col of frame, renamed to 'text' and 'labels'."""
    return frame.filter([text_col, label_col]).rename(columns={text_col: 'text', label_col: 'labels'})


def _stratified_split(frame):
    """Split frame 80/20, then cut the 20% part in half, stratifying on 'labels' both times.

    Returns:
        (train, holdout, first_half, second_half); the two halves together make up holdout.
    """
    train, holdout = train_test_split(frame, test_size=0.2, stratify=frame['labels'], random_state=101)
    first_half, second_half = train_test_split(holdout, test_size=0.5, stratify=holdout['labels'], random_state=101)
    return train, holdout, first_half, second_half


# Fail loudly on an unknown subtask instead of silently defining no splits at all.
if TASK_NUMBER not in SUBTASKS:
    raise ValueError(f"TASK_NUMBER must be one of {sorted(SUBTASKS)}, got {TASK_NUMBER!r}")

task_title, label_column = SUBTASKS[TASK_NUMBER]
print('='*50)
print(task_title)
print('='*50)

# English (translated) text and original text are split with the same seed and
# the same stratification column.
df_eng = _text_label_frame(df, "English_Google", label_column)
trainn, testn, testnn, valn = _stratified_split(df_eng)
df_ger = _text_label_frame(df, "comment_text", label_column)
traing, testg, testng, valg = _stratified_split(df_ger)

# rename truth dataset
# NOTE: label_df is overwritten in place. Re-running this cell without re-running
# the loading cell first leaves no 'labels' column, because the source column
# has already been renamed.
label_df = _text_label_frame(label_df, "English_Google", label_column)

In [None]:
def evaluation_result(true, predicted):
  """Print a metrics summary comparing predicted labels with the true labels.

  Printed in order: accuracy as a percentage rounded to two decimals, raw
  accuracy, macro-averaged F1, ROC AUC, precision, recall, the confusion
  matrix and the classification report, followed by a line holding one space.
  Precision, recall and ROC AUC are called with the scorers' default settings.

  Args:
    true: ground-truth labels.
    predicted: predicted labels, aligned element by element with ``true``.

  Returns:
    None; all results are written to standard output.
  """
  acc = accuracy_score(true, predicted)
  print(f"Accuracy Score: {round(acc * 100,2)} %\n")
  print('Accuracy: %f' % acc)

  # Scalar metrics: (output template, scorer, extra keyword arguments).
  # Each score is computed immediately before it is printed.
  scalar_metrics = (
      ('F1 score: %f', f1_score, {'average': 'macro'}),
      ('ROC AUC: %f', roc_auc_score, {}),
      ('Precision: %f', precision_score, {}),
      ('Recall: %f', recall_score, {}),
  )
  for template, scorer, extra in scalar_metrics:
    print(template % scorer(true, predicted, **extra))

  # Tabular outputs: confusion matrix, then the classification report.
  print(confusion_matrix(true, predicted))
  print(classification_report(true, predicted))
  print(" ")
    
# Training options passed as ``args=`` when the classification models are built.
model_args = dict(
    num_train_epochs=3,
    train_batch_size=16,
    eval_batch_size=32,
    reprocess_input_data=True,
    overwrite_output_dir=True,
    evaluate_during_training=True,
    manual_seed=101,
    use_multiprocessing=True,
    learning_rate=4e-5,
)

In [None]:
# English BERTweet Base
for banner_line in ('='*50, 'English BERTweet Base', '-'*50, " ", '-'*50, 'Evaluation set', '-'*50):
    print(banner_line)

# Fine-tune on the English training split; testnn is passed as the evaluation frame.
model = ClassificationModel('bertweet', 'vinai/bertweet-base', args=model_args, use_cuda=True)
model.train_model(trainn, eval_df=testnn, acc=accuracy_score)

# eval: predict the valn split and score against its own labels
# (element 0 of predict()'s return value is used as the predictions)
bertweet = model.predict(valn['text'].tolist())[0].tolist()
true = valn['labels'].tolist()
evaluation_result(true, bertweet)

# test: `bertweet` now holds the test-set predictions
bertweet = model.predict(test_df['English_Google'].tolist())[0].tolist()
# after test labels released
true = label_df['labels'].tolist()
evaluation_result(true, bertweet)

In [None]:
# English XLM-T
print('='*50, 'English cardiffnlp/twitter-xlm-roberta-base-sentiment', '-'*50, " ", sep='\n')

# Fine-tune on the English training split; testnn is passed as the evaluation frame.
model = ClassificationModel('xlmroberta', 'cardiffnlp/twitter-xlm-roberta-base-sentiment', args=model_args, use_cuda=True)
model.train_model(trainn, eval_df=testnn, acc=accuracy_score)

# validation: predict the valn split and score against its own labels
# (element 0 of predict()'s return value is used as the predictions)
txlmr_en = model.predict(valn['text'].tolist())[0].tolist()
true = valn['labels'].tolist()
evaluation_result(true, txlmr_en)
print(" ")

# test: `txlmr_en` now holds the test-set predictions
txlmr_en = model.predict(test_df['English_Google'].tolist())[0].tolist()
# after test labels released
true = label_df['labels'].tolist()
evaluation_result(true, txlmr_en)

In [None]:
# German XLM-T
print('='*50)
print('German cardiffnlp/twitter-xlm-roberta-base-sentiment')
print('-'*50)
print(" ")
# Fine-tune on the German training split; testng is passed as the evaluation frame.
model = ClassificationModel('xlmroberta', 'cardiffnlp/twitter-xlm-roberta-base-sentiment', args=model_args, use_cuda=True)
model.train_model(traing, eval_df=testng, acc=accuracy_score)
# validation: predictions come from valg, so they are scored against valg's own
# labels rather than the labels of the English valn split.
predictor = model.predict(valg['text'].tolist())
txlmr_de = predictor[0].tolist()
true = valg['labels'].tolist()
evaluation_result(true, txlmr_de)
print(" ")
# test: `txlmr_de` now holds the test-set predictions
predictor = model.predict(test_df['c_text'].tolist())
txlmr_de = predictor[0].tolist()
# after test labels released
true = label_df['labels'].tolist()
evaluation_result(true, txlmr_de)

In [None]:
# English XLM-R Base
header_lines = ['='*50, 'English xlm-roberta-base', '-'*50, " "]
print("\n".join(header_lines))

# Fine-tune on the English training split; testnn is passed as the evaluation frame.
model = ClassificationModel('xlmroberta', 'xlm-roberta-base', args=model_args, use_cuda=True)
model.train_model(trainn, eval_df=testnn, acc=accuracy_score)

# validation: predict the valn split and score against its own labels
# (element 0 of predict()'s return value is used as the predictions)
validation_texts = valn['text'].tolist()
xlmr_en = model.predict(validation_texts)[0].tolist()
true = valn['labels'].tolist()
evaluation_result(true, xlmr_en)
print(" ")

# test: `xlmr_en` now holds the test-set predictions
test_texts = test_df['English_Google'].tolist()
xlmr_en = model.predict(test_texts)[0].tolist()
# after test labels released
true = label_df['labels'].tolist()
evaluation_result(true, xlmr_en)

In [None]:
# German XLM-R Base
print('='*50)
print('German xlm-roberta-base')
print('-'*50)
print(" ")
# Fine-tune on the German training split; testng is passed as the evaluation frame.
model = ClassificationModel('xlmroberta', 'xlm-roberta-base', args=model_args, use_cuda=True)
model.train_model(traing, eval_df=testng, acc=accuracy_score)
# validation: predictions come from valg, so they are scored against valg's own
# labels rather than the labels of the English valn split.
predictor = model.predict(valg['text'].tolist())
xlmr_de = predictor[0].tolist()
true = valg['labels'].tolist()
evaluation_result(true, xlmr_de)
print(" ")
# test: `xlmr_de` now holds the test-set predictions
predictor = model.predict(test_df['c_text'].tolist())
xlmr_de = predictor[0].tolist()
# after test labels released
true = label_df['labels'].tolist()
evaluation_result(true, xlmr_de)

In [None]:
# One row per test comment, one column of test-set predictions per fine-tuned model.
# Building from a dict of columns (instead of a list of rows plus transpose) lets each
# column keep its own dtype and raises immediately if any prediction list does not
# have the same length as comment_id, rather than padding the shorter ones.
data = pd.DataFrame({
    'comment_id': test_df['comment_id'].tolist(),
    'bertweet': bertweet,
    'txlmr_en': txlmr_en,
    'txlmr_de': txlmr_de,
    'xlmr_en': xlmr_en,
    'xlmr_de': xlmr_de,
})

display(data)

In [None]:
# Output file per subtask; nothing is written for any other TASK_NUMBER.
output_files = {
    1: 'task1_ST.csv',
    2: 'task2_ST.csv',
    3: 'task3_ST.csv',
}
if TASK_NUMBER in output_files:
  data.to_csv(output_files[TASK_NUMBER], index=False)