In [None]:
import simpletransformers
from simpletransformers.classification import ClassificationModel, ClassificationArgs
import pandas as pd
import logging
import wandb
from sklearn.metrics import accuracy_score

## get wrong predictions

In [None]:
from sklearn.model_selection import train_test_split

# Read the messages/labels table and keep just the two columns used for
# classification, renamed to `text` (from encoded_message) and `labels`
# (from convo_stage).
df = pd.read_csv('messages_labels.csv')
multiclass_df = df[['encoded_message', 'convo_stage']].rename(
    columns={'encoded_message': 'text', 'convo_stage': 'labels'}
)

# Hold out 20% of the rows as the test split; the fixed seed keeps the
# partition identical across runs.
train, test = train_test_split(multiclass_df, random_state=100, test_size=0.2)

In [None]:
# Load a 6-class Longformer classifier from 'multiclass_cls_param/'
# (presumably the output directory of an earlier training run — confirm).
model = ClassificationModel(
    model_type='longformer',
    model_name='multiclass_cls_param/',
    num_labels=6,
)

# Evaluate on the held-out split. `acc=accuracy_score` asks for accuracy as an
# extra metric; the call returns the metrics, the raw per-example model
# outputs, and the misclassified examples.
result, model_outputs, wrong_predictions = model.eval_model(
    eval_df=test,
    acc=accuracy_score,
)

In [None]:
# Split the misclassified examples into three parallel lists:
# example id, input text, and label.
guid = [example.guid for example in wrong_predictions]
text = [example.text_a for example in wrong_predictions]
label = [example.label for example in wrong_predictions]

In [None]:
# Assemble the misclassified examples into a DataFrame, one row per example.
# The column mapping is deliberately not named `dict`: rebinding that name at
# notebook level would shadow the built-in `dict` for every later cell.
wrong_pred_columns = {'guid': guid, 'text': text, 'label': label}
wrong_pred_df = pd.DataFrame(wrong_pred_columns)

In [None]:
# Number of misclassified examples per value of `label`.
wrong_pred_df['label'].value_counts()

In [None]:
# Label distribution of the whole test split, as a baseline for the
# misclassification counts.
test['labels'].value_counts()

In [None]:
# Preview the first rows of the misclassification table.
wrong_pred_df.head()

In [None]:
from scipy.special import softmax
import heapq

In [None]:
# One accumulator list per column that is later attached to wrong_pred_df.
raw_outputs, probs = [], []
preds, second_preds = [], []
first_probs, second_probs = [], []

for example_id in wrong_pred_df["guid"]:
    # The example's guid is used as its position in model_outputs.
    logits = model_outputs[example_id]
    class_probs = softmax(logits)

    # Indices of the two most probable classes, most probable first.
    top_two = heapq.nlargest(2, range(len(class_probs)), key=lambda k: class_probs[k])
    best_class, runner_up_class = top_two[0], top_two[1]

    raw_outputs.append(list(logits))
    probs.append(list(class_probs))
    preds.append(best_class)
    first_probs.append(class_probs[best_class])
    second_preds.append(runner_up_class)
    second_probs.append(class_probs[runner_up_class])

In [None]:
# Attach the per-example results to the misclassification table.
# Dict order is insertion order, so the columns are added in this order.
prediction_columns = {
    "raw_outputs": raw_outputs,
    "probabilities": probs,
    "predictions": preds,
    "first_probs": first_probs,
    "second_preds": second_preds,
    "second_probs": second_probs,
}
for column_name, column_values in prediction_columns.items():
    wrong_pred_df[column_name] = column_values

In [None]:
# For misclassified rows with label 2: which classes were predicted instead.
wrong_pred_df.loc[wrong_pred_df["label"] == 2, 'predictions'].value_counts()

##### label 2 is wrongly identified as 4

In [None]:
# For misclassified rows with label 3: which classes were predicted instead.
wrong_pred_df.loc[wrong_pred_df["label"] == 3, 'predictions'].value_counts()

##### label 3 is wrongly identified as 4 and as 2 in equal numbers

In [None]:
# For misclassified rows with label 4: which classes were predicted instead.
wrong_pred_df.loc[wrong_pred_df["label"] == 4, 'predictions'].value_counts()

##### label 4 is wrongly identified as 2

In [None]:
# For misclassified rows with label 5: which classes were predicted instead.
wrong_pred_df.loc[wrong_pred_df["label"] == 5, 'predictions'].value_counts()

In [None]:
# Flag "almost right" errors: rows where the label equals the model's second
# choice. For those rows, also record how far the top probability was ahead of
# the second one (a small gap means a near miss).
# Rows that are not almost right get a numeric -100 placeholder, so `diffs`
# holds only numbers instead of a float/str mix (which would produce an
# object-dtype column).
NOT_ALMOST_RIGHT = -100.0

almost_right = []
diffs = []
# Zipping the columns reads each value once, instead of materialising a full
# row Series with .iloc several times per iteration.
for true_label, runner_up, top_prob, runner_up_prob in zip(
    wrong_pred_df["label"],
    wrong_pred_df["second_preds"],
    wrong_pred_df["first_probs"],
    wrong_pred_df["second_probs"],
):
    if true_label == runner_up:
        almost_right.append(1)
        diffs.append(top_prob - runner_up_prob)
    else:
        almost_right.append(0)
        diffs.append(NOT_ALMOST_RIGHT)

In [None]:
# Store the near-miss flag (1/0) and the top-vs-second probability gap
# computed in the previous cell as new columns.
wrong_pred_df["almost_right"] = almost_right
wrong_pred_df["differences"] = diffs

##### label 5 is wrongly identified as 4 (this note refers to the label 5 breakdown a few cells above)

In [None]:
# Ensure the gap column is numeric before comparing against thresholds.
wrong_pred_df["differences"] = wrong_pred_df["differences"].astype(float)

# Number of "almost almost right" predictions: the label was the second choice
# and the top probability led by less than 0.2. The `> -1` bound filters out
# the -100 placeholder rows (rows whose second choice was not the label).
prob_gap = wrong_pred_df["differences"]
narrow_miss = (prob_gap > -1) & (prob_gap < 0.2)
len(wrong_pred_df[narrow_miss])

In [None]:
# Count of misclassified rows whose label was the second choice (1) versus
# not (0).
wrong_pred_df.almost_right.value_counts()

In [None]:
# Persist the error-analysis table. index=False leaves out the positional
# index, which read_csv would otherwise load back as a stray "Unnamed: 0"
# column.
wrong_pred_df.to_csv("wrong_pred_df.csv", index=False)

In [None]:
# Reload the saved error-analysis table from disk.
# NOTE(review): CSV stores the list-valued columns (raw_outputs,
# probabilities) as text, so after this reload they are strings, not lists.
wrong_pred_df = pd.read_csv("wrong_pred_df.csv")

In [None]:
# For misclassified rows with label 1: which classes were predicted instead.
wrong_pred_df.loc[wrong_pred_df["label"] == 1, 'predictions'].value_counts()

In [None]:
# For misclassified rows with label 2: which classes were predicted instead.
wrong_pred_df.loc[wrong_pred_df["label"] == 2, 'predictions'].value_counts()

In [None]:
# For misclassified rows with label 3: which classes were predicted instead.
wrong_pred_df.loc[wrong_pred_df["label"] == 3, 'predictions'].value_counts()

In [None]:
# For misclassified rows with label 4: which classes were predicted instead.
wrong_pred_df.loc[wrong_pred_df["label"] == 4, 'predictions'].value_counts()

In [None]:
# For misclassified rows with label 5: which classes were predicted instead.
wrong_pred_df.loc[wrong_pred_df["label"] == 5, 'predictions'].value_counts()

In [None]:
# For misclassified rows with label 0: which classes were predicted instead.
wrong_pred_df.loc[wrong_pred_df["label"] == 0, 'predictions'].value_counts()

In [None]:
# Count of near-miss (1) versus other (0) misclassifications in the reloaded
# table.
wrong_pred_df.almost_right.value_counts()

## Experiment without label 4

In [None]:
import simpletransformers
from simpletransformers.classification import ClassificationModel, ClassificationArgs
import pandas as pd
import logging
import wandb
from sklearn.metrics import accuracy_score

In [None]:
# Load the messages/labels table from CSV.
df = pd.read_csv('messages_labels.csv')

In [None]:
# Keep only the encoded message column and its conversation-stage column.
multiclass_df = df[['encoded_message', 'convo_stage']]

In [None]:
# Drop every row whose conversation stage is 4. The explicit copy makes the
# filtered frame independent of `multiclass_df`, so the later assignment to
# its `labels` column writes to a real frame instead of raising pandas'
# SettingWithCopyWarning.
df = multiclass_df[multiclass_df['convo_stage'] != 4].copy()

In [None]:
# Rename the two columns to `text` and `labels`.
df.columns = ['text', 'labels']

In [None]:
# With label 4 removed, relabel 5 as 4 so the remaining class ids run 0..4
# without a gap (the model below is created with num_labels=5).
df['labels'] = df['labels'].replace([5], 4)

In [None]:
# Label distribution after dropping label 4 and remapping 5 to 4.
df['labels'].value_counts()

In [None]:
from sklearn.model_selection import train_test_split
# 80/20 train/test split; the fixed random_state keeps it reproducible.
train, test = train_test_split(df, test_size=0.2, random_state = 100)

In [None]:
# Training options for the "without label 4" experiment, collected in one
# mapping and expanded into ClassificationArgs.
no4_training_options = {
    # Schedule and optimisation
    "num_train_epochs": 3,
    "learning_rate": 1e-5,
    "weight_decay": 0.01,
    "warmup_ratio": 0.05,
    "train_batch_size": 4,
    "eval_batch_size": 4,
    "max_seq_length": 128,
    # Output locations
    "output_dir": "multiclass_cls_no4/",
    "cache_dir": "multiclass_cls_no4/cache_dir/",
    "overwrite_output_dir": True,
    "save_model_every_epoch": False,
    # Early stopping on accuracy: higher is better, so the metric is not
    # minimised.
    "evaluate_during_training": True,
    "use_early_stopping": True,
    "early_stopping_metric": "acc",
    "early_stopping_metric_minimize": False,
    "early_stopping_delta": 0.01,
}
model_args = ClassificationArgs(**no4_training_options)

# Create a ClassificationModel: Longformer loaded from
# 'MaskedLM/checkpoint_240000' with a 5-class head (labels 0..4 after the
# remapping above).
model = ClassificationModel(
    model_type='longformer',
    model_name='MaskedLM/checkpoint_240000',
    num_labels=5,
    args=model_args,
)

In [None]:
# Fine-tune on the training split; `acc=accuracy_score` registers accuracy as
# the `acc` metric that early stopping is configured to watch.
# NOTE(review): eval_df is the `test` split, so with early stopping enabled
# the held-out set also drives model selection — consider a separate
# validation split.
model.train_model(train_df = train, eval_df = test, acc=accuracy_score)

In [None]:
# Evaluate the trained model on the test split; returns the metrics, the raw
# per-example outputs, and the misclassified examples.
result, model_outputs, wrong_predictions = model.eval_model(test, acc=accuracy_score)

## Experiment with only label 4 (binary: label 4 vs. all other labels)

In [None]:
import simpletransformers
from simpletransformers.classification import ClassificationModel, ClassificationArgs
import pandas as pd
import logging
import wandb
from sklearn.metrics import accuracy_score

In [None]:
# Load the messages/labels table from CSV.
df = pd.read_csv('messages_labels.csv')

In [None]:
# Keep only the encoded message column and its conversation-stage column.
# The explicit copy detaches the subset from `df`, so the later in-place
# relabelling of the `labels` column does not raise pandas'
# SettingWithCopyWarning.
multiclass_df = df[['encoded_message', 'convo_stage']].copy()

In [None]:
# Rename the two columns to `text` and `labels`.
multiclass_df.columns = ['text', 'labels']

In [None]:
# Collapse the six stages into a binary target: label 4 becomes 1, every
# other listed label becomes 0. Order matters: the "others -> 0" step must run
# before "4 -> 1", otherwise the new 1s would be folded back into 0.
# NOTE: this cell is not re-run safe — running it a second time maps the 1s
# produced here to 0 as well.
multiclass_df['labels'] = (
    multiclass_df['labels']
    .replace([0, 1, 2, 3, 5], 0)
    .replace([4], 1)
)

In [None]:
# Class balance of the binary target (1 = original label 4, 0 = everything
# else).
multiclass_df['labels'].value_counts()

In [None]:
from sklearn.model_selection import train_test_split
# 80/20 train/test split; the fixed random_state keeps it reproducible.
train, test = train_test_split(multiclass_df, test_size=0.2, random_state = 100)

In [None]:
# Training options for the "only label 4" (binary) experiment, collected in
# one mapping and expanded into ClassificationArgs.
only4_training_options = {
    # Schedule and optimisation
    "num_train_epochs": 3,
    "learning_rate": 1e-5,
    "weight_decay": 0.01,
    "warmup_ratio": 0.05,
    "train_batch_size": 4,
    "eval_batch_size": 4,
    "max_seq_length": 128,
    # Output locations
    "output_dir": "multiclass_cls_only4/",
    "cache_dir": "multiclass_cls_only4/cache_dir/",
    "overwrite_output_dir": True,
    "save_model_every_epoch": False,
    # Early stopping on accuracy: higher is better, so the metric is not
    # minimised.
    "evaluate_during_training": True,
    "use_early_stopping": True,
    "early_stopping_metric": "acc",
    "early_stopping_metric_minimize": False,
    "early_stopping_delta": 0.01,
}
model_args = ClassificationArgs(**only4_training_options)

# Create a ClassificationModel: Longformer loaded from
# 'MaskedLM/checkpoint_240000' with a 2-class head (label 4 vs. the rest).
model = ClassificationModel(
    model_type='longformer',
    model_name='MaskedLM/checkpoint_240000',
    num_labels=2,
    args=model_args,
)

In [None]:
# Fine-tune the binary classifier; `acc=accuracy_score` registers accuracy as
# the `acc` metric that early stopping is configured to watch.
# NOTE(review): eval_df is the `test` split, so with early stopping enabled
# the held-out set also drives model selection — consider a separate
# validation split.
model.train_model(train_df = train, eval_df = test, acc=accuracy_score)