## Imports

In [None]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from simpletransformers.classification import ClassificationModel

## Load Dataset

In [None]:
# Read the labelled tweets from the CSV file and preview the first five rows.
df = pd.read_csv(filepath_or_buffer='offensive_tweet_dataset/labeled_data.csv')
df.head(n=5)

Unnamed: 0.1,Unnamed: 0,count,hate_speech,offensive_language,neither,class,tweet
0,0,3,0,0,3,2,!!! RT @mayasolovely: As a woman you shouldn't...
1,1,3,0,3,0,1,!!!!! RT @mleew17: boy dats cold...tyga dwn ba...
2,2,3,0,3,0,1,!!!!!!! RT @UrKindOfBrand Dawg!!!! RT @80sbaby...
3,3,3,0,2,1,1,!!!!!!!!! RT @C_G_Anderson: @viva_based she lo...
4,4,6,0,6,0,1,!!!!!!!!!!!!! RT @ShenikaRoberts: The shit you...


In [None]:
# True if any cell anywhere in the frame is missing, False otherwise
df.isna().to_numpy().any()

False

Relevant columns: `class` (the label) and `tweet` (the text).  
`class` is encoded as:
- 0: hate speech
- 1: offensive language
- 2: neither

## Drop unused columns

In [None]:
# Keep only the label column and the tweet text; the other columns are not used below.
df = df.loc[:, ['class', 'tweet']]
df

Unnamed: 0,class,tweet
0,2,!!! RT @mayasolovely: As a woman you shouldn't...
1,1,!!!!! RT @mleew17: boy dats cold...tyga dwn ba...
2,1,!!!!!!! RT @UrKindOfBrand Dawg!!!! RT @80sbaby...
3,1,!!!!!!!!! RT @C_G_Anderson: @viva_based she lo...
4,1,!!!!!!!!!!!!! RT @ShenikaRoberts: The shit you...
...,...,...
24778,1,you's a muthaf***in lie &#8220;@LifeAsKing: @2...
24779,2,"you've gone and broke the wrong heart baby, an..."
24780,1,young buck wanna eat!!.. dat nigguh like I ain...
24781,1,youu got wild bitches tellin you lies


## Split Dataset

In [None]:
# Features are the raw tweet text; targets are the integer class labels.
X = df['tweet']
y = df['class']

# Hold out 20% of the rows for evaluation. The classes are heavily imbalanced
# (hate speech is under 6% of the rows), so stratify on the label to guarantee
# that both splits keep the original class proportions instead of relying on
# the random draw happening to preserve them.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.20, random_state=42, stratify=y
)

def check_ratio(feat_df: "pd.DataFrame | pd.Series",
                df: "pd.DataFrame | pd.Series | pd.Index",
                header: str) -> None:
    """Print how many entries are in ``feat_df`` and their share of ``df``.

    Only the lengths of the two arguments are used, so any sized object
    (DataFrame, Series, Index, ...) is accepted.

    Args:
        feat_df: The subset being measured (e.g. the rows of a single class).
        df: The reference collection the subset is compared against.
        header: Label printed in front of the count and percentage.

    Prints:
        ``"<header> : <count> (<percent>%)"`` with the percentage rounded to
        two decimals. If ``df`` is empty the percentage is reported as ``nan``
        instead of raising ``ZeroDivisionError``.
    """
    subset_size = len(feat_df)
    total_size = len(df)
    # An empty reference has no meaningful ratio; avoid dividing by zero.
    percent = (subset_size / total_size) * 100.0 if total_size else float('nan')
    print(header + ' : {0} ({1:0.2f}%)'.format(subset_size, percent))

# Report the overall train/test proportions relative to the full dataset.
total_rows = len(df.index)
train_share = (len(X_train) / total_rows) * 100
test_share = (len(X_test) / total_rows) * 100
print('{0:0.2f}% in training set'.format(train_share))
print('{0:0.2f}% in test set'.format(test_share))

# Per-class breakdown for the full data, the training labels and the test labels.
# Each entry: (selector for one class id, reference collection, headers for classes 0, 1, 2).
ratio_checks = (
    (lambda class_id: df.loc[df['class'] == class_id], df.index,
     ('Original Hate Speech', 'Original Offensive', 'Original Neither')),
    (lambda class_id: y_train[y_train[:] == class_id], y_train,
     ('Training Hate Speech', 'Training Offensive', 'Training Neither')),
    (lambda class_id: y_test[y_test[:] == class_id], y_test,
     ('Test Hate Speech', 'Test Offensive', 'Test Neither')),
)
for select_class, reference, headers in ratio_checks:
    print('')  # blank line between groups
    for class_id, header in enumerate(headers):
        check_ratio(select_class(class_id), reference, header)

80.00% in training set
20.00% in test set

Original Hate Speech : 1430 (5.77%)
Original Offensive : 19190 (77.43%)
Original Neither : 4163 (16.80%)

Training Hate Speech : 1140 (5.75%)
Training Offensive : 15358 (77.46%)
Training Neither : 3328 (16.79%)

Test Hate Speech : 290 (5.85%)
Test Offensive : 3832 (77.30%)
Test Neither : 835 (16.84%)


In [None]:
# Package each split as a two-column frame: 'text' (the tweet) and 'labels' (the class).
train_df = X_train.to_frame(name='text').assign(labels=y_train)
eval_df = X_test.to_frame(name='text').assign(labels=y_test)

## Training

In [None]:
# Training configuration passed to the ClassificationModel through ``args``.
train_args = {
    'num_train_epochs': 15,
    'train_batch_size': 32,
    'eval_batch_size': 32,
    'overwrite_output_dir': True,
    # Fix the seed so that fine-tuning is repeatable after a kernel restart.
    'manual_seed': 42,
}

In [None]:
# Build a 3-label RoBERTa classifier starting from the 'roberta-base' checkpoint.
# NOTE(review): the per-class weights look like inverse class frequencies
# relative to class 1 (offensive) — confirm how they were derived.
model = ClassificationModel(
    'roberta',
    'roberta-base',
    num_labels=3,
    weight=[13.5, 1, 4.6],
    args=train_args,
    use_cuda=True,
    cuda_device=0,
)

Some weights of the model checkpoint at roberta-base were not used when initializing RobertaForSequenceClassification: ['lm_head.layer_norm.bias', 'lm_head.dense.weight', 'lm_head.decoder.weight', 'lm_head.bias', 'lm_head.dense.bias', 'roberta.pooler.dense.bias', 'lm_head.layer_norm.weight', 'roberta.pooler.dense.weight']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at roberta-base and are newly initialized: ['classifier.out_proj.bias', 'classifi

In [None]:
# Fine-tune the classifier on the training split; the model was configured
# with train_args when it was constructed.
model.train_model(train_df)

  0%|          | 0/19826 [00:00<?, ?it/s]

Epoch:   0%|          | 0/15 [00:00<?, ?it/s]

Running Epoch 0 of 15:   0%|          | 0/620 [00:00<?, ?it/s]

Running Epoch 1 of 15:   0%|          | 0/620 [00:00<?, ?it/s]

Running Epoch 2 of 15:   0%|          | 0/620 [00:00<?, ?it/s]

Running Epoch 3 of 15:   0%|          | 0/620 [00:00<?, ?it/s]

Running Epoch 4 of 15:   0%|          | 0/620 [00:00<?, ?it/s]

Running Epoch 5 of 15:   0%|          | 0/620 [00:00<?, ?it/s]

Running Epoch 6 of 15:   0%|          | 0/620 [00:00<?, ?it/s]

Running Epoch 7 of 15:   0%|          | 0/620 [00:00<?, ?it/s]

Running Epoch 8 of 15:   0%|          | 0/620 [00:00<?, ?it/s]

Running Epoch 9 of 15:   0%|          | 0/620 [00:00<?, ?it/s]

Running Epoch 10 of 15:   0%|          | 0/620 [00:00<?, ?it/s]

Running Epoch 11 of 15:   0%|          | 0/620 [00:00<?, ?it/s]

Running Epoch 12 of 15:   0%|          | 0/620 [00:00<?, ?it/s]

Running Epoch 13 of 15:   0%|          | 0/620 [00:00<?, ?it/s]

Running Epoch 14 of 15:   0%|          | 0/620 [00:00<?, ?it/s]

(9300, 0.21479370324467875)

In [None]:
# Evaluate on the held-out split and unpack the three values eval_model returns.
# NOTE(review): wrong_predictions presumably holds the misclassified examples — verify.
result, model_outputs, wrong_predictions = model.eval_model(eval_df)

  0%|          | 0/4957 [00:00<?, ?it/s]

Running Evaluation:   0%|          | 0/155 [00:00<?, ?it/s]

In [None]:
# Display the metrics dictionary returned by eval_model.
result

{'mcc': 0.7380433149854716, 'eval_loss': 2.152385096530047}

In [None]:
# Number of entries in wrong_predictions (presumably the misclassified evaluation examples).
len(wrong_predictions)

468