## Importing Libraries


In [1]:
!pip -q install simpletransformers



In [2]:
!pip -q install bs4



In [3]:
import pandas as pd
from simpletransformers.classification import ClassificationModel, ClassificationArgs
import sklearn
from sklearn.metrics import classification_report
from sklearn.metrics import f1_score

## Loading Files From Dataset

In [4]:
df = pd.read_csv("../input/emotion-analysis-in-tamil/Emotion_Analysis_Task_A_train.csv",header=None,sep='\t')
df_eval = pd.read_csv("../input/emotion-analysis-in-tamil/Emotion_Analysis_Task_A_dev.csv",header=None,sep='\t')
df_test = pd.read_csv("../input/emotion-analysis-in-tamil/Emotion_Analysis_Task_A_test.csv",header=None,sep='\t')

In [5]:
df_eval.rename(columns={0:'Labels',1:'Text'},inplace=True)
df_eval = df_eval[['Text','Labels']]
df.rename(columns={0:'Labels',1:'Text'},inplace=True)
df = df[['Text','Labels']]
df_test.rename(columns={0:'Labels',1:'Text'},inplace=True)
df_test = df_test[['Text','Labels']]

In [6]:
num_labels = len(df['Labels'].unique())
keys = list(df['Labels'].unique())
values = list(range(0, num_labels))
label_dict = dict(zip(keys,values))
df['Labels'] = df['Labels'].apply(lambda x:label_dict[x])
df_eval['Labels'] = df_eval['Labels'].apply(lambda x:label_dict[x])
df_test['Labels'] = df_test['Labels'].apply(lambda x:label_dict[x])
num_labels

11

## Balancing the imbalanced dataset

In [7]:
def oversample(df):
        classes = df['Labels'].value_counts().to_dict()
        most = max(classes.values())
        classes_list = []
        for key in classes:
            classes_list.append(df[df['Labels'] == key]) 
        classes_sample = []
        for i in range(1,len(classes_list)):
            classes_sample.append(classes_list[i].sample(most, replace=True))
        df_maybe = pd.concat(classes_sample)
        final_df = pd.concat([df_maybe,classes_list[0]], axis=0)
        final_df = final_df.reset_index(drop=True)
        return pd.DataFrame({'Text': final_df['Text'].tolist(), 'Labels': final_df['Labels'].tolist()})

In [8]:
def over_under_sample(df):
  unq_labels = list(set(df['Labels'].tolist()))
  texts = df['Text'].tolist()
  labels = df['Labels'].tolist()
  data_dict = dict()

  for l in unq_labels:
    data_dict[l] = []

  for i in range(len(texts)):
    data_dict[labels[i]].append(texts[i])

  req_len = len(labels)//len(unq_labels)

  for label in data_dict:
    if len(data_dict[label]) > req_len:
      data_dict[label] = data_dict[label][:req_len]

    new_texts = []

    new_labels = []      
    for l in data_dict:
      new_texts += data_dict[l]
      new_labels += [l]*len(data_dict[l])
    return oversample(pd.DataFrame({'Text': new_texts, 'Labels': new_labels}))

In [9]:
df = over_under_sample(df)

## Model Training

In [10]:
model_args = ClassificationArgs()

In [11]:
model_args.overwrite_output_dir=True
model_args.eval_batch_size=8
model_args.train_batch_size=8
model_args.learning_rate=4e-5



In [12]:
model = ClassificationModel(
    'bert',
    'google/muril-base-cased',
    num_labels=11,
    args=model_args,
    tokenizer_type="bert",
    tokenizer_name='google/muril-base-cased'
) 

Downloading:   0%|          | 0.00/411 [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/909M [00:00<?, ?B/s]

Some weights of the model checkpoint at google/muril-base-cased were not used when initializing BertForSequenceClassification: ['cls.predictions.decoder.bias', 'cls.seq_relationship.weight', 'cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.decoder.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not in

Downloading:   0%|          | 0.00/3.02M [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/113 [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/181 [00:00<?, ?B/s]

In [13]:
for i in range(0,3):
    !rm -rf /kaggle/working/outputs
    model.train_model(df,eval_data=df_eval,acc=sklearn.metrics.classification_report)
    result, model_outputs, preds_list = model.eval_model(df_test,acc=sklearn.metrics.classification_report)
    for j in result.values():
        print(j)

  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."


  0%|          | 0/23474 [00:00<?, ?it/s]

Epoch:   0%|          | 0/1 [00:00<?, ?it/s]

Running Epoch 0 of 1:   0%|          | 0/2935 [00:00<?, ?it/s]

  model.parameters(), args.max_grad_norm
  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."


  0%|          | 0/4440 [00:00<?, ?it/s]

Running Evaluation:   0%|          | 0/555 [00:00<?, ?it/s]

0.17738863009987985
              precision    recall  f1-score   support

         0.0       0.60      0.00      0.00      1538
         1.0       0.18      0.38      0.25       244
         2.0       0.66      0.07      0.13       702
         3.0       0.13      0.18      0.15       277
         4.0       0.24      0.11      0.15       377
         5.0       0.22      0.58      0.32       271
         6.0       0.49      0.61      0.54       500
         7.0       0.12      0.62      0.20       196
         8.0       0.03      0.36      0.06        61
         9.0       0.37      0.28      0.32       241
        10.0       0.09      0.33      0.14        33

    accuracy                           0.21      4440
   macro avg       0.29      0.32      0.21      4440
weighted avg       0.45      0.21      0.17      4440

2.0985558312218466


  0%|          | 0/23474 [00:00<?, ?it/s]

Epoch:   0%|          | 0/1 [00:00<?, ?it/s]

Running Epoch 0 of 1:   0%|          | 0/2935 [00:00<?, ?it/s]

  0%|          | 0/4440 [00:00<?, ?it/s]

Running Evaluation:   0%|          | 0/555 [00:00<?, ?it/s]

0.27101908014819803
              precision    recall  f1-score   support

         0.0       0.52      0.16      0.24      1538
         1.0       0.21      0.33      0.25       244
         2.0       0.65      0.50      0.56       702
         3.0       0.18      0.25      0.21       277
         4.0       0.26      0.40      0.31       377
         5.0       0.26      0.49      0.34       271
         6.0       0.49      0.59      0.54       500
         7.0       0.15      0.36      0.21       196
         8.0       0.05      0.11      0.07        61
         9.0       0.30      0.44      0.36       241
        10.0       0.32      0.27      0.30        33

    accuracy                           0.34      4440
   macro avg       0.31      0.35      0.31      4440
weighted avg       0.42      0.34      0.34      4440

2.1036428021955063


  0%|          | 0/23474 [00:00<?, ?it/s]

Epoch:   0%|          | 0/1 [00:00<?, ?it/s]

Running Epoch 0 of 1:   0%|          | 0/2935 [00:00<?, ?it/s]

  0%|          | 0/4440 [00:00<?, ?it/s]

Running Evaluation:   0%|          | 0/555 [00:00<?, ?it/s]

0.28114539933074395
              precision    recall  f1-score   support

         0.0       0.54      0.24      0.33      1538
         1.0       0.21      0.31      0.25       244
         2.0       0.59      0.55      0.57       702
         3.0       0.16      0.22      0.19       277
         4.0       0.26      0.39      0.31       377
         5.0       0.25      0.42      0.31       271
         6.0       0.49      0.58      0.53       500
         7.0       0.19      0.34      0.24       196
         8.0       0.06      0.05      0.05        61
         9.0       0.33      0.42      0.37       241
        10.0       0.23      0.21      0.22        33

    accuracy                           0.36      4440
   macro avg       0.30      0.34      0.31      4440
weighted avg       0.42      0.36      0.37      4440

2.420247815106366


In [14]:
predictions, raw_outputs = model.predict(df_test['Text'].to_list())

  0%|          | 0/4440 [00:00<?, ?it/s]

  0%|          | 0/555 [00:00<?, ?it/s]

In [15]:
df_final = df_test.copy()
reverse_label_dict = {v:u for u,v in label_dict.items()}
reverse_label_dict
df_final['Predicted_Labels'] = predictions
df_final['Predicted_Labels'] = df_final['Predicted_Labels'].apply(lambda x:reverse_label_dict[x])
df_final['Labels'] = df_final['Labels'].apply(lambda x:reverse_label_dict[x])
df_final['pid'] = df_final.index
df_final = df_final[['pid','Predicted_Labels','Labels']]

In [16]:
df_final

Unnamed: 0,pid,Predicted_Labels,Labels
0,0,Ambiguous,Ambiguous
1,1,Disguist,Disguist
2,2,Anger,Disguist
3,3,Ambiguous,Ambiguous
4,4,Joy,Joy
...,...,...,...
4435,4435,Neutral,Neutral
4436,4436,Trust,Trust
4437,4437,Trust,Anticipation
4438,4438,Joy,Joy


In [17]:
score = f1_score(df_final['Labels'],df_final['Predicted_Labels'],average='macro')
print("The macro average f1 score is:" + str(score))

The macro average f1 score is:0.30695567159425036
