In [13]:
import torch

In [14]:
use_cuda = torch.cuda.is_available()

In [15]:
use_cuda

True

## Importing Libraries


In [16]:
!pip -q install simpletransformers

In [17]:
!pip -q install bs4

In [18]:
import pandas as pd
from simpletransformers.classification import ClassificationModel, ClassificationArgs
import sklearn
from sklearn.metrics import classification_report
from sklearn.metrics import f1_score

## Loading Files From Dataset

In [19]:
df = pd.read_csv("Emotion_Analysis_Task_B_train.tsv",sep='\t')
df_eval = pd.read_csv("Emotion_Analysis_Task_B_dev.tsv",sep='\t')
df_test = pd.read_csv("Emotion_Analysis_Task_B_test.csv",sep='\t')

In [20]:
df_eval.rename(columns={'category':'Labels','text':'Text'},inplace=True)
df_eval = df_eval[['Text','Labels']]
df.rename(columns={'category':'Labels','text':'Text'},inplace=True)
df = df[['Text','Labels']]
df_test.rename(columns={'category':'Labels','text':'Text'},inplace=True)
df_test = df_test[['Text','Labels']]

In [21]:
num_labels = len(df['Labels'].unique())
keys = list(df['Labels'].unique())
values = list(range(0, num_labels))
label_dict = dict(zip(keys,values))
df['Labels'] = df['Labels'].apply(lambda x:label_dict[x])
df_eval['Labels'] = df_eval['Labels'].apply(lambda x:label_dict[x])
df_test['Labels'] = df_test['Labels'].apply(lambda x:label_dict[x])
num_labels

31

## Balancing the imbalanced dataset

In [22]:
def oversample(df):
        classes = df['Labels'].value_counts().to_dict()
        most = max(classes.values())
        classes_list = []
        for key in classes:
            classes_list.append(df[df['Labels'] == key]) 
        classes_sample = []
        for i in range(1,len(classes_list)):
            classes_sample.append(classes_list[i].sample(most, replace=True))
        df_maybe = pd.concat(classes_sample)
        final_df = pd.concat([df_maybe,classes_list[0]], axis=0)
        final_df = final_df.reset_index(drop=True)
        return pd.DataFrame({'Text': final_df['Text'].tolist(), 'Labels': final_df['Labels'].tolist()})

In [24]:
def over_under_sample(df):
  unq_labels = list(set(df['Labels'].tolist()))
  texts = df['Text'].tolist()
  labels = df['Labels'].tolist()
  data_dict = dict()

  for l in unq_labels:
    data_dict[l] = []

  for i in range(len(texts)):
    data_dict[labels[i]].append(texts[i])

  req_len = len(labels)//len(unq_labels)

  for label in data_dict:
    if len(data_dict[label]) > req_len:
      data_dict[label] = data_dict[label][:req_len]

    new_texts = []

    new_labels = []      
    for l in data_dict:
      new_texts += data_dict[l]
      new_labels += [l]*len(data_dict[l])
    return oversample(pd.DataFrame({'Text': new_texts, 'Labels': new_labels}))

In [25]:
df = over_under_sample(df)

## Model Training

In [28]:
model_args = ClassificationArgs()

In [29]:
model_args.overwrite_output_dir=True
model_args.eval_batch_size=8
model_args.train_batch_size=8
model_args.learning_rate=4e-5

In [30]:
model = ClassificationModel(
    'albert',
    'ai4bharat/indic-bert',
    num_labels=31,
    args=model_args,
    tokenizer_type="albert",
    tokenizer_name='ai4bharat/indic-bert'
) 

Some weights of the model checkpoint at ai4bharat/indic-bert were not used when initializing AlbertForSequenceClassification: ['predictions.bias', 'sop_classifier.classifier.bias', 'sop_classifier.classifier.weight', 'predictions.dense.bias', 'predictions.dense.weight', 'predictions.LayerNorm.bias', 'predictions.LayerNorm.weight', 'predictions.decoder.bias', 'predictions.decoder.weight']
- This IS expected if you are initializing AlbertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing AlbertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of AlbertForSequenceClassification were not initialized from the model checkpoint at ai4bharat/indi

In [31]:
for i in range(0,3):
    !rm -rf /kaggle/working/outputs
    model.train_model(df,eval_data=df_eval,acc=sklearn.metrics.classification_report)
    result, model_outputs, preds_list = model.eval_model(df_test,acc=sklearn.metrics.classification_report)
    for j in result.values():
        print(j)



  0%|          | 0/147560 [00:00<?, ?it/s]



Epoch:   0%|          | 0/1 [00:00<?, ?it/s]

Running Epoch 0 of 1:   0%|          | 0/18445 [00:00<?, ?it/s]



  0%|          | 0/4269 [00:00<?, ?it/s]

Running Evaluation:   0%|          | 0/534 [00:00<?, ?it/s]

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


0.07012471861403029
              precision    recall  f1-score   support

         0.0       0.00      0.00      0.00       268
         1.0       0.00      0.00      0.00       325
         2.0       0.00      0.00      0.00       176
         3.0       0.00      0.00      0.00       477
         4.0       0.00      0.00      0.00        14
         5.0       0.33      0.56      0.42       114
         6.0       0.00      0.00      0.00        79
         7.0       0.06      0.09      0.07        56
         8.0       0.00      0.00      0.00        78
         9.0       0.50      0.21      0.30       658
        10.0       0.15      0.22      0.18       192
        11.0       0.11      0.22      0.15        50
        12.0       0.20      0.09      0.12       236
        13.0       0.12      0.02      0.03       294
        14.0       0.02      0.21      0.04        62
        15.0       0.03      0.31      0.05        45
        16.0       0.00      0.00      0.00       193
       



  0%|          | 0/147560 [00:00<?, ?it/s]



Epoch:   0%|          | 0/1 [00:00<?, ?it/s]

Running Epoch 0 of 1:   0%|          | 0/18445 [00:00<?, ?it/s]



  0%|          | 0/4269 [00:00<?, ?it/s]

Running Evaluation:   0%|          | 0/534 [00:00<?, ?it/s]

0.07284406628521772
              precision    recall  f1-score   support

         0.0       0.08      0.07      0.07       268
         1.0       0.11      0.12      0.12       325
         2.0       0.06      0.09      0.07       176
         3.0       0.11      0.02      0.04       477
         4.0       0.05      0.07      0.06        14
         5.0       0.41      0.60      0.48       114
         6.0       0.08      0.09      0.08        79
         7.0       0.12      0.09      0.10        56
         8.0       0.06      0.08      0.07        78
         9.0       0.60      0.09      0.15       658
        10.0       0.15      0.22      0.18       192
        11.0       0.10      0.02      0.03        50
        12.0       0.20      0.14      0.16       236
        13.0       0.14      0.11      0.12       294
        14.0       0.02      0.02      0.02        62
        15.0       0.08      0.04      0.06        45
        16.0       0.09      0.11      0.10       193
       



  0%|          | 0/147560 [00:00<?, ?it/s]



Epoch:   0%|          | 0/1 [00:00<?, ?it/s]

Running Epoch 0 of 1:   0%|          | 0/18445 [00:00<?, ?it/s]



  0%|          | 0/4269 [00:00<?, ?it/s]

Running Evaluation:   0%|          | 0/534 [00:00<?, ?it/s]

0.07397368940179047
              precision    recall  f1-score   support

         0.0       0.08      0.10      0.09       268
         1.0       0.17      0.14      0.16       325
         2.0       0.07      0.14      0.10       176
         3.0       0.16      0.12      0.14       477
         4.0       0.00      0.00      0.00        14
         5.0       0.38      0.57      0.45       114
         6.0       0.03      0.10      0.05        79
         7.0       0.09      0.05      0.07        56
         8.0       0.08      0.06      0.07        78
         9.0       0.70      0.03      0.07       658
        10.0       0.14      0.27      0.18       192
        11.0       0.00      0.00      0.00        50
        12.0       0.21      0.12      0.15       236
        13.0       0.14      0.14      0.14       294
        14.0       0.07      0.02      0.03        62
        15.0       0.10      0.02      0.04        45
        16.0       0.11      0.13      0.12       193
       

In [32]:
predictions, raw_outputs = model.predict(df_test['Text'].to_list())

  0%|          | 0/4269 [00:00<?, ?it/s]

  0%|          | 0/534 [00:00<?, ?it/s]

In [33]:
df_final = df_test.copy()
reverse_label_dict = {v:u for u,v in label_dict.items()}
reverse_label_dict
df_final['Predicted_Labels'] = predictions
df_final['Predicted_Labels'] = df_final['Predicted_Labels'].apply(lambda x:reverse_label_dict[x])
df_final['Labels'] = df_final['Labels'].apply(lambda x:reverse_label_dict[x])
df_final['pid'] = df_final.index
df_final = df_final[['pid','Predicted_Labels','Labels']]

In [34]:
df_final

Unnamed: 0,pid,Predicted_Labels,Labels
0,0,உற்சாகம்,மகிழ்ச்சி
1,1,நடுநிலை,மகிழ்ச்சி
2,2,உண்மையை உணர்தல்,எதிர்பார்ப்பு
3,3,கிண்டல்,உண்மையை உணர்தல்
4,4,ஆர்வம்,உண்மையை உணர்தல்
...,...,...,...
4264,4264,குற்றமுணர்ந்ததால் ஏற்படும் வருத்தம்,போற்றுதல்
4265,4265,அக்கறை,உண்மையை உணர்தல்
4266,4266,உற்சாகம்,போற்றுதல்
4267,4267,எரிச்சல்,எதிர்பார்ப்பு


In [35]:
score = f1_score(df_final['Labels'],df_final['Predicted_Labels'],average='macro')
print("The macro average f1 score is:" + str(score))

The macro average f1 score is:0.0778709548670512
