------
### Library setup and mounting g-drive 


In [1]:
#!dir
#%cd "D:"

In [2]:
#!pip install git+https://github.com/huggingface/nlp
#!pip install --quiet transformers
#!pip install --quiet nlp==0.2.0
#!pip install --quiet datasets

In [3]:
import numpy as np
import torch
import torch.nn as nn
import pandas as pd
import transformers
import nlp
from sklearn import preprocessing
# from torch.utils.data import Dataset
from datasets import load_dataset, Dataset
import matplotlib.pyplot as plt
import logging
from sklearn.model_selection import train_test_split
logging.basicConfig(level=logging.CRITICAL)

import json
from tqdm.notebook import tqdm

tqdm.pandas()

  from .autonotebook import tqdm as notebook_tqdm


------
### Fetching our data


In [4]:
# paths to semeval dataset
path_to_longform_textrank_data = "../dataset/semeval/"
path_to_train = f"{path_to_longform_textrank_data}train.csv"
path_to_test = f"{path_to_longform_textrank_data}test.csv"
path_to_validation = f"{path_to_longform_textrank_data}final_test_v1.csv"

# paths to fnc dataset
path_to_fnc_data = "../dataset/FNC/"
path_to_fnc_train = f"{path_to_fnc_data}train_v2.csv"
path_to_fnc_validation = f"{path_to_fnc_data}validation.csv"
path_to_fnc_test = f"{path_to_fnc_data}test_v2.csv"

# path to hyperpartisan pan data
path_to_pan_data = "../dataset/hyperpartisan_dataset/"
path_to_pan_train = f"{path_to_pan_data}train.csv"
path_to_pan_validation = f"{path_to_pan_data}validation.csv"
path_to_pan_test = f"{path_to_pan_data}test.csv"

# path to fnid data
path_to_fnid = "../dataset/FNID-dataset/"
path_to_fnid_train = f"{path_to_fnid}liar_train_processed.csv"
path_to_fnid_validation = f"{path_to_fnid}liar_dev_processed.csv"
path_to_fnid_test = f"{path_to_fnid}liar_test_processed.csv"

In [5]:
dataset_dict = {
  "stsb": load_dataset('glue', name='stsb'),
  "semeval": load_dataset('csv', data_files={
      'train': path_to_train,
      'validation': path_to_validation,
      'test': path_to_test,
  }),
  "fnc": load_dataset('csv', data_files={
      'train': path_to_fnc_train,
      # 'validation': path_to_fnc_validation,
      'test': path_to_fnc_test,
  }),
  "fnid": load_dataset('csv', data_files={
      'train': path_to_fnid_train,
      # 'validation': path_to_fnc_validation,
      'test': path_to_fnid_test,
  }),
  "pan": load_dataset('csv', data_files={
      'train': path_to_pan_train,
      'validation': path_to_pan_validation,
      'test': path_to_pan_test,
  })
}

100%|██████████| 3/3 [00:00<00:00, 115.38it/s]
100%|██████████| 3/3 [00:00<00:00, 72.84it/s]
100%|██████████| 2/2 [00:00<00:00, 14.22it/s]
100%|██████████| 2/2 [00:00<00:00, 25.52it/s]
100%|██████████| 3/3 [00:00<00:00, 55.98it/s]


In [6]:
dataset_dict

{'stsb': DatasetDict({
     train: Dataset({
         features: ['sentence1', 'sentence2', 'label', 'idx'],
         num_rows: 5749
     })
     validation: Dataset({
         features: ['sentence1', 'sentence2', 'label', 'idx'],
         num_rows: 1500
     })
     test: Dataset({
         features: ['sentence1', 'sentence2', 'label', 'idx'],
         num_rows: 1379
     })
 }),
 'semeval': DatasetDict({
     train: Dataset({
         features: ['pair_id', 'label', 'sentence1', 'sentence2', 'idx'],
         num_rows: 3651
     })
     validation: Dataset({
         features: ['pair_id', 'label', 'sentence1', 'sentence2', 'idx'],
         num_rows: 4902
     })
     test: Dataset({
         features: ['pair_id', 'label', 'sentence1', 'sentence2', 'idx'],
         num_rows: 408
     })
 }),
 'fnc': DatasetDict({
     train: Dataset({
         features: ['Headline', 'articleBody', 'Stance', 'idx'],
         num_rows: 44974
     })
     test: Dataset({
         features: ['Headline', 'art

We can show one example from each task.

In [7]:
for task_name, dataset in dataset_dict.items():
    print(task_name)
    print(dataset_dict[task_name]["train"][0])
    print()

stsb
{'sentence1': 'A plane is taking off.', 'sentence2': 'An air plane is taking off.', 'label': 5.0, 'idx': 0}

semeval
{'pair_id': '1647195207_1647422140', 'label': 1.0, 'sentence1': 'Just after 9:00 p.m., the first polling results of the presidential election were reported, and we already know who will face the second round, which will take place on 12 July. During the first election tour none of them received 50% support, so the second round of elections is organized which will take place on July 12th. This year the situation is so exceptional that many citizens vote in correspondence, which may affect the working time of election committee. Millions of Polish and Polish people have gone to the polls today to vote for their candidate. Among them were also a whole bunch of celebrities who reported their vote on social media. The official results of the presidential election will be published by the State Election Commission. The first round of presidential elections is already behi

------
### Creating multitask training model

In [8]:
class MultitaskModel(transformers.PreTrainedModel):
    def __init__(self, encoder, taskmodels_dict):
        """
        Setting MultitaskModel up as a PretrainedModel allows us
        to take better advantage of Trainer features
        """
        super().__init__(transformers.PretrainedConfig())

        self.encoder = encoder
        self.taskmodels_dict = nn.ModuleDict(taskmodels_dict)

    @classmethod
    def create(cls, model_name, model_type_dict, model_config_dict):
        """
        This creates a MultitaskModel using the model class and config objects
        from single-task models. 

        We do this by creating each single-task model, and having them share
        the same encoder transformer.
        """
        shared_encoder = None
        taskmodels_dict = {}
        for task_name, model_type in model_type_dict.items():
            model = model_type.from_pretrained(
                model_name, 
                config=model_config_dict[task_name],
            )
            if shared_encoder is None:
                print(cls.get_encoder_attr_name(model))
                shared_encoder = getattr(model, cls.get_encoder_attr_name(model))
            else:
                setattr(model, cls.get_encoder_attr_name(model), shared_encoder)
            taskmodels_dict[task_name] = model
        return cls(encoder=shared_encoder, taskmodels_dict=taskmodels_dict)

    @classmethod
    def get_encoder_attr_name(cls, model):
        """
        The encoder transformer is named differently in each model "architecture".
        This method lets us get the name of the encoder attribute
        """
        model_class_name = model.__class__.__name__
        if model_class_name.startswith("Bert"):
            return "bert"
        elif model_class_name.startswith("Roberta"):
            return "roberta"
        elif model_class_name.startswith("Albert"):
            return "albert"
        elif model_class_name.startswith("Deberta"):
            return "deberta"
        else:
            raise KeyError(f"Add support for new model {model_class_name}")

    def forward(self, task_name, **kwargs):
        return self.taskmodels_dict[task_name](**kwargs)

The `MultitaskModel` class consists of only two components - the shared "encoder", a dictionary to the individual task models. Now, we can simply create the corresponding task models by supplying the invidual model classes and model configs. We will use Transformers' AutoModels to further automate the choice of model class given a model architecture (in our case, let's use `microsoft/deberta-base`).

In [9]:
model_name = "microsoft/deberta-base"
multitask_model = MultitaskModel.create(
    model_name=model_name,
    model_type_dict={
        "stsb": transformers.AutoModelForSequenceClassification,
        "semeval": transformers.AutoModelForSequenceClassification,
        "pan": transformers.AutoModelForSequenceClassification,
        "fnid": transformers.AutoModelForSequenceClassification,
    },
    model_config_dict={
        "stsb": transformers.AutoConfig.from_pretrained(model_name, num_labels=1),
        "semeval": transformers.AutoConfig.from_pretrained(model_name, num_labels=1),
        "pan": transformers.AutoConfig.from_pretrained(model_name, num_labels=6),
        "fnid": transformers.AutoConfig.from_pretrained(model_name, num_labels=1),
    },
)

Some weights of the model checkpoint at microsoft/deberta-base were not used when initializing DebertaForSequenceClassification: ['lm_predictions.lm_head.LayerNorm.weight', 'lm_predictions.lm_head.dense.bias', 'lm_predictions.lm_head.LayerNorm.bias', 'lm_predictions.lm_head.bias', 'lm_predictions.lm_head.dense.weight']
- This IS expected if you are initializing DebertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing DebertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of DebertaForSequenceClassification were not initialized from the model checkpoint at microsoft/deberta-base and are newly initialized: ['pooler.dense.weight', 'pooler

deberta


Some weights of the model checkpoint at microsoft/deberta-base were not used when initializing DebertaForSequenceClassification: ['lm_predictions.lm_head.LayerNorm.weight', 'lm_predictions.lm_head.dense.bias', 'lm_predictions.lm_head.LayerNorm.bias', 'lm_predictions.lm_head.bias', 'lm_predictions.lm_head.dense.weight']
- This IS expected if you are initializing DebertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing DebertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of DebertaForSequenceClassification were not initialized from the model checkpoint at microsoft/deberta-base and are newly initialized: ['pooler.dense.weight', 'pooler

To confirm that all three task-models use the same encoder, we can check the data pointers of the respective encoders. In this case, we'll check that the word embeddings in each model all point to the same memory location.

In [10]:
if model_name.startswith("microsoft/deberta-base"):
    print(multitask_model.encoder.embeddings.word_embeddings.weight.data_ptr())
    print(multitask_model.taskmodels_dict["stsb"].deberta.embeddings.word_embeddings.weight.data_ptr())
    print(multitask_model.taskmodels_dict["semeval"].deberta.embeddings.word_embeddings.weight.data_ptr())
    # print(multitask_model.taskmodels_dict["fnc"].deberta.embeddings.word_embeddings.weight.data_ptr())
    print(multitask_model.taskmodels_dict["pan"].deberta.embeddings.word_embeddings.weight.data_ptr())
    print(multitask_model.taskmodels_dict["fnid"].deberta.embeddings.word_embeddings.weight.data_ptr())
else:
    print("Exercise for the reader: add a check for other model architectures =)")

1651767124096
1651767124096
1651767124096
1651767124096
1651767124096


In [11]:
tokenizer = transformers.AutoTokenizer.from_pretrained(model_name)

------
### Preparation of dataset:

In [12]:
max_length = 128

def convert_to_stsb_features(example_batch):
    inputs = list(zip(example_batch['sentence1'], example_batch['sentence2']))
    features = tokenizer.batch_encode_plus(
        inputs, max_length=max_length, padding="max_length", pad_to_max_length=True
    )
    features["labels"] = list(np.float_(example_batch["label"]))
    return features

def convert_to_semeval_features(example_batch):
    inputs = list(zip(example_batch['sentence1'], example_batch['sentence2']))
    features = tokenizer.batch_encode_plus(
        inputs, max_length=512, padding="max_length", pad_to_max_length=True
    )
    features["labels"] = example_batch["label"]
    return features

def convert_to_fnc_features(example_batch):
    inputs = list(zip(example_batch['Headline'], example_batch['articleBody']))
    features = tokenizer.batch_encode_plus(
        inputs, max_length=max_length, padding="max_length", pad_to_max_length=True
    )
    features["labels"] = example_batch["Stance"]
    return features

def convert_to_fnid_features(example_batch):
    inputs = list(zip(example_batch['statement'], example_batch['fullText_based_content']))
    features = tokenizer.batch_encode_plus(
        inputs, max_length=max_length, padding="max_length", pad_to_max_length=True
    )
    features["labels"] = example_batch["label-liar"]
    return features

def convert_to_pan_features(example_batch):
    features = tokenizer.batch_encode_plus(
        example_batch["text"], max_length=max_length, padding="max_length", pad_to_max_length=True
    )
    features["labels"] = example_batch["bias"]
    return features

convert_func_dict = {
    "stsb": convert_to_stsb_features,
    "fnid": convert_to_fnid_features,
    "pan": convert_to_pan_features,
    "semeval": convert_to_semeval_features,
    "fnc": convert_to_fnc_features
}

Now that we have defined the above functions, we can use `dataset.map` method available in the NLP library to apply the functions over our entire datasets. The NLP library that handles the mapping efficiently and caches the features.

In [13]:
columns_dict = {
    "stsb": ['input_ids', 'attention_mask', 'labels'],
    "semeval": ['input_ids', 'attention_mask', 'labels'],
    "pan": ['input_ids', 'attention_mask', 'labels'],
    "fnid": ['input_ids', 'attention_mask', 'labels'],
    "fnc": ['input_ids', 'attention_mask', 'labels'],
}

features_dict = {}
for task_name, dataset in dataset_dict.items():
    print(u"\u2192", task_name)
    features_dict[task_name] = {}
    for phase, phase_dataset in dataset.items():
        features_dict[task_name][phase] = phase_dataset.map(
            convert_func_dict[task_name],
            batched=True,
            load_from_cache_file=False,
        )
        print(task_name, phase, len(phase_dataset), len(features_dict[task_name][phase]))
        features_dict[task_name][phase].set_format(
            type="torch", 
            columns=columns_dict[task_name],
        )
        print(task_name, phase, len(phase_dataset), len(features_dict[task_name][phase]))

→ stsb


100%|██████████| 6/6 [00:01<00:00,  5.29ba/s]


stsb train 5749 5749
stsb train 5749 5749


100%|██████████| 2/2 [00:00<00:00,  9.01ba/s]


stsb validation 1500 1500
stsb validation 1500 1500


100%|██████████| 2/2 [00:00<00:00,  9.43ba/s]


stsb test 1379 1379
stsb test 1379 1379
→ semeval


100%|██████████| 4/4 [00:02<00:00,  1.64ba/s]


semeval train 3651 3651
semeval train 3651 3651


100%|██████████| 5/5 [00:03<00:00,  1.49ba/s]


semeval validation 4902 4902
semeval validation 4902 4902


100%|██████████| 1/1 [00:00<00:00,  3.86ba/s]


semeval test 408 408
semeval test 408 408
→ fnc


100%|██████████| 45/45 [00:38<00:00,  1.16ba/s]


fnc train 44974 44974
fnc train 44974 44974


100%|██████████| 5/5 [00:04<00:00,  1.15ba/s]


fnc test 4998 4998
fnc test 4998 4998
→ fnid


100%|██████████| 16/16 [00:25<00:00,  1.60s/ba]


fnid train 15052 15052
fnid train 15052 15052


100%|██████████| 2/2 [00:02<00:00,  1.09s/ba]


fnid test 1266 1266
fnid test 1266 1266
→ pan


100%|██████████| 11/11 [00:06<00:00,  1.60ba/s]


pan train 10837 10837
pan train 10837 10837


100%|██████████| 2/2 [00:01<00:00,  1.55ba/s]


pan validation 1913 1913
pan validation 1913 1913


100%|██████████| 3/3 [00:01<00:00,  2.01ba/s]

pan test 2250 2250
pan test 2250 2250





## Preparing a multi-task data loader and Trainer

Setting up a multi-task data loader should be simple in principle - we simply need to sample from multiple single-task data loaders with some probability, and feed each batch to the multi-task model above. Of course, along with each batch, we also need to tell the model what task it is for, so `MultitaskModel` knows to use the right corresponding task-model.

However, because we want to use the built-in `Trainer` class in Transformers, this gets a little tricky, since the `Trainer` expects a single data loader, and expects a very specific format of per-batch data. This slice of code is somewhat of a hack around that constraint. (This can become a lot more streamlined with some tweaks to the Trainer code from the Hugging Face folks =))

We need to define a `MultitaskDataloader` that combines several data loaders into a single "data loader" - not so different from our multi-task model above! This `MultitaskDataloader` should do what we described: sample from different single-task data loaders, and yield a task batch and the corresponding task name (we're going to add the `task_name` to the batch data).

We will also need to override the `get_train_dataloader` method of the `Trainer` to play well with our `MultitaskDataloader`. We do this with a `MultitaskTrainer`.

In [14]:
import dataclasses
from torch.utils.data.dataloader import DataLoader
from transformers.data.data_collator import DataCollator, InputDataClass, DefaultDataCollator
from torch.utils.data.distributed import DistributedSampler
from torch.utils.data.sampler import RandomSampler
from typing import List, Union, Dict


class NLPDataCollator(DefaultDataCollator):
    """
    Extending the existing DataCollator to work with NLP dataset batches
    """
    def collate_batch(self, features: List[Union[InputDataClass, Dict]]) -> Dict[str, torch.Tensor]:
        first = features[0]
        if isinstance(first, dict):
          # NLP data sets current works presents features as lists of dictionary
          # (one per example), so we  will adapt the collate_batch logic for that
          if "labels" in first and first["labels"] is not None:
              if first["labels"].dtype == torch.int64:
                  labels = torch.tensor([f["labels"] for f in features], dtype=torch.long)
              else:
                  labels = torch.tensor([f["labels"] for f in features], dtype=torch.float)
              batch = {"labels": labels}
          for k, v in first.items():
              if k != "labels" and v is not None and not isinstance(v, str):
                  batch[k] = torch.stack([f[k] for f in features])
          return batch
        else:
          # otherwise, revert to using the default collate_batch
          return DefaultDataCollator().collate_batch(features)


class StrIgnoreDevice(str):
    """
    This is a hack. The Trainer is going call .to(device) on every input
    value, but we need to pass in an additional `task_name` string.
    This prevents it from throwing an error
    """
    def to(self, device):
        return self


class DataLoaderWithTaskname:
    """
    Wrapper around a DataLoader to also yield a task name
    """
    def __init__(self, task_name, data_loader):
        self.task_name = task_name
        self.data_loader = data_loader

        self.batch_size = data_loader.batch_size
        self.dataset = data_loader.dataset

    def __len__(self):
        return len(self.data_loader)
    
    def __iter__(self):
        for batch in self.data_loader:
            batch["task_name"] = StrIgnoreDevice(self.task_name)
            yield batch


class MultitaskDataloader:
    """
    Data loader that combines and samples from multiple single-task
    data loaders.
    """
    def __init__(self, dataloader_dict):
        self.dataloader_dict = dataloader_dict
        self.num_batches_dict = {
            task_name: len(dataloader) 
            for task_name, dataloader in self.dataloader_dict.items()
        }
        self.task_name_list = list(self.dataloader_dict)
        self.dataset = [None] * sum(
            len(dataloader.dataset) 
            for dataloader in self.dataloader_dict.values()
        )

    def __len__(self):
        return sum(self.num_batches_dict.values())

    def __iter__(self):
        """
        For each batch, sample a task, and yield a batch from the respective
        task Dataloader.

        We use size-proportional sampling, but you could easily modify this
        to sample from some-other distribution.
        """
        task_choice_list = []
        for i, task_name in enumerate(self.task_name_list):
            task_choice_list += [i] * self.num_batches_dict[task_name]
        task_choice_list = np.array(task_choice_list)
        np.random.shuffle(task_choice_list)
        dataloader_iter_dict = {
            task_name: iter(dataloader) 
            for task_name, dataloader in self.dataloader_dict.items()
        }
        for task_choice in task_choice_list:
            task_name = self.task_name_list[task_choice]
            yield next(dataloader_iter_dict[task_name])    

class MultitaskTrainer(transformers.Trainer):

    def get_single_train_dataloader(self, task_name, train_dataset):
        """
        Create a single-task data loader that also yields task names
        """
        if self.train_dataset is None:
            raise ValueError("Trainer: training requires a train_dataset.")
        else:
            train_sampler = (
                RandomSampler(train_dataset)
                if self.args.local_rank == -1
                else DistributedSampler(train_dataset)
            )

        data_loader = DataLoaderWithTaskname(
            task_name=task_name,
            data_loader=DataLoader(
              train_dataset,
              batch_size=self.args.train_batch_size,
              sampler=train_sampler,
              collate_fn=self.data_collator.collate_batch,
            ),
        )
        return data_loader

    def get_train_dataloader(self):
        """
        Returns a MultitaskDataloader, which is not actually a Dataloader
        but an iterable that returns a generator that samples from each 
        task Dataloader
        """
        return MultitaskDataloader({
            task_name: self.get_single_train_dataloader(task_name, task_dataset)
            for task_name, task_dataset in self.train_dataset.items()
        })


## Time to train!

Okay, we have done all the hard work, now it is time for it to pay off. We can now simply create our `MultitaskTrainer`, and start training! 

In [20]:
train_dataset = {
    task_name: dataset["train"] 
    for task_name, dataset in features_dict.items()
}
trainer = MultitaskTrainer(
    model=multitask_model,
    args=transformers.TrainingArguments(
        output_dir="./longformer/model_pan_fnid_deberta",
        overwrite_output_dir=True,
        learning_rate=1e-4,
        do_train=True,
        num_train_epochs=2,
        # Adjust batch size if this doesn't fit on the Colab GPU
        per_device_train_batch_size=2,  
        save_steps=20000,
    ),
    data_collator=NLPDataCollator(),
    train_dataset=train_dataset,
) 
trainer.train()

PyTorch: setting up devices
The default value for the training argument `--report_to` will change in v5 (from all installed integrations to none). In v5, you will need to use `--report_to all` to get the same behavior as now. You should start updating your code and make this info disappear :-).
***** Running training *****
  Num examples = 80263
  Num Epochs = 2
  Instantaneous batch size per device = 2
  Total train batch size (w. parallel, distributed & accumulation) = 2
  Gradient Accumulation steps = 1
  Total optimization steps = 80266
  Number of trainable parameters = 140971017



[A[A[A

OutOfMemoryError: CUDA out of memory. Tried to allocate 2.00 MiB (GPU 0; 4.00 GiB total capacity; 2.52 GiB already allocated; 204.80 KiB free; 2.64 GiB reserved in total by PyTorch) If reserved memory is >> allocated memory try setting max_split_size_mb to avoid fragmentation.  See documentation for Memory Management and PYTORCH_CUDA_ALLOC_CONF

-----
### Prediction

In [None]:
from collections import defaultdict

preds = defaultdict(list)
batch_size = 1
task_name = 'semeval'
dataset = 'test'
val_len = len(features_dict[task_name][dataset])

for index in tqdm(range(0, val_len, batch_size), total=val_len):

    idx = features_dict[task_name][dataset][index]
    inputs = features_dict[task_name][dataset][index]

    overall, attention_mask, input_ids = inputs['labels'], inputs['attention_mask'].unsqueeze(0).to("cuda"), inputs['input_ids'].unsqueeze(0).to("cuda")

    args = {'input_ids': input_ids, 'attention_mask': attention_mask}   
    overall_pred = multitask_model(task_name, **args)


    preds['idx'].append(features_dict[task_name][dataset]['idx'][index])
    preds['overall'].append(overall_pred['logits'].item())

In [None]:
# Evalute Semeval data
nlp.load_metric('glue', name="stsb").compute(
    preds['overall'],
    features_dict[task_name][dataset]['labels'].tolist(),
)

In [None]:
def check_pred(pred):
  if pred >= 4:
    return 4.0000
  elif pred <= 1:
    return 1.0000
  else:
    return pred

In [None]:
# Prediction on test data
# retrieve the test_v1.csv file
main_df = pd.read_csv('/content/drive/MyDrive/SemEval-Akash_Nidhir_Rishikesh/SemEval 2022 - Multilingual Document Similarity/Semeval-Task-8/dataset/test_v1.csv')
predictions = preds['overall']
df = pd.read_csv(path_to_validation)
pair_ids = df['pair_id']
# create new dataframe
pred_df = pd.DataFrame({
    'pair_id': pair_ids,
    'Overall': predictions,
})
# merge data
merged_data = pd.merge(pred_df, main_df, how="outer", on="pair_id")
merged_data.drop(['url1_lang', 'url2_lang', 'link1', 'link2', 'ia_link1', 'ia_link2'], axis=1, inplace=True)
merged_data['Overall'] = merged_data['Overall'].round(4)
# save data
merged_data.to_csv('prediction.csv', index=False)

# processing
merged_data["Proc_Overall"] = merged_data.progress_apply(
    lambda row: check_pred(row['Overall']),
    axis=1
)
merged_data.drop(['Overall'], axis=1, inplace=True)
merged_data.rename(columns={
    "Proc_Overall": "Overall"
}, inplace=True, errors="raise")
merged_data.to_csv("prediction.csv", index=False)