# Notebook used for training a pretrained model

In [1]:
from transformers import AutoTokenizer, AutoModelForSequenceClassification
import evaluate
import numpy as np
from transformers import TrainingArguments, Trainer
from datasets import Dataset,DatasetDict
import datautils
import pandas as pd
import fine_tuning_utils

In [2]:
# --- Input data ---
DATA_DIR = "Dataset/"
DATASET = "./Dataset/power-gb-train.tsv"

# --- Output directories ---
RES_DIR = "./Results/"
EMBED_DIR = "./Embeddings/"
CHECK_DIR = "./test_trainer/"

# --- Runtime ---
# Device as reported by the project's datautils helper.
DEVICE = datautils.get_device()

In [3]:
# Hold-out split of the TSV file; the last two returned values are discarded.
(X_train, y_train,
 X_val, y_val,
 _, _) = datautils.split_holdout_dataset(DATASET)

# One DataFrame per split, each with a "text" and a "label" column.
df_train = pd.DataFrame(dict(text=X_train, label=y_train))
df_val = pd.DataFrame(dict(text=X_val, label=y_val))

# NOTE: the validation frame is stored under the 'test' key.
ds_dict = {
    split_name: Dataset.from_pandas(frame)
    for split_name, frame in (('train', df_train), ('test', df_val))
}

dataset = DatasetDict(ds_dict)

In [4]:
# Display the DatasetDict to check the features and row count of each split.
dataset

DatasetDict({
    train: Dataset({
        features: ['text', 'label'],
        num_rows: 23944
    })
    test: Dataset({
        features: ['text', 'label'],
        num_rows: 5987
    })
})

In [5]:

# Tokenizer and model are loaded from the same checkpoint identifier.
CHECKPOINT = "launch/POLITICS"

tokenizer = AutoTokenizer.from_pretrained(CHECKPOINT)
model = AutoModelForSequenceClassification.from_pretrained(
    CHECKPOINT,
    num_labels=1,      # classification head with a single output
    device_map='cpu',  # load the weights on CPU
)
model  # echo the architecture as the cell output

Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at launch/POLITICS and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


RobertaForSequenceClassification(
  (roberta): RobertaModel(
    (embeddings): RobertaEmbeddings(
      (word_embeddings): Embedding(50265, 768, padding_idx=1)
      (position_embeddings): Embedding(514, 768, padding_idx=1)
      (token_type_embeddings): Embedding(1, 768)
      (LayerNorm): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
      (dropout): Dropout(p=0.1, inplace=False)
    )
    (encoder): RobertaEncoder(
      (layer): ModuleList(
        (0-11): 12 x RobertaLayer(
          (attention): RobertaAttention(
            (self): RobertaSelfAttention(
              (query): Linear(in_features=768, out_features=768, bias=True)
              (key): Linear(in_features=768, out_features=768, bias=True)
              (value): Linear(in_features=768, out_features=768, bias=True)
              (dropout): Dropout(p=0.1, inplace=False)
            )
            (output): RobertaSelfOutput(
              (dense): Linear(in_features=768, out_features=768, bias=True)
             

In [6]:
# Total number of scalar parameters that currently require gradients.
sum(
    weight.numel()
    for weight in model.parameters()
    if weight.requires_grad
)

124646401

## Tokenize data for the model

In [7]:
# Build the tokenization callable once, then map it over every split.
# NOTE(review): batched=True combined with batch_size=1 passes a single example
# per call, so batching brings no speed-up here; confirm whether that is
# intentional (the behavior of tokenize_function is not visible in this notebook).
tokenize_batch = fine_tuning_utils.tokenize_function(tokenizer)
tokenized_dataset = dataset.map(tokenize_batch, batched=True, batch_size=1)

Map:   0%|          | 0/23944 [00:00<?, ? examples/s]

Map:   0%|          | 0/5987 [00:00<?, ? examples/s]

## Train

In [8]:
# Write checkpoints to CHECK_DIR (set in the configuration cell) instead of
# repeating the directory name as a literal, and evaluate after every epoch.
training_args = TrainingArguments(
    output_dir=CHECK_DIR,
    evaluation_strategy="epoch",
)

In [9]:
# Bundle the classification metrics into a single evaluator object.
clf_metrics = evaluate.combine(["accuracy", "f1", "precision", "recall"])

# Smoke test on a toy example where one of the three predictions is wrong.
clf_metrics.compute(
    predictions=[0, 1, 0],
    references=[0, 1, 1],
)


{'accuracy': 0.6666666666666666,
 'f1': 0.6666666666666666,
 'precision': 1.0,
 'recall': 0.5}

In [10]:
# Freeze every parameter whose name contains 'roberta'; the remaining ones stay
# trainable and are printed with the tag 'ESCLUSO' (Italian for "excluded").
for param_name, weight in model.named_parameters():
    if 'roberta' not in param_name:
        print(param_name, weight.requires_grad, 'ESCLUSO')
        continue
    weight.requires_grad = False

classifier.dense.weight True ESCLUSO
classifier.dense.bias True ESCLUSO
classifier.out_proj.weight True ESCLUSO
classifier.out_proj.bias True ESCLUSO


In [11]:
# Print each parameter name with its requires_grad flag to verify the freeze.
for param_name, weight in model.named_parameters():
    print(param_name, weight.requires_grad)

roberta.embeddings.word_embeddings.weight False
roberta.embeddings.position_embeddings.weight False
roberta.embeddings.token_type_embeddings.weight False
roberta.embeddings.LayerNorm.weight False
roberta.embeddings.LayerNorm.bias False
roberta.encoder.layer.0.attention.self.query.weight False
roberta.encoder.layer.0.attention.self.query.bias False
roberta.encoder.layer.0.attention.self.key.weight False
roberta.encoder.layer.0.attention.self.key.bias False
roberta.encoder.layer.0.attention.self.value.weight False
roberta.encoder.layer.0.attention.self.value.bias False
roberta.encoder.layer.0.attention.output.dense.weight False
roberta.encoder.layer.0.attention.output.dense.bias False
roberta.encoder.layer.0.attention.output.LayerNorm.weight False
roberta.encoder.layer.0.attention.output.LayerNorm.bias False
roberta.encoder.layer.0.intermediate.dense.weight False
roberta.encoder.layer.0.intermediate.dense.bias False
roberta.encoder.layer.0.output.dense.weight False
roberta.encoder.layer.

In [12]:
# Wire the model, training arguments, tokenized splits and metric callback
# into a Trainer.
# The former trailing `DataLoaderConfiguration(...)` statement was removed: the
# name is never imported in this notebook (NameError on a fresh kernel) and its
# result was not passed to anything.
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=tokenized_dataset["train"],
    eval_dataset=tokenized_dataset["test"],  # validation split, stored under "test"
    compute_metrics=fine_tuning_utils.compute_metrics(clf_metrics),
)


In [13]:
# Run the training loop of the Trainer built in the previous cell.
trainer.train()

  0%|          | 0/8979 [00:00<?, ?it/s]

: 