In [None]:
!pip install datasets evaluate transformers[sentencepiece]
!pip install accelerate

In [3]:
import torch
from transformers import AutoConfig, AutoModel, GPT2Tokenizer, TextDataset, TrainingArguments
from transformers import DataCollatorForLanguageModeling, Trainer, AutoTokenizer, GPT2LMHeadModel

In [None]:
config = AutoConfig.from_pretrained("/content/drive/MyDrive/NLP/model")
model = GPT2LMHeadModel.from_pretrained("/content/drive/MyDrive/NLP/model")
tokenizer = GPT2Tokenizer.from_pretrained('gpt2')


In [5]:
model.config.attention_dropout = 0.1
model.config.hidden_dropout_prob = 0.1

In [None]:
train_dataset = TextDataset(
    tokenizer = tokenizer,
    file_path = "/content/drive/MyDrive/NLP/mitig_train.txt",
    block_size = 128
)

In [7]:
validation_dataset = TextDataset(
    tokenizer = tokenizer,
    file_path = "/content/drive/MyDrive/NLP/validation.txt",
    block_size = 128
)

In [8]:
test_dataset = TextDataset(
    tokenizer = tokenizer,
    file_path = "/content/drive/MyDrive/NLP/test.txt",
    block_size = 128
)

In [9]:
data_collator = DataCollatorForLanguageModeling(
    tokenizer = tokenizer, mlm=False
)

In [10]:
training_args = TrainingArguments(
    output_dir = "/content/drive/MyDrive/NLP/NEW_model",
    overwrite_output_dir = False,
    num_train_epochs = 3,
    per_device_train_batch_size = 2,
    gradient_accumulation_steps = 2,
    save_steps = 600,
    save_total_limit = 2,
    logging_dir = "/content/drive/MyDrive/NLP/logs",
    save_strategy = 'steps',
    evaluation_strategy = 'steps',
    eval_steps = 600,
    logging_steps = 100,
    do_train = True,
    do_eval = True,
    load_best_model_at_end = True,
    remove_unused_columns = True,
    weight_decay = 0.01, # L2 Regularization
    warmup_steps = 1000
)

In [11]:
trainer = Trainer(
    model = model,
    args = training_args,
    data_collator = data_collator,
    train_dataset = train_dataset,
    eval_dataset = validation_dataset
)

In [12]:
trainer.train()

Step,Training Loss,Validation Loss
600,3.4017,3.717812
1200,3.4086,3.743768
1800,3.3735,3.743501
2400,3.2359,3.743263
3000,3.392,3.738752
3600,3.1076,3.749642
4200,3.2356,3.7424
4800,3.1186,3.739563
5400,3.1059,3.73621
6000,3.2004,3.735053


TrainOutput(global_step=9435, training_loss=3.197336012734644, metrics={'train_runtime': 2785.8463, 'train_samples_per_second': 13.547, 'train_steps_per_second': 3.387, 'total_flos': 2465290321920000.0, 'train_loss': 3.197336012734644, 'epoch': 3.0})

## EWC

In [11]:
tasks = [{'name': 'task', 'dataset_path': '/content/drive/MyDrive/NLP/mitig_train.txt'}]

In [12]:
# Initializing EWC-specific variables
ewc_lambda = 0.1
prev_model = model.state_dict()
fisher_information = {}

In [13]:
for task in tasks:
  # Set the model in training mode
  model.train()

  # Calculate the task specific loss
  for step, batch in enumerate(train_dataset):
    inputs, labels = batch
    outputs = model(**inputs)
    task_loss = outputs.loss()

    # Calcualte gradients
    task_loss.backward()

    # Update Fisher information
    for name, param in model.named_parameters():
      if name in fisher_information:
        fisher_information[name] += (param.grad.data ** 2).mean()
      else:
        fisher_information[name] = (param.grad.data ** 2).mean()

    # Reset gradients
    optimizer.zero_grad()

    # Add EWC regularization term to the loss
    for name, param in model.named_parameters():
      ewc_term = (fisher_information[name] * (param - prev_model[name]).pow(2)).sum()
      task_loss += (ewc_lamda / 2) * ewc_term

    # Backpropagate and optimize
    task_loss.backward()
    optimizer.step()

  # Update the previous model
  prev_model = model.state_dict()



ValueError: ignored

In [None]:
trainer.train()

Step,Training Loss,Validation Loss
600,3.4079,3.726959
1200,3.4086,3.746835
1800,3.3718,3.744811
2400,3.2345,3.743347
3000,3.3909,3.739234
3600,3.1051,3.752472
4200,3.2299,3.745833
4800,3.1137,3.740594
5400,3.1013,3.738008
6000,3.1951,3.736427


TrainOutput(global_step=9435, training_loss=3.195666524121922, metrics={'train_runtime': 2499.5264, 'train_samples_per_second': 15.099, 'train_steps_per_second': 3.775, 'total_flos': 2465290321920000.0, 'train_loss': 3.195666524121922, 'epoch': 3.0})

In [None]:
results = trainer.evaluate(eval_dataset=test_dataset)

In [None]:
results

In [13]:
trainer.save_model('/content/drive/MyDrive/NLP/final_model')

__Test the model__

In [None]:
model_dir = "/content/drive/MyDrive/NLP/final_model"
model = GPT2LMHeadModel.from_pretrained(model_dir)
tokenizer = GPT2Tokenizer.from_pretrained('gpt2')

def chat_with_model():
  while True:
    user_input = input("You: ")
    input_ids = tokenizer.encode(user_input, return_tensors='pt')

    response_ids = model.generate(input_ids, max_length=100, num_return_sequences=1,
                                  temperature=0.9, top_k=15, do_sample=True,
                                  pad_token_id=model.config.eos_token_id)

    response = tokenizer.decode(response_ids[0], skip_special_tokens=True)
    print(f"Model: {response}")

chat_with_model()