In [None]:
!pip install datasets evaluate transformers[sentencepiece]
!pip install accelerate

In [2]:
import torch
from transformers import AutoConfig, AutoModel, GPT2Tokenizer, TextDataset, TrainingArguments
from transformers import DataCollatorForLanguageModeling, Trainer, AutoTokenizer, GPT2LMHeadModel

In [None]:
config = AutoConfig.from_pretrained("/content/drive/MyDrive/NLP/final_model")
model = GPT2LMHeadModel.from_pretrained("/content/drive/MyDrive/NLP/final_model")
tokenizer = GPT2Tokenizer.from_pretrained('gpt2')


In [4]:
model.config.attention_dropout = 0.1
model.config.hidden_dropout_prob = 0.1

In [5]:
train_dataset = TextDataset(
    tokenizer = tokenizer,
    file_path = "/content/drive/MyDrive/NLP/dataset_1.csv",
    block_size = 128
)



In [6]:
validation_dataset = TextDataset(
    tokenizer = tokenizer,
    file_path = "/content/drive/MyDrive/NLP/validation.csv",
    block_size = 128
)

In [None]:
test_dataset = TextDataset(
    tokenizer = tokenizer,
    file_path = "/content/drive/MyDrive/NLP/test.csv",
    block_size = 128
)

In [7]:
data_collator = DataCollatorForLanguageModeling(
    tokenizer = tokenizer, mlm=False
)

In [8]:
training_args = TrainingArguments(
    output_dir = "/content/drive/MyDrive/NLP/trained_again_model",
    overwrite_output_dir = False,
    num_train_epochs = 3,
    per_device_train_batch_size = 2,
    gradient_accumulation_steps = 2,
    save_steps = 4000,
    save_total_limit = 1,
    logging_dir = "/content/drive/MyDrive/NLP/logs",
    save_strategy = 'steps',
    evaluation_strategy = 'steps',
    eval_steps = 4000,
    logging_steps = 1000,
    do_train = True,
    do_eval = True,
    load_best_model_at_end = True,
    remove_unused_columns = True,
    weight_decay = 0.01, # L2 Regularization
    warmup_steps = 2000
)

In [9]:
trainer = Trainer(
    model = model,
    args = training_args,
    data_collator = data_collator,
    train_dataset = train_dataset,
    eval_dataset = validation_dataset
)

In [10]:
trainer.train()

Step,Training Loss,Validation Loss
4000,2.3011,2.182824
8000,2.2071,2.110968
12000,2.1466,2.075742
16000,2.1245,2.047244
20000,2.0986,2.028044
24000,1.9875,2.018247
28000,1.9812,2.005464
32000,1.9523,1.994099
36000,1.9662,1.983708
40000,1.9587,1.974724


FailedPreconditionError: ignored

In [None]:
trainer.save_model('/content/drive/MyDrive/NLP/trained_again_model')

In [None]:
results = trainer.evaluate(eval_dataset=test_dataset)

In [None]:
results

__Test the model__

In [3]:
model_dir = "/content/drive/MyDrive/NLP/trained_again_model/checkpoint-40000"
model = GPT2LMHeadModel.from_pretrained(model_dir)
tokenizer = GPT2Tokenizer.from_pretrained('gpt2')

def chat_with_model():
  while True:
    user_input = input("You: ")
    input_ids = tokenizer.encode(user_input, return_tensors='pt')

    response_ids = model.generate(input_ids, max_length=200, num_return_sequences=1,
                                  temperature=0.9, top_k=20, do_sample=True,
                                  pad_token_id=model.config.eos_token_id)

    response = tokenizer.decode(response_ids[0], skip_special_tokens=True)
    print(f"Model: {response}")

chat_with_model()

vocab.json:   0%|          | 0.00/1.04M [00:00<?, ?B/s]

merges.txt:   0%|          | 0.00/456k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.36M [00:00<?, ?B/s]

config.json:   0%|          | 0.00/665 [00:00<?, ?B/s]

You: How to become rich?
Model: How to become rich? You’re already wealthy, don’t you think?'
 'As an AI language model, I cannot have opinions, but I can provide some insights on how to become rich:\n\n1. Set a realistic goal: Identify the most important qualities you want to achieve in your life in order to become wealthy.\n\n2. Create a career: Create a career that will allow you to work on specific projects, projects, or projects that you are passionate about or passionate about.\n\n3. Build a business: Build a business that offers unique services, products or services that align with your values and interests.\n\n4. Take a risk: Take risks and start small to make a difference or take risks that will lead to success.\n\n5. Invest wisely: Invest in yourself: Make small contributions to a business that creates value, and take risks like buying a car, building a business
You: What is cryptocurrency?
Model: What is cryptocurrency? Can it be trusted or is it illegal to use?'
 'As an AI 

KeyboardInterrupt: ignored