# **Fine Tune GPT 2**

In [4]:
pip install transformers[torch]

Collecting accelerate>=0.21.0 (from transformers[torch])
  Downloading accelerate-0.29.2-py3-none-any.whl (297 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m297.4/297.4 kB[0m [31m2.7 MB/s[0m eta [36m0:00:00[0m
Collecting nvidia-cuda-nvrtc-cu12==12.1.105 (from torch->transformers[torch])
  Using cached nvidia_cuda_nvrtc_cu12-12.1.105-py3-none-manylinux1_x86_64.whl (23.7 MB)
Collecting nvidia-cuda-runtime-cu12==12.1.105 (from torch->transformers[torch])
  Using cached nvidia_cuda_runtime_cu12-12.1.105-py3-none-manylinux1_x86_64.whl (823 kB)
Collecting nvidia-cuda-cupti-cu12==12.1.105 (from torch->transformers[torch])
  Using cached nvidia_cuda_cupti_cu12-12.1.105-py3-none-manylinux1_x86_64.whl (14.1 MB)
Collecting nvidia-cudnn-cu12==8.9.2.26 (from torch->transformers[torch])
  Using cached nvidia_cudnn_cu12-8.9.2.26-py3-none-manylinux1_x86_64.whl (731.7 MB)
Collecting nvidia-cublas-cu12==12.1.3.1 (from torch->transformers[torch])
  Using cached nvidia_cublas_cu

In [5]:
!pip install transformers_interpret

Collecting transformers_interpret
  Downloading transformers_interpret-0.10.0-py3-none-any.whl (45 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m45.8/45.8 kB[0m [31m851.9 kB/s[0m eta [36m0:00:00[0m
[?25hCollecting captum>=0.3.1 (from transformers_interpret)
  Downloading captum-0.7.0-py3-none-any.whl (1.3 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.3/1.3 MB[0m [31m7.8 MB/s[0m eta [36m0:00:00[0m
Collecting jedi>=0.16 (from ipython<8.0.0,>=7.31.1->transformers_interpret)
  Downloading jedi-0.19.1-py2.py3-none-any.whl (1.6 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.6/1.6 MB[0m [31m26.9 MB/s[0m eta [36m0:00:00[0m
Installing collected packages: jedi, captum, transformers_interpret
Successfully installed captum-0.7.0 jedi-0.19.1 transformers_interpret-0.10.0


Fine Tune

In [None]:
from transformers import Trainer, TrainingArguments, GPT2Tokenizer, GPT2LMHeadModel
from transformers import TextDataset, DataCollatorForLanguageModeling

In [None]:
json_file_path = '/content/drive/MyDrive/Research_Project/Codes/JSON2GPT2/D3000/output.json'
val_json_file_path = '/content/drive/MyDrive/Research_Project/Codes/JSON2GPT2/D3000/val_output.json'

In [None]:
def load_dataset(file_path, tokenizer, block_size=128):
    dataset = TextDataset(
        tokenizer=tokenizer,
        file_path=file_path,
        block_size=block_size,
    )
    return dataset

In [None]:
def load_data_collator(tokenizer, mlm=False):
    data_collator = DataCollatorForLanguageModeling(
        tokenizer=tokenizer,
        mlm=mlm,
    )
    return data_collator

In [None]:
def compute_metrics(p):
    return {"perplexity": p.metrics["eval_loss"]}

In [None]:
gradient_accumulation_steps = 4  # Accumulate gradients over multiple steps
per_device_train_batch_size = 2  # Adjust according to your GPU memory

In [None]:
def train(train_file_path, eval_file_path, model_name,
          output_dir,
          overwrite_output_dir,
          per_device_train_batch_size,
          num_train_epochs,
          save_steps):

    tokenizer = GPT2Tokenizer.from_pretrained(model_name)
    train_dataset = load_dataset(train_file_path, tokenizer)
    eval_dataset = load_dataset(eval_file_path, tokenizer)
    data_collator = load_data_collator(tokenizer)

    tokenizer.save_pretrained(output_dir)

    model = GPT2LMHeadModel.from_pretrained(model_name)

    training_args = TrainingArguments(
        output_dir=output_dir,
        overwrite_output_dir=overwrite_output_dir,
        per_device_train_batch_size=per_device_train_batch_size,
        gradient_accumulation_steps=gradient_accumulation_steps,
        num_train_epochs=num_train_epochs,
        evaluation_strategy="steps",  # Evaluate every save_steps steps
        save_strategy="steps",  # Save model every save_steps steps
        eval_steps=save_steps,
        save_steps=save_steps,
        logging_steps=save_steps,
        save_total_limit=2,
        load_best_model_at_end=True,
    )

    trainer = Trainer(
        model=model,
        args=training_args,
        data_collator=data_collator,
        train_dataset=train_dataset,
        eval_dataset=eval_dataset,
        compute_metrics=compute_metrics,
    )

    trainer.train()
    trainer.save_model()

In [None]:
train_file_path = json_file_path
eval_file_path = val_json_file_path
model_name = 'gpt2'  # here we can use a bigger model like gpt2-medium, gpt2-large etc
output_dir = '/content/drive/MyDrive/Research_Project/Codes/Result2GPT2/D3000'
overwrite_output_dir = False
per_device_train_batch_size = 8
num_train_epochs = 12
save_steps = 5000

In [None]:
train(
    train_file_path=train_file_path,
    eval_file_path=eval_file_path,
    model_name=model_name,
    output_dir=output_dir,
    overwrite_output_dir=overwrite_output_dir,
    per_device_train_batch_size=per_device_train_batch_size,
    num_train_epochs=num_train_epochs,
    save_steps=save_steps
)


### **Get Model Predictions**

In [None]:
# Load the fine-tuned model
model_name = '/content/drive/MyDrive/Research_Project/Codes/Result2GPT2/B8E3'
model = GPT2LMHeadModel.from_pretrained(model_name, local_files_only=True)
tokenizer = GPT2Tokenizer.from_pretrained(model_name)

# Input question
input_question = "A farmer has 120 acres of land. He plants corn on 3/4 of the land and soybeans on the rest. How many more acres of corn than soybeans does he plant?"
# Encode the input question
input_ids = tokenizer.encode(f"[Q] {input_question}", return_tensors="pt")

# Generate the equation
output = model.generate(input_ids, max_length=500, num_beams=5, no_repeat_ngram_size=2, top_k=50, top_p=0.95, temperature=0)
generated_equation = tokenizer.decode(output[0], skip_special_tokens=True)
print(generated_equation)

The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


[Q] A farmer has 120 acres of land. He plants corn on 3/4 of the land and soybeans on the rest. How many more acres of corn than soybeans does he plant?",
   "[E] Let's denote the number of acres planted as \\( x \\) and the total amount he has to pay for all the corn and corn seeds. According to the information given:\n\n\\[ x = \\frac{120}{3.4} \\times 3x = 120 + 120 = 180 \\]\na = 90 \\text{ acres} \u00f7 90 = 360 \\nx \\cdot 360 = 0.5 \\)\na \\approx \\sqrt{180}{360} = 1.25 \\\nn = 2.75 \\}\nt = 3.375 \\% of \\[ \\left(x - 360)}{2.875} + \\right)(x + 180) = -1.125 \\\\\nz = 4.625 \\^2 + -0.05 \\nz \\u03c0 = 5.075 \\)\ny = 6.025 \\pi = 9.975 \\nn \\nd = 7.325 \\).\nb = (6.275 - -5) + (7.475 -6) \\nr = 8.825 \\nb \\nw = 10.895 \\).", (quoted from the second part of this article)\n- The farmer planted a total of 180 acres in the first year, so he paid a certain amount per acre for each acre planted. Since he planted 2/3 times as many corn as corn, we can set up the following equation 