<a href="https://colab.research.google.com/github/ArnabDey1543/LLM_Architecture_in_GenAI/blob/main/FineTuning_LLM_in_GenAI.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
!pip install transformers
!pip install torch

Collecting nvidia-cuda-nvrtc-cu12==12.1.105 (from torch)
  Using cached nvidia_cuda_nvrtc_cu12-12.1.105-py3-none-manylinux1_x86_64.whl (23.7 MB)
Collecting nvidia-cuda-runtime-cu12==12.1.105 (from torch)
  Using cached nvidia_cuda_runtime_cu12-12.1.105-py3-none-manylinux1_x86_64.whl (823 kB)
Collecting nvidia-cuda-cupti-cu12==12.1.105 (from torch)
  Using cached nvidia_cuda_cupti_cu12-12.1.105-py3-none-manylinux1_x86_64.whl (14.1 MB)
Collecting nvidia-cudnn-cu12==8.9.2.26 (from torch)
  Using cached nvidia_cudnn_cu12-8.9.2.26-py3-none-manylinux1_x86_64.whl (731.7 MB)
Collecting nvidia-cublas-cu12==12.1.3.1 (from torch)
  Using cached nvidia_cublas_cu12-12.1.3.1-py3-none-manylinux1_x86_64.whl (410.6 MB)
Collecting nvidia-cufft-cu12==11.0.2.54 (from torch)
  Using cached nvidia_cufft_cu12-11.0.2.54-py3-none-manylinux1_x86_64.whl (121.6 MB)
Collecting nvidia-curand-cu12==10.3.2.106 (from torch)
  Using cached nvidia_curand_cu12-10.3.2.106-py3-none-manylinux1_x86_64.whl (56.5 MB)
Collectin

In [5]:
import torch
from transformers import GPT2Tokenizer, GPT2LMHeadModel, AdamW, get_linear_schedule_with_warmup
from torch.utils.data import Dataset, DataLoader

# Load pre-trained model tokenizer
tokenizer = GPT2Tokenizer.from_pretrained('gpt2')
tokenizer.pad_token = tokenizer.eos_token

# Custom dataset class
class TextDataset(Dataset):
  def __init__(self, file_path, tokenizer, max_length=1024):
    self.tokenizer = tokenizer
    self.file_path = file_path
    self.max_length = max_length

    # Load and preprocess the dataset
    self.examples =[]
    with open(file_path, 'r') as file:
      lines = file.readlines()
      for line in lines:
        tokens = tokenizer(line, truncation =True, max_length= max_length, padding='max_length')
        self.examples.append(tokens)

  def __len__(self):
    return len(self.examples)

  def __getitem__(self, item):
    return {key: torch.tensor(val[item]) for key, val in self.examples[item].items()}

# Load pre-trained model
model = GPT2LMHeadModel.from_pretrained('gpt2')

# Hyperparameters
batch_size = 2
learning_rate = 7e-7
num_epochs = 5
warmup_steps = 30

# Dataloader
dataset = TextDataset('data.txt', tokenizer)
dataloader = DataLoader(dataset, batch_size=batch_size)

# Optimization
optimizer = AdamW(model.parameters(), lr=learning_rate)
scheduler = get_linear_schedule_with_warmup(optimizer, num_warmup_steps=warmup_steps, num_training_steps=-1)

# Fine-tuning Loop
model.train()
for epoch in range(num_epochs):
  for batch in dataloader:
    outputs = model(**batch, labels = batch['input_ids'])
    loss = outputs.loss
    loss.backward()
    optimizer.step()
    scheduler.step()
    optimizer.zero_grad()

  print(f'Epoch {epoch+1} Loss: {loss.item()}')

print('Fine-tuning completed!')






Epoch 1 Loss: nan
Epoch 2 Loss: nan
Epoch 3 Loss: nan


KeyboardInterrupt: 