Google Colab Notebook : Used for model Training and Evaluation

Part 2 : Training the GPT2LM Model Using the Tokenized data file
 tokenized_single_turn_conversations_with_masks.pt

In [1]:
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [2]:
import torch

# file path in Google Drive
file_path = '/content/drive/MyDrive/Entropy/tokenized_conversations_with_context.pt'

# Load the tokenized data with attention masks
data = torch.load(file_path)
input_ids = data['input_ids']
attention_masks = data['attention_masks']

print("Loaded input_ids and attention_masks:")
print(f"Input IDs shape: {input_ids.shape}")
print(f"Attention Masks shape: {attention_masks.shape}")

  data = torch.load(file_path)


Loaded input_ids and attention_masks:
Input IDs shape: torch.Size([83097, 512])
Attention Masks shape: torch.Size([83097, 512])


In [3]:
from transformers import GPT2LMHeadModel

# Load pre-trained GPT-2 model
model = GPT2LMHeadModel.from_pretrained('gpt2')

# Move the model to the GPU
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)

# Move loaded tokenized data to GPU
input_ids = input_ids.to(device)
attention_masks = attention_masks.to(device)

print("Tokenized data and attention masks successfully moved to GPU.")
print("Model and data are ready on GPU.")

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


Tokenized data and attention masks successfully moved to GPU.
Model and data are ready on GPU.


Model Design And Training

In [None]:
import torch
from torch.utils.data import DataLoader, Dataset
from torch.optim import AdamW
from torch.cuda.amp import autocast, GradScaler
from tqdm import tqdm
from transformers import GPT2LMHeadModel

# Assuming `input_ids`, `attention_masks`, and `model` are already loaded on the GPU

# Step 3: Create a custom Dataset class and DataLoader
class ConversationDataset(Dataset):
    def __init__(self, input_ids, attention_masks):
        self.input_ids = input_ids
        self.attention_masks = attention_masks

    def __len__(self):
        return len(self.input_ids)

    def __getitem__(self, idx):
        return self.input_ids[idx], self.attention_masks[idx]

batch_size = 8  # Adjust based on GPU capacity
train_dataset = ConversationDataset(input_ids, attention_masks)
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)

# Step 4: Define optimizer and scaler for mixed precision training
optimizer = AdamW(model.parameters(), lr=3e-5)
scaler = GradScaler()

# Function to save the model, optimizer, and scaler state
def save_checkpoint(epoch, model, optimizer, scaler, path):
    checkpoint = {
        'epoch': epoch,
        'model_state_dict': model.state_dict(),
        'optimizer_state_dict': optimizer.state_dict(),
        'scaler_state_dict': scaler.state_dict()
    }
    torch.save(checkpoint, path)
    print(f"Checkpoint saved after epoch {epoch} to {path}")

# Step 5: Training loop with model, optimizer, and scaler saving after each epoch
epochs = 2  # Modify as needed
for epoch in range(epochs):
    print(f"Epoch {epoch + 1}/{epochs}")
    model.train()
    epoch_loss = 0
    progress_bar = tqdm(train_loader, desc="Training", leave=False)

    for input_ids_batch, attention_masks_batch in progress_bar:
        input_ids_batch, attention_masks_batch = input_ids_batch.to(device), attention_masks_batch.to(device)
        optimizer.zero_grad()

        with autocast():
            outputs = model(input_ids=input_ids_batch, attention_mask=attention_masks_batch, labels=input_ids_batch)
            loss = outputs.loss
            scaler.scale(loss).backward()

        scaler.step(optimizer)
        scaler.update()
        epoch_loss += loss.item()
        progress_bar.set_postfix({"loss": loss.item()})

    print(f"Epoch {epoch + 1} completed. Average loss: {epoch_loss / len(train_loader)}")

    # Save the model, optimizer, and scaler states after each epoch
    checkpoint_path = f'/content/drive/MyDrive/Entropy/gpt2_finetuned_checkpoint_epoch_{epoch + 1}.pt'
    save_checkpoint(epoch + 1, model, optimizer, scaler, checkpoint_path)

print("Training complete!")

  scaler = GradScaler()


Epoch 1/2


  with autocast():


Epoch 1 completed. Average loss: 0.44973561635207204
Checkpoint saved after epoch 1 to /content/drive/MyDrive/Entropy/gpt2_finetuned_checkpoint_epoch_1.pt
Epoch 2/2


Training:  13%|█▎        | 1301/10388 [09:31<1:07:02,  2.26it/s, loss=0.404]

Epoch 2 training from checkpoint 1

In [4]:
import torch
from torch.optim import AdamW
from torch.cuda.amp import autocast, GradScaler
from torch.utils.data import DataLoader, Dataset
from tqdm import tqdm

# Define the paths
checkpoint_path = '/content/drive/MyDrive/Entropy/gpt2_finetuned_checkpoint_epoch_1.pt'

# Load the checkpoint
checkpoint = torch.load(checkpoint_path)

# Load optimizer and scaler
optimizer = AdamW(model.parameters(), lr=3e-5)
optimizer.load_state_dict(checkpoint['optimizer_state_dict'])
scaler = torch.amp.GradScaler()
scaler.load_state_dict(checkpoint['scaler_state_dict'])
print("Optimizer and scaler states loaded.")

# Assuming `input_ids` and `attention_masks` are already loaded on the GPU

# Custom Dataset and DataLoader
class ConversationDataset(Dataset):
    def __init__(self, input_ids, attention_masks):
        self.input_ids = input_ids
        self.attention_masks = attention_masks

    def __len__(self):
        return len(self.input_ids)

    def __getitem__(self, idx):
        return self.input_ids[idx], self.attention_masks[idx]

batch_size = 8
train_dataset = ConversationDataset(input_ids, attention_masks)
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)

# Function to save the checkpoint
def save_checkpoint(epoch, model, optimizer, scaler, path):
    checkpoint = {
        'epoch': epoch,
        'model_state_dict': model.state_dict(),
        'optimizer_state_dict': optimizer.state_dict(),
        'scaler_state_dict': scaler.state_dict()
    }
    torch.save(checkpoint, path)
    print(f"Checkpoint saved after epoch {epoch} to {path}")

# Set the model to training mode
model.train()

# Training loop for one additional epoch
epochs = 1  # Continuing for one more epoch
for epoch in range(epochs):
    current_epoch = checkpoint['epoch'] + 1
    print(f"Continuing training: Epoch {current_epoch}")
    epoch_loss = 0
    progress_bar = tqdm(train_loader, desc="Training", leave=False)

    for input_ids_batch, attention_masks_batch in progress_bar:
        input_ids_batch, attention_masks_batch = input_ids_batch.to('cuda'), attention_masks_batch.to('cuda')
        optimizer.zero_grad()

        # Mixed precision training
        with torch.amp.autocast('cuda'):
            outputs = model(input_ids=input_ids_batch, attention_mask=attention_masks_batch, labels=input_ids_batch)
            loss = outputs.loss
            scaler.scale(loss).backward()

        scaler.step(optimizer)
        scaler.update()
        epoch_loss += loss.item()
        progress_bar.set_postfix({"loss": loss.item()})

    print(f"Epoch {current_epoch} completed. Average loss: {epoch_loss / len(train_loader)}")

    # Save the model, optimizer, and scaler states after the epoch
    checkpoint_path = f'/content/drive/MyDrive/Entropy/gpt2_finetuned_checkpoint_epoch_{current_epoch}.pt'
    save_checkpoint(current_epoch, model, optimizer, scaler, checkpoint_path)

print("Training complete!")


  checkpoint = torch.load(checkpoint_path)


Optimizer and scaler states loaded.
Continuing training: Epoch 2




Epoch 2 completed. Average loss: 0.4390451854634937
Checkpoint saved after epoch 2 to /content/drive/MyDrive/Entropy/gpt2_finetuned_checkpoint_epoch_2.pt
Training complete!


This following attempt got interrupted with a runtime end

  scaler = GradScaler()


Epoch 1/2


  with autocast():
Training:  47%|████▋     | 8984/19028 [1:01:59<1:09:02,  2.42it/s, loss=0.371]

In [None]:
# Step 8: Save the trained model after optimization
token_save_path=f'/content/drive/MyDrive/Entropy/optimized_tokenizer'
tokenizer.save_pretrained(token_save_path)

print("tokenizer saved")

tokenizer saved


In [None]:
from google.colab import drive
drive.mount('/content/drive')

# Save the trained model and tokenizer to Google Drive

tokenizer.save_pretrained('/content/drive/MyDrive/optimized_tokenizer')

Part 3 : Evaluation

In [4]:
import torch
from transformers import GPT2LMHeadModel, GPT2Tokenizer

# Define the device
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Load the pre-trained model and the checkpoint
model = GPT2LMHeadModel.from_pretrained('gpt2')
checkpoint_path = '/content/drive/MyDrive/Entropy/gpt2_finetuned_checkpoint_epoch_2.pt'
checkpoint = torch.load(checkpoint_path)
model.load_state_dict(checkpoint['model_state_dict'])
model.to(device)

# Load the tokenizer
tokenizer = GPT2Tokenizer.from_pretrained('gpt2')

# Function to generate chatbot responses with attention mask
def generate_response(prompt_text, model, tokenizer, max_length=150, num_return_sequences=1):
    # Tokenize the input prompt with padding and attention mask
    inputs = tokenizer(prompt_text, return_tensors='pt', truncation=True, max_length=512).to(device)

    # Pass both input_ids and attention_mask to the model
    input_ids = inputs['input_ids']
    attention_mask = inputs['attention_mask']

    # Generate responses
    outputs = model.generate(
        input_ids=input_ids,
        attention_mask=attention_mask,  # Pass the attention mask to the model
        max_length=max_length,  # Adjust maximum length of generated text
        num_return_sequences=num_return_sequences,  # Generate one or more responses
        no_repeat_ngram_size=2,  # Avoid repeating the same n-grams
        do_sample=True,  # Enable sampling for varied responses
        top_k=50,  # Sample from top k tokens
        top_p=0.95,  # Use nucleus sampling (top-p sampling)
        temperature=0.7,  # Lower temperature makes output more deterministic
        pad_token_id=tokenizer.eos_token_id  # Set the pad token to eos_token_id
    )

    # Decode and return the generated text
    response = tokenizer.decode(outputs[0], skip_special_tokens=True)
    return response

# Test the chatbot with a user input prompt
prompt = "Hello, how are you?"
response = generate_response(prompt, model, tokenizer)
print(f"Chatbot response: {response}")

# Try another prompt
prompt = "Can you tell me a joke?"
response = generate_response(prompt, model, tokenizer)
print(f"Chatbot response: {response}")

# Try another prompt
prompt = "do you like movies?"
response = generate_response(prompt, model, tokenizer)
print(f"Chatbot response: {response}")


  checkpoint = torch.load(checkpoint_path)


Chatbot response: Hello, how are you?

<BRUCE (male)

Chatbot response: Can you tell me a joke?

If you're the one who said it, you'll find out what it was.
There's no telling what you've done. No one has ever told you a funny story. They'll tell you the truth. And they'll laugh at you. But you gotta be a liar. That's what I mean. You gotta tell the lie. Tell the story the way you wanna tell it. Like a lot of people do. If you don't tell a lie, that's gonna be your problem. Your problem is it's too important to you to tell your truth, so you just tell some bullshit. I dont have to lie to get this job. Now you got the money
Chatbot response: do you like movies?



In [8]:
import torch
from transformers import GPT2LMHeadModel, GPT2Tokenizer

# Define the device
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Load the pre-trained model and the checkpoint
model = GPT2LMHeadModel.from_pretrained('gpt2')
checkpoint_path = '/content/drive/MyDrive/Entropy/gpt2_finetuned_checkpoint_epoch_2.pt'
checkpoint = torch.load(checkpoint_path)
model.load_state_dict(checkpoint['model_state_dict'])
model.to(device)

# Load the tokenizer
tokenizer = GPT2Tokenizer.from_pretrained('gpt2')

# Function to generate chatbot responses with attention mask
def generate_response(prompt_text, model, tokenizer, max_length=300):
    inputs = tokenizer(prompt_text, return_tensors='pt', truncation=True, max_length=512).to(device)
    input_ids = inputs['input_ids']
    attention_mask = inputs['attention_mask']

    # Generate the response
    outputs = model.generate(
        input_ids=input_ids,
        attention_mask=attention_mask,
        max_length=max_length,
        no_repeat_ngram_size=2,
        do_sample=True,
        top_k=50,
        top_p=0.95,
        temperature=0.7,
        pad_token_id=tokenizer.eos_token_id
    )

    # Decode and return the generated text
    response = tokenizer.decode(outputs[0], skip_special_tokens=True)
    return response

# Interactive conversation loop
print("Chatbot is ready! Type 'exit' to end the conversation.")
conversation_history = ""

while True:
    # Get user input
    user_input = input("You: ")
    if user_input.lower() == 'exit':
        print("Ending the conversation. Goodbye!")
        break

    # Append the user input to the conversation history
    conversation_history += f"\nYou: {user_input}"

    # Generate chatbot response
    prompt_text = conversation_history  # Feed the entire conversation history
    chatbot_response = generate_response(prompt_text, model, tokenizer)
    conversation_history += f"\nBot: {chatbot_response}"

    # Print chatbot response
    print(f"Bot: {chatbot_response}")


  checkpoint = torch.load(checkpoint_path)


Chatbot is ready! Type 'exit' to end the conversation.
You: hi, how are you ?
Bot: 
You: hi, how are you?

Me: How are things?
How are they?  Where are we going?  I'm just going to stay here.
What do you mean? Where's my sister? What is she doing? I haven't said anything, have I? How do I know? What do we do? You know, I don't know. I mean, if I could just tell you that I was here, you'd know I didn't have to tell anyone. So you just sort of --

You: lets go out for a movie today ?
Bot: 
You: hi, how are you?
Bot: 
You: hi, how are you?

Me: How are things?
How are they?  Where are we going?  I'm just going to stay here.
What do you mean? Where's my sister? What is she doing? I haven't said anything, have I? How do I know? What do we do? You know, I don't know. I mean, if I could just tell you that I was here, you'd know I didn't have to tell anyone. So you just sort of --

You: lets go out for a movie today?  Can I come in? Will you come out? When we get there, we'll be there? We'll m

ValueError: Input length of input_ids is 445, but `max_length` is set to 300. This can lead to unexpected behavior. You should consider increasing `max_length` or, better yet, setting `max_new_tokens`.

In [9]:
!pip install gradio


Collecting gradio
  Downloading gradio-5.1.0-py3-none-any.whl.metadata (15 kB)
Collecting aiofiles<24.0,>=22.0 (from gradio)
  Downloading aiofiles-23.2.1-py3-none-any.whl.metadata (9.7 kB)
Collecting fastapi<1.0 (from gradio)
  Downloading fastapi-0.115.2-py3-none-any.whl.metadata (27 kB)
Collecting ffmpy (from gradio)
  Downloading ffmpy-0.4.0-py3-none-any.whl.metadata (2.9 kB)
Collecting gradio-client==1.4.0 (from gradio)
  Downloading gradio_client-1.4.0-py3-none-any.whl.metadata (7.1 kB)
Collecting httpx>=0.24.1 (from gradio)
  Downloading httpx-0.27.2-py3-none-any.whl.metadata (7.1 kB)
Collecting huggingface-hub>=0.25.1 (from gradio)
  Downloading huggingface_hub-0.25.2-py3-none-any.whl.metadata (13 kB)
Collecting markupsafe~=2.0 (from gradio)
  Downloading MarkupSafe-2.1.5-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (3.0 kB)
Collecting orjson~=3.0 (from gradio)
  Downloading orjson-3.10.7-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata

In [6]:
import gradio as gr
import torch
from transformers import GPT2LMHeadModel, GPT2Tokenizer

# Load the model and tokenizer
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = GPT2LMHeadModel.from_pretrained('gpt2')
tokenizer = GPT2Tokenizer.from_pretrained('gpt2')

# Load your fine-tuned checkpoint safely
checkpoint_path = '/content/drive/MyDrive/Entropy/gpt2_finetuned_checkpoint_epoch_1.pt'
checkpoint = torch.load(checkpoint_path, map_location=device, weights_only=True)  # Set weights_only=True
model.load_state_dict(checkpoint['model_state_dict'])
model.to(device)

# Define the chatbot response function
def generate_response(prompt):
    inputs = tokenizer(prompt, return_tensors='pt', truncation=True, max_length=512, clean_up_tokenization_spaces=True).to(device)
    input_ids = inputs['input_ids']
    attention_mask = inputs['attention_mask']

    outputs = model.generate(
        input_ids=input_ids,
        attention_mask=attention_mask,
        max_length=150,
        no_repeat_ngram_size=2,
        do_sample=True,
        top_k=50,
        top_p=0.95,
        temperature=0.7,
        pad_token_id=tokenizer.eos_token_id
    )

    response = tokenizer.decode(outputs[0], skip_special_tokens=True)
    return response

# Gradio interface with updated components API
interface = gr.Interface(fn=generate_response,
                         inputs=gr.Textbox(lines=2, placeholder="Enter your message here..."),
                         outputs=gr.Textbox(label="Chatbot Response"),
                         title="Chatbot")

# Launch the Gradio app
interface.launch(share=True)


Colab notebook detected. To show errors in colab notebook, set debug=True in launch()
* Running on public URL: https://50eb8513a8714bffc0.gradio.live

This share link expires in 72 hours. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)


