Google Colab Notebook : Used for model Training and Evaluation

Part 2 : Training the GPT2LM Model Using the Tokenized data file
 tokenized_single_turn_conversations_with_masks.pt

In [1]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
import torch

# file path in Google Drive
file_path = '/content/drive/MyDrive/Entropy/tokenized_conversations_with_context.pt'

# Load the tokenized data with attention masks
data = torch.load(file_path)
input_ids = data['input_ids']
attention_masks = data['attention_masks']

print("Loaded input_ids and attention_masks:")
print(f"Input IDs shape: {input_ids.shape}")
print(f"Attention Masks shape: {attention_masks.shape}")

  data = torch.load(file_path)


Loaded input_ids and attention_masks:
Input IDs shape: torch.Size([83097, 512])
Attention Masks shape: torch.Size([83097, 512])


In [None]:
from transformers import GPT2LMHeadModel

# Load pre-trained GPT-2 model
model = GPT2LMHeadModel.from_pretrained('gpt2')

# Move the model to the GPU
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)

# Move loaded tokenized data to GPU
input_ids = input_ids.to(device)
attention_masks = attention_masks.to(device)

print("Tokenized data and attention masks successfully moved to GPU.")
print("Model and data are ready on GPU.")

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


Tokenized data and attention masks successfully moved to GPU.
Model and data are ready on GPU.


Model Design And Training

In [None]:
import torch
from torch.utils.data import DataLoader, Dataset
from torch.optim import AdamW
from torch.cuda.amp import autocast, GradScaler
from tqdm import tqdm
from transformers import GPT2LMHeadModel

# Assuming `input_ids`, `attention_masks`, and `model` are already loaded on the GPU

# Step 3: Create a custom Dataset class and DataLoader
class ConversationDataset(Dataset):
    def __init__(self, input_ids, attention_masks):
        self.input_ids = input_ids
        self.attention_masks = attention_masks

    def __len__(self):
        return len(self.input_ids)

    def __getitem__(self, idx):
        return self.input_ids[idx], self.attention_masks[idx]

batch_size = 8  # Adjust based on GPU capacity
train_dataset = ConversationDataset(input_ids, attention_masks)
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)

# Step 4: Define optimizer and scaler for mixed precision training
optimizer = AdamW(model.parameters(), lr=3e-5)
scaler = GradScaler()

# Function to save the model, optimizer, and scaler state
def save_checkpoint(epoch, model, optimizer, scaler, path):
    checkpoint = {
        'epoch': epoch,
        'model_state_dict': model.state_dict(),
        'optimizer_state_dict': optimizer.state_dict(),
        'scaler_state_dict': scaler.state_dict()
    }
    torch.save(checkpoint, path)
    print(f"Checkpoint saved after epoch {epoch} to {path}")

# Step 5: Training loop with model, optimizer, and scaler saving after each epoch
epochs = 2  # Modify as needed
for epoch in range(epochs):
    print(f"Epoch {epoch + 1}/{epochs}")
    model.train()
    epoch_loss = 0
    progress_bar = tqdm(train_loader, desc="Training", leave=False)

    for input_ids_batch, attention_masks_batch in progress_bar:
        input_ids_batch, attention_masks_batch = input_ids_batch.to(device), attention_masks_batch.to(device)
        optimizer.zero_grad()

        with autocast():
            outputs = model(input_ids=input_ids_batch, attention_mask=attention_masks_batch, labels=input_ids_batch)
            loss = outputs.loss
            scaler.scale(loss).backward()

        scaler.step(optimizer)
        scaler.update()
        epoch_loss += loss.item()
        progress_bar.set_postfix({"loss": loss.item()})

    print(f"Epoch {epoch + 1} completed. Average loss: {epoch_loss / len(train_loader)}")

    # Save the model, optimizer, and scaler states after each epoch
    checkpoint_path = f'/content/drive/MyDrive/Entropy/gpt2_finetuned_checkpoint_epoch_{epoch + 1}.pt'
    save_checkpoint(epoch + 1, model, optimizer, scaler, checkpoint_path)

print("Training complete!")

  scaler = GradScaler()


Epoch 1/2


  with autocast():


Epoch 1 completed. Average loss: 0.44973561635207204
Checkpoint saved after epoch 1 to /content/drive/MyDrive/Entropy/gpt2_finetuned_checkpoint_epoch_1.pt
Epoch 2/2


Training:  13%|█▎        | 1301/10388 [09:31<1:07:02,  2.26it/s, loss=0.404]

Epoch 2 training from checkpoint 1

In [None]:
import torch
from torch.optim import AdamW
from torch.cuda.amp import autocast, GradScaler
from torch.utils.data import DataLoader, Dataset
from tqdm import tqdm

# Define the paths
checkpoint_path = '/content/drive/MyDrive/Entropy/gpt2_finetuned_checkpoint_epoch_1.pt'

# Load the checkpoint
checkpoint = torch.load(checkpoint_path)

# Load optimizer and scaler
optimizer = AdamW(model.parameters(), lr=3e-5)
optimizer.load_state_dict(checkpoint['optimizer_state_dict'])
scaler = torch.amp.GradScaler()
scaler.load_state_dict(checkpoint['scaler_state_dict'])
print("Optimizer and scaler states loaded.")

# Assuming `input_ids` and `attention_masks` are already loaded on the GPU

# Custom Dataset and DataLoader
class ConversationDataset(Dataset):
    def __init__(self, input_ids, attention_masks):
        self.input_ids = input_ids
        self.attention_masks = attention_masks

    def __len__(self):
        return len(self.input_ids)

    def __getitem__(self, idx):
        return self.input_ids[idx], self.attention_masks[idx]

batch_size = 8
train_dataset = ConversationDataset(input_ids, attention_masks)
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)

# Function to save the checkpoint
def save_checkpoint(epoch, model, optimizer, scaler, path):
    checkpoint = {
        'epoch': epoch,
        'model_state_dict': model.state_dict(),
        'optimizer_state_dict': optimizer.state_dict(),
        'scaler_state_dict': scaler.state_dict()
    }
    torch.save(checkpoint, path)
    print(f"Checkpoint saved after epoch {epoch} to {path}")

# Set the model to training mode
model.train()

# Training loop for one additional epoch
epochs = 1  # Continuing for one more epoch
for epoch in range(epochs):
    current_epoch = checkpoint['epoch'] + 1
    print(f"Continuing training: Epoch {current_epoch}")
    epoch_loss = 0
    progress_bar = tqdm(train_loader, desc="Training", leave=False)

    for input_ids_batch, attention_masks_batch in progress_bar:
        input_ids_batch, attention_masks_batch = input_ids_batch.to('cuda'), attention_masks_batch.to('cuda')
        optimizer.zero_grad()

        # Mixed precision training
        with torch.amp.autocast('cuda'):
            outputs = model(input_ids=input_ids_batch, attention_mask=attention_masks_batch, labels=input_ids_batch)
            loss = outputs.loss
            scaler.scale(loss).backward()

        scaler.step(optimizer)
        scaler.update()
        epoch_loss += loss.item()
        progress_bar.set_postfix({"loss": loss.item()})

    print(f"Epoch {current_epoch} completed. Average loss: {epoch_loss / len(train_loader)}")

    # Save the model, optimizer, and scaler states after the epoch
    checkpoint_path = f'/content/drive/MyDrive/Entropy/gpt2_finetuned_checkpoint_epoch_{current_epoch}.pt'
    save_checkpoint(current_epoch, model, optimizer, scaler, checkpoint_path)

print("Training complete!")


  checkpoint = torch.load(checkpoint_path)


Optimizer and scaler states loaded.
Continuing training: Epoch 2




Epoch 2 completed. Average loss: 0.4390451854634937
Checkpoint saved after epoch 2 to /content/drive/MyDrive/Entropy/gpt2_finetuned_checkpoint_epoch_2.pt
Training complete!


This following attempt got interrupted with a runtime end

  scaler = GradScaler()


Epoch 1/2


  with autocast():
Training:  47%|████▋     | 8984/19028 [1:01:59<1:09:02,  2.42it/s, loss=0.371]

In [None]:
# Step 8: Save the trained model after optimization
token_save_path=f'/content/drive/MyDrive/Entropy/optimized_tokenizer'
tokenizer.save_pretrained(token_save_path)

print("tokenizer saved")

tokenizer saved


In [None]:
from google.colab import drive
drive.mount('/content/drive')

# Save the trained model and tokenizer to Google Drive

tokenizer.save_pretrained('/content/drive/MyDrive/optimized_tokenizer')

Part 3 : Evaluation

In [2]:
import torch
from transformers import GPT2LMHeadModel, GPT2Tokenizer

# Define the device
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Load the pre-trained model and the checkpoint
model = GPT2LMHeadModel.from_pretrained('gpt2')
checkpoint_path = '/content/drive/MyDrive/Entropy/gpt2_finetuned_checkpoint_epoch_2.pt'
checkpoint = torch.load(checkpoint_path)
model.load_state_dict(checkpoint['model_state_dict'])
model.to(device)

# Load the tokenizer
tokenizer = GPT2Tokenizer.from_pretrained('gpt2')

# Function to generate chatbot responses with attention mask
def generate_response(prompt_text, model, tokenizer, max_length=150, num_return_sequences=1):
    # Tokenize the input prompt with padding and attention mask
    inputs = tokenizer(prompt_text, return_tensors='pt', truncation=True, max_length=512).to(device)

    # Pass both input_ids and attention_mask to the model
    input_ids = inputs['input_ids']
    attention_mask = inputs['attention_mask']

    # Generate responses
    outputs = model.generate(
        input_ids=input_ids,
        attention_mask=attention_mask,  # Pass the attention mask to the model
        max_length=max_length,  # Adjust maximum length of generated text
        num_return_sequences=num_return_sequences,  # Generate one or more responses
        no_repeat_ngram_size=2,  # Avoid repeating the same n-grams
        do_sample=True,  # Enable sampling for varied responses
        top_k=50,  # Sample from top k tokens
        top_p=0.95,  # Use nucleus sampling (top-p sampling)
        temperature=0.7,  # Lower temperature makes output more deterministic
        pad_token_id=tokenizer.eos_token_id  # Set the pad token to eos_token_id
    )

    # Decode and return the generated text
    response = tokenizer.decode(outputs[0], skip_special_tokens=True)
    return response

# Test the chatbot with a user input prompt
prompt = "Hello, how are you?"
response = generate_response(prompt, model, tokenizer)
print(f"Chatbot response: {response}")

# Try another prompt
prompt = "Can you tell me a joke?"
response = generate_response(prompt, model, tokenizer)
print(f"Chatbot response: {response}")

# Try another prompt
prompt = "do you like movies?"
response = generate_response(prompt, model, tokenizer)
print(f"Chatbot response: {response}")


The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


config.json:   0%|          | 0.00/665 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/548M [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/124 [00:00<?, ?B/s]

  checkpoint = torch.load(checkpoint_path)


tokenizer_config.json:   0%|          | 0.00/26.0 [00:00<?, ?B/s]

vocab.json:   0%|          | 0.00/1.04M [00:00<?, ?B/s]

merges.txt:   0%|          | 0.00/456k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.36M [00:00<?, ?B/s]



Chatbot response: Hello, how are you?

How are things?  Do you like to play hockey? Do ya like it? You like the game? Come on, come on.
Oh, I know I should say this but I really don't know.  I'm just not ready to go out. I dont know whatI donít wanna go anywhere. Don¹t go to the airport. Its going to be a long, long time. You never know when youre gonna get there. What is the worst thing that could happen? The worst would be that you're not gonna go. Like I said, its gonna be pretty bad. But I'll get
Chatbot response: Can you tell me a joke?

<GUILD (male)

Chatbot response: do you like movies?



In [5]:
!pip install colorama

Collecting colorama
  Downloading colorama-0.4.6-py2.py3-none-any.whl.metadata (17 kB)
Downloading colorama-0.4.6-py2.py3-none-any.whl (25 kB)
Installing collected packages: colorama
Successfully installed colorama-0.4.6


In [7]:
import torch
from transformers import GPT2LMHeadModel, GPT2Tokenizer

# Define the device
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Load the pre-trained model and the checkpoint
model = GPT2LMHeadModel.from_pretrained('gpt2')
checkpoint_path = '/content/drive/MyDrive/Entropy/gpt2_finetuned_checkpoint_epoch_2.pt'
checkpoint = torch.load(checkpoint_path, weights_only=True)
model.load_state_dict(checkpoint['model_state_dict'])
model.to(device)

# Load the tokenizer
tokenizer = GPT2Tokenizer.from_pretrained('gpt2')

# Function to generate chatbot responses with attention mask
def generate_response(prompt_text, model, tokenizer, max_new_tokens=100):
    inputs = tokenizer(prompt_text, return_tensors='pt', truncation=True, max_length=512).to(device)
    input_ids = inputs['input_ids']
    attention_mask = inputs['attention_mask']

    # Generate the response
    outputs = model.generate(
        input_ids=input_ids,
        attention_mask=attention_mask,
        max_new_tokens=max_new_tokens,  # Limit the number of new tokens generated
        no_repeat_ngram_size=2,
        repetition_penalty=1.2,  # Discourage repetitive sequences
        do_sample=True,
        top_k=50,
        top_p=0.95,
        temperature=0.7,
        pad_token_id=tokenizer.eos_token_id
    )

    # Decode and return the generated text
    response = tokenizer.decode(outputs[0], skip_special_tokens=True)
    return response

# Interactive conversation loop
print("Chatbot is ready! Type 'exit' to end the conversation.")
conversation_history = ""

while True:
    # Get user input
    user_input = input()  # Removed "You: " prompt for a more natural texting experience
    if user_input.lower() == 'exit':
        print("Ending the conversation. Goodbye!")
        break

    # Append the user input to the conversation history
    conversation_history += f"\n{user_input}"

    # Limit conversation history to the most recent exchanges
    conversation_history_list = conversation_history.split("\n")
    recent_history = "\n".join(conversation_history_list[-6:])  # Last 3 exchanges (user + bot)
    prompt_text = recent_history

    # Generate chatbot response
    chatbot_response = generate_response(prompt_text, model, tokenizer)
    conversation_history += f"\n{chatbot_response}"

    # Print chatbot response
    print(chatbot_response)

    # Logging conversation for later evaluation
    with open("conversation_log.txt", "a") as log_file:
        log_file.write(f"{user_input}\n{chatbot_response}\n\n")

    # Collect user feedback on the response
    feedback = input("Rate the bot's response (1-5): ")
    with open("feedback_log.txt", "a") as feedback_file:
        feedback_file.write(f"{user_input} -> {chatbot_response}: Rating: {feedback}\n")

Chatbot is ready! Type 'exit' to end the conversation.
Good Morning, How are you doing today ?

Good Morning, How are you doing today? Are there any changes in your day-tomovie? Or is that something else?
<good morning. I'm gonna be late tonight and go to work.  You know this will be a long ride, but hey! Come on for the ride! Just wait till I get home from my job tomorrow night...I'll have it all figured out by the time we're back at the airport -- just be patient with me here until then -

Rate the bot's response (1-5): 2
lets head out for lunch today, what cuisine to do like ?
Good Morning, How are you doing today?

Good Morning, How are you doing today? Are there any changes in your day-tomovie? Or is that something else?
<good morning. I'm gonna be late tonight and go to work.  You know this will be a long ride, but hey! Come on for the ride! Just wait till I get home from my job tomorrow night...I'll have it all figured out by the time we're back at the airport -- just be patient

KeyboardInterrupt: Interrupted by user

In [3]:
import torch
from transformers import GPT2LMHeadModel, GPT2Tokenizer

# Define the device
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Load the pre-trained model and the checkpoint
model = GPT2LMHeadModel.from_pretrained('gpt2')
checkpoint_path = '/content/drive/MyDrive/Entropy/gpt2_finetuned_checkpoint_epoch_2.pt'
checkpoint = torch.load(checkpoint_path, weights_only=True)
model.load_state_dict(checkpoint['model_state_dict'])
model.to(device)

# Load the tokenizer
tokenizer = GPT2Tokenizer.from_pretrained('gpt2')

# Function to generate chatbot responses with attention mask
def generate_response(prompt_text, model, tokenizer, max_new_tokens=1000):
    if prompt_text.lower() == 'exit':
        return "Ending the conversation. Goodbye!"

    inputs = tokenizer(prompt_text, return_tensors='pt', truncation=True, max_length=512).to(device)
    input_ids = inputs['input_ids']
    attention_mask = inputs['attention_mask']

    # Generate the response
    outputs = model.generate(
        input_ids=input_ids,
        attention_mask=attention_mask,
        max_new_tokens=max_new_tokens,  # Limit the number of new tokens generated
        no_repeat_ngram_size=2,
        repetition_penalty=1.2,  # Discourage repetitive sequences
        do_sample=True,
        top_k=50,
        top_p=0.95,
        temperature=0.7,
        pad_token_id=tokenizer.eos_token_id
    )

    # Decode and return the generated text
    response = tokenizer.decode(outputs[0], skip_special_tokens=True)
    return response

# Interactive conversation loop
print("Chatbot is ready! Type 'exit' to end the conversation.")
conversation_history = ""

while True:
    # Get user input
    user_input = input()  # Removed "You: " prompt for a more natural texting experience
    if user_input.lower() == 'exit':
        print("Ending the conversation. Goodbye!")
        break

    # Append the user input to the conversation history
    conversation_history += f"\n{user_input}"

    # Limit conversation history to the most recent exchanges
    conversation_history_list = conversation_history.split("\n")
    recent_history = "\n".join(conversation_history_list[-6:])  # Last 3 exchanges (user + bot)
    prompt_text = recent_history

    # Generate chatbot response
    chatbot_response = generate_response(prompt_text, model, tokenizer)
    conversation_history += f"\n{chatbot_response}"

    # Print chatbot response
    print(chatbot_response)

    # Logging conversation for later evaluation
    if user_input.lower() != 'exit':
        with open("conversation_log.txt", "a") as log_file:
            log_file.write(f"{user_input}\n{chatbot_response}\n\n")

    # Collect user feedback on the response
    if user_input.lower() != 'exit':
        feedback = input("Rate the bot's response (1-5): ")
        with open("feedback_log.txt", "a") as feedback_file:
            feedback_file.write(f"{user_input} -> {chatbot_response}: Rating: {feedback}\n")


Chatbot is ready! Type 'exit' to end the conversation.
Bounjor how are you ?

Bounjor how are you?  I'm very glad you're here. We've been able to work together for a long time, but we haven't done anything really special. Not even the best of things. For instance: maybe we should buy some clothes. And maybe our father would like us something nice and clean.

Rate the bot's response (1-5): 5
do you like movies ?

Bounjor how are you?

Bounjor how are you?  I'm very glad you're here. We've been able to work together for a long time, but we haven't done anything really special. Not even the best of things. For instance: maybe we should buy some clothes. And maybe our father would like us something nice and clean.

do you like movies? Do not tell anyone about them! But why do they always have this picture in their back pocket that says "You're a great writer" or whatever it is they think 'n' wanna hear your name be on TV!
you can write songs? A song about me?! Or do what my mother tells ya

In [6]:
import torch
from transformers import GPT2LMHeadModel, GPT2Tokenizer
import time

# Define the device
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Load the pre-trained model and the checkpoint
try:
    model = GPT2LMHeadModel.from_pretrained('gpt2')
    checkpoint_path = '/content/drive/MyDrive/Entropy/gpt2_finetuned_checkpoint_epoch_2.pt'
    checkpoint = torch.load(checkpoint_path, weights_only=True)
    model.load_state_dict(checkpoint['model_state_dict'])
    model.to(device)
    print("Model loaded successfully.")
except Exception as e:
    print(f"Error loading model: {e}")

# Load the tokenizer
try:
    tokenizer = GPT2Tokenizer.from_pretrained('gpt2')
except Exception as e:
    print(f"Error loading tokenizer: {e}")

# Function to generate chatbot responses with attention mask
def generate_response(prompt_text, model, tokenizer, max_new_tokens=1000):
    if prompt_text.lower() == 'exit':
        return "Ending the conversation. Goodbye!"

    try:
        inputs = tokenizer(prompt_text, return_tensors='pt', truncation=True, max_length=512).to(device)
        input_ids = inputs['input_ids']
        attention_mask = inputs['attention_mask']

        # Generate the response
        outputs = model.generate(
            input_ids=input_ids,
            attention_mask=attention_mask,
            max_new_tokens=max_new_tokens,  # Limit the number of new tokens generated
            no_repeat_ngram_size=2,
            repetition_penalty=1.2,  # Discourage repetitive sequences
            do_sample=True,
            top_k=50,
            top_p=0.95,
            temperature=0.7,
            pad_token_id=tokenizer.eos_token_id
        )

        # Decode and return the generated text
        response = tokenizer.decode(outputs[0], skip_special_tokens=True)
        return response

    except Exception as e:
        print(f"Error generating response: {e}")
        return "Sorry, I encountered an error while generating a response."

# Interactive conversation loop with enhanced logging and memory management
print("Chatbot is ready! Type 'exit' to end the conversation.")
conversation_history = ""

while True:
    # Get user input
    user_input = input()
    if user_input.lower() == 'exit':
        print("Ending the conversation. Goodbye!")
        break

    # Append the user input to the conversation history
    conversation_history += f"\n{user_input}"

    # Limit conversation history to the most recent exchanges (sliding window of last 6 lines)
    conversation_history_list = conversation_history.split("\n")
    recent_history = "\n".join(conversation_history_list[-6:])  # Last 3 exchanges (user + bot)
    prompt_text = recent_history

    # Generate chatbot response
    chatbot_response = generate_response(prompt_text, model, tokenizer)
    conversation_history += f"\n{chatbot_response}"

    # Print chatbot response
    print(chatbot_response)

    # Unified logging of conversation and feedback with timestamps
    if user_input.lower() != 'exit':
        timestamp = time.strftime("%Y-%m-%d %H:%M:%S")
        log_entry = f"{timestamp} | User: {user_input} | Bot: {chatbot_response}\n"

        # Write to a single log file for both conversation and feedback
        with open("conversation_and_feedback_log.txt", "a") as log_file:
            log_file.write(log_entry)

        # Collect user feedback on the response
        feedback = input("Rate the bot's response (1-5): ")
        with open("conversation_and_feedback_log.txt", "a") as log_file:
            log_file.write(f"Rating: {feedback}\n\n")

Model loaded successfully.
Chatbot is ready! Type 'exit' to end the conversation.
Good Morning 

Good Morning  Good Evening. Just what I needed...

Rate the bot's response (1-5): 5
how are you doing today ?

Good Morning 

Good Morning  Good Evening. Just what I needed...

how are you doing today? Are your hands warm? Do they feel good right now, or is there something wrong with them...? Is it too late to come in here...?

Rate the bot's response (1-5): no its not too late, come on in please 
5

Good Morning  Good Evening. Just what I needed...

how are you doing today? Are your hands warm? Do they feel good right now, or is there something wrong with them...? Is it too late to come in here...?

5:30-6PM - How about tonight?! What do we have left for the next few hours?? Come and see me at my place! We'll talk tomorrow night as best he can!! Don't worry though; if anything goes awry--I'm fine after all this work that's already done on him so don't even bother to tell anyone... My famil

In [9]:
!pip install gradio


Collecting gradio
  Downloading gradio-5.1.0-py3-none-any.whl.metadata (15 kB)
Collecting aiofiles<24.0,>=22.0 (from gradio)
  Downloading aiofiles-23.2.1-py3-none-any.whl.metadata (9.7 kB)
Collecting fastapi<1.0 (from gradio)
  Downloading fastapi-0.115.2-py3-none-any.whl.metadata (27 kB)
Collecting ffmpy (from gradio)
  Downloading ffmpy-0.4.0-py3-none-any.whl.metadata (2.9 kB)
Collecting gradio-client==1.4.0 (from gradio)
  Downloading gradio_client-1.4.0-py3-none-any.whl.metadata (7.1 kB)
Collecting httpx>=0.24.1 (from gradio)
  Downloading httpx-0.27.2-py3-none-any.whl.metadata (7.1 kB)
Collecting huggingface-hub>=0.25.1 (from gradio)
  Downloading huggingface_hub-0.26.0-py3-none-any.whl.metadata (13 kB)
Collecting markupsafe~=2.0 (from gradio)
  Downloading MarkupSafe-2.1.5-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (3.0 kB)
Collecting orjson~=3.0 (from gradio)
  Downloading orjson-3.10.7-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata

In [10]:
import gradio as gr
import torch
from transformers import GPT2LMHeadModel, GPT2Tokenizer
import time

# Define the device
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Load the pre-trained model and the checkpoint
try:
    model = GPT2LMHeadModel.from_pretrained('gpt2')
    checkpoint_path = '/content/drive/MyDrive/Entropy/gpt2_finetuned_checkpoint_epoch_2.pt'
    checkpoint = torch.load(checkpoint_path, weights_only=True)
    model.load_state_dict(checkpoint['model_state_dict'])
    model.to(device)
    print("Model loaded successfully.")
except Exception as e:
    print(f"Error loading model: {e}")

# Load the tokenizer
try:
    tokenizer = GPT2Tokenizer.from_pretrained('gpt2')
except Exception as e:
    print(f"Error loading tokenizer: {e}")

# Function to generate chatbot responses with attention mask
def generate_response_gradio(user_input, conversation_history):
    try:
        # Update the conversation history
        conversation_history.append(("User", user_input))

        # Limit conversation history to the most recent exchanges (sliding window of last 6 lines)
        conversation_history_list = [f"{speaker}: {text}" for speaker, text in conversation_history[-6:]]  # Last 3 exchanges (user + bot)
        prompt_text = "\n".join(conversation_history_list)

        # Tokenize and generate the response
        inputs = tokenizer(prompt_text, return_tensors='pt', truncation=True, max_length=512).to(device)
        input_ids = inputs['input_ids']
        attention_mask = inputs['attention_mask']

        outputs = model.generate(
            input_ids=input_ids,
            attention_mask=attention_mask,
            max_new_tokens=1000,  # Limit the number of new tokens generated
            no_repeat_ngram_size=2,
            repetition_penalty=1.2,  # Discourage repetitive sequences
            do_sample=True,
            top_k=50,
            top_p=0.95,
            temperature=0.7,
            pad_token_id=tokenizer.eos_token_id
        )

        # Decode and return the generated text
        chatbot_response = tokenizer.decode(outputs[0], skip_special_tokens=True)

        # Update the conversation history with the chatbot response
        conversation_history.append(("Bot", chatbot_response))
        return conversation_history

    except Exception as e:
        conversation_history.append(("Bot", "Sorry, I encountered an error while generating a response."))
        return conversation_history

# Gradio Interface
def chat_interface(user_input, conversation_history=[]):
    updated_history = generate_response_gradio(user_input, conversation_history)
    return updated_history, updated_history

# Gradio Input/Output elements
with gr.Blocks() as demo:
    gr.Markdown("### GPT-2 Chatbot")
    chatbot = gr.Chatbot()  # Chatbot display for back-and-forth conversation
    chatbot_history = gr.State([])  # Hidden state to store conversation history
    user_input = gr.Textbox(label="Your Input")

    def chat_with_bot(input_text, chat_history):
        response, new_history = chat_interface(input_text, chat_history)
        return new_history, new_history

    user_input.submit(chat_with_bot, inputs=[user_input, chatbot_history], outputs=[chatbot, chatbot_history])

# Launch Gradio interface
demo.launch(share=True)


Model loaded successfully.




Colab notebook detected. To show errors in colab notebook, set debug=True in launch()
* Running on public URL: https://d85155495634b8a62b.gradio.live

This share link expires in 72 hours. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)




In [11]:
from IPython.display import display, Javascript
from google.colab import drive, files
import os
import glob
import time
import nbformat
from nbconvert import HTMLExporter
import uuid
import subprocess

drive.mount('/content/drive')

def get_most_recent_ipynb(path):
    notebooks = glob.glob(os.path.join(path, '*.ipynb'))
    if not notebooks:
        return None
    most_recent_file = max(notebooks, key=os.path.getmtime)
    return most_recent_file

def export_notebook_as_html(notebook_path, checkpoint_name='', export_path='/content/html_exports'):
    with open(notebook_path, 'r') as f:
        notebook_content = nbformat.read(f, as_version=4)

    html_exporter = HTMLExporter()
    html_content, _ = html_exporter.from_notebook_node(notebook_content)

    base_name = os.path.splitext(notebook_path)[0]
    html_output_path = os.path.join(export_path, f"{os.path.basename(base_name)}{checkpoint_name}_{uuid.uuid4().hex[:4]}.html")

    os.makedirs(export_path, exist_ok=True)  # Create directory if it doesn't exist

    with open(html_output_path, 'w') as f:
        f.write(html_content)
    files.download(html_output_path)

def download(checkpoint_name=""):
  if checkpoint_name and not checkpoint_name.startswith('_') and not checkpoint_name.startswith('-'):
    checkpoint_name = '_' + checkpoint_name
  recent_ipynb = get_most_recent_ipynb('/content/drive/My Drive/Colab Notebooks/')
  if recent_ipynb:
      export_notebook_as_html(recent_ipynb, checkpoint_name)

download("test")

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


  warn(


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>