### Importing Libraries

In [1]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, Dataset
from transformers import AutoModelForSeq2SeqLM, AutoTokenizer

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

### Loading Dataser

In [3]:

def load_data(file_path):
    pairs = []
    with open(file_path, "r") as f:
        for line in f:
            if '\t' in line:
                input_text, target_text = line.strip().split("\t")
                pairs.append((input_text, target_text))
    return pairs

In [4]:
# Prepare Dataset and DataLoader
data_file = "dialogs.txt"
pairs = load_data(data_file)


In [5]:
class DialogDataset(Dataset):
    def __init__(self, pairs, tokenizer, max_length):
        self.pairs = pairs
        self.tokenizer = tokenizer
        self.max_length = max_length

    def __len__(self):
        return len(self.pairs)

    def __getitem__(self, idx):
        input_text, target_text = self.pairs[idx]
        input_enc = self.tokenizer(
            input_text,
            max_length=self.max_length,
            truncation=True,
            padding="max_length",
            return_tensors="pt",
        )
        target_enc = self.tokenizer(
            target_text,
            max_length=self.max_length,
            truncation=True,
            padding="max_length",
            return_tensors="pt",
        )
        return input_enc["input_ids"].squeeze(), target_enc["input_ids"].squeeze()

In [6]:
model_name = "facebook/blenderbot-400M-distill"  # Replace with your desired model
tokenizer = AutoTokenizer.from_pretrained(model_name)

train_pairs = pairs[:int(0.8 * len(pairs))]
val_pairs = pairs[int(0.8 * len(pairs)):] 

max_length = 50

train_dataset = DialogDataset(train_pairs, tokenizer, max_length)
val_dataset = DialogDataset(val_pairs, tokenizer, max_length)

train_loader = DataLoader(train_dataset, batch_size=16, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=16)



In [None]:

model = AutoModelForSeq2SeqLM.from_pretrained(model_name, force_download=True)
model.to(device)

In [123]:
# Step 3: Training and Validation
criterion = nn.CrossEntropyLoss()
optimizer = optim.AdamW(model.parameters(), lr=0.001)

In [124]:
def load_model(model, optimizer, path="model_checkpoint2.pth"):
    checkpoint = torch.load(path)
    model.load_state_dict(checkpoint['model_state_dict'])
    optimizer.load_state_dict(checkpoint['optimizer_state_dict'])
    
    return model, optimizer

In [125]:
load_model(model, optimizer, path="model_checkpoint2.pth")

OutOfMemoryError: CUDA out of memory. Tried to allocate 16.00 MiB. GPU 0 has a total capacity of 11.99 GiB of which 0 bytes is free. Of the allocated memory 26.01 GiB is allocated by PyTorch, and 368.13 MiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting PYTORCH_CUDA_ALLOC_CONF=expandable_segments:True to avoid fragmentation.  See documentation for Memory Management  (https://pytorch.org/docs/stable/notes/cuda.html#environment-variables)

In [86]:
def train_fn(loader, model, optimizer):
    model.train()
    total_loss = 0
    for input_ids, target_ids in loader:
        input_ids, target_ids = input_ids.to(device), target_ids.to(device)
        optimizer.zero_grad()

        outputs = model(input_ids=input_ids, labels=target_ids)
        loss = outputs.loss
        loss.backward()
        optimizer.step()

        total_loss += loss.item()
    return total_loss / len(loader)


In [87]:
def eval_fn(loader, model):
    model.eval()
    total_loss = 0
    with torch.no_grad():
        for input_ids, target_ids in loader:
            input_ids, target_ids = input_ids.to(device), target_ids.to(device)

            outputs = model(input_ids=input_ids, labels=target_ids)
            loss = outputs.loss

            total_loss += loss.item()
    return total_loss / len(loader)

In [88]:
for epoch in range(5):
    train_loss = train_fn(train_loader, model, optimizer)
    

    print(f"Epoch {epoch + 1}, Train Loss: {train_loss}")

KeyboardInterrupt: 

In [89]:
val_loss = eval_fn(val_loader, model)

print(f" Val Loss: {val_loss}")

KeyboardInterrupt: 

In [90]:
def save_model(model, optimizer, path="model_checkpoint2.pth"):
    torch.save({
        'model_state_dict': model.state_dict(),
        'optimizer_state_dict': optimizer.state_dict()
    }, path)

In [91]:
save_model(model, optimizer,  path="model_checkpoint2.pth")

In [92]:
def predict_response(input_text, max_length=50):
    model.eval()
    with torch.no_grad():
        input_enc = tokenizer(
            input_text,
            return_tensors="pt",
            truncation=True,
            padding="max_length",
            max_length=max_length
        )
        input_ids = input_enc["input_ids"].to(device)

        print("Input IDs:", input_ids)
        print("Input Tokens:", tokenizer.convert_ids_to_tokens(input_ids[0]))

        outputs = model.generate(input_ids, max_length=max_length, num_beams=5)
        print("Generated Output IDs:", outputs)
        print("Generated Tokens:", tokenizer.convert_ids_to_tokens(outputs[0]))

        response = tokenizer.decode(outputs[0], skip_special_tokens=True)
        print("Decoded Response:", response)

        return response


In [None]:
predict_response(" i'm attending pcc right now ")

Input IDs: tensor([[   0,  939,  437, 5190,  181, 7309,  235,  122, 1437,    2,    1,    1,
            1,    1,    1,    1,    1,    1,    1,    1,    1,    1,    1,    1,
            1,    1,    1,    1,    1,    1,    1,    1,    1,    1,    1,    1,
            1,    1,    1,    1,    1,    1,    1,    1,    1,    1,    1,    1,
            1,    1]], device='cuda:0')
Input Tokens: ['<s>', 'Ġi', "'m", 'Ġattending', 'Ġp', 'cc', 'Ġright', 'Ġnow', 'Ġ', '</s>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>']
Generated Output IDs: tensor([[   2,    0,    0,    0,  118,  437,  164,    7,  213,    7,  334, 3859,
            4,    2]], device='cuda:0')
Generated Tokens: ['</s>', '<s>

"i'm going to go to school tomorrow."