In [23]:
import json
import pandas as pd
with open('train.json', 'r', encoding='utf-8', errors='ignore') as file:
    train = json.load(file)
# Since it's too computationally expensive to run this model I won't be doing any validation testing.
# with open('val.json', 'r', encoding='utf-8', errors='ignore') as file:
#     val = json.load(file)

with open('test.json', 'r', encoding='utf-8', errors='ignore') as file:
    test = json.load(file)

# Convert to DataFrame
df_train = pd.DataFrame(train)
# df_val = pd.DataFrame(val)
df_test = pd.DataFrame(test)

In [24]:
print(df_train.head())
# print(df_val.head())
print(df_test.head())

         id                                            summary  \
0  13818513  Amanda baked cookies and will bring Jerry some...   
1  13728867  Olivia and Olivier are voting for liberals in ...   
2  13681000  Kim may try the pomodoro technique recommended...   
3  13730747  Edward thinks he is in love with Bella. Rachel...   
4  13728094  Sam is confused, because he overheard Rick com...   

                                            dialogue  
0  Amanda: I baked  cookies. Do you want some?\r\...  
1  Olivia: Who are you voting for in this electio...  
2  Tim: Hi, what's up?\r\nKim: Bad mood tbh, I wa...  
3  Edward: Rachel, I think I'm in ove with Bella....  
4  Sam: hey  overheard rick say something\r\nSam:...  
         id                                            summary  \
0  13862856  Hannah needs Betty's number but Amanda doesn't...   
1  13729565  Eric and Rob are going to watch a stand-up on ...   
2  13680171  Lenny can't decide which trousers to buy. Bob ...   
3  137294

In [25]:
# !pip install transformers torch

In [26]:
import torch
from torch.utils.data import Dataset, DataLoader
from transformers import BartTokenizer, BartForConditionalGeneration, BartForConditionalGeneration, BartConfig
from torch.cuda.amp import autocast, GradScaler
from tqdm import tqdm
from torch.cuda.amp import autocast, GradScaler

In [None]:

# Define the dataset class for conversations
class ConversationDataset(Dataset):
    def __init__(self, dialogues, summaries, tokenizer, max_input_length=1024, max_target_length=150):
        self.dialogues = dialogues
        self.summaries = summaries
        self.tokenizer = tokenizer
        self.max_input_length = max_input_length
        self.max_target_length = max_target_length

    def __len__(self):
        return len(self.dialogues)

    def __getitem__(self, idx):
        # Get the dialogue and summary for the given index
        dialogue = self.dialogues[idx]
        summary = self.summaries[idx]

        # Tokenize the dialogue and summary
        input_encodings = self.tokenizer(dialogue,
                                          max_length=self.max_input_length,
                                          truncation=True,
                                          padding='max_length',
                                          return_tensors="pt")
        
        target_encodings = self.tokenizer(summary,
                                           max_length=self.max_target_length,
                                           truncation=True,
                                           padding='max_length',
                                           return_tensors="pt")

        return {
            'input_ids': input_encodings['input_ids'].squeeze(0),  # Remove the batch dimension
            'attention_mask': input_encodings['attention_mask'].squeeze(0),
            'labels': target_encodings['input_ids'].squeeze(0)  # Ensure correct shape
        }

# Initialize the model and tokenizer
config = BartConfig(
    encoder_layers=4,  # Number of encoder layers
    decoder_layers=4)
model = BartForConditionalGeneration(config)
tokenizer = BartTokenizer.from_pretrained(model_name)

# Prepare the dataset
train_dataset = ConversationDataset(df_train['dialogue'].tolist(), df_train['summary'].tolist(), tokenizer)



# Move the model to the appropriate device (GPU or CPU)
# device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
# model.to(device)

# Define the training function
def train_model(model, epochs=3, lr=0.0001):
    optimizer = torch.optim.AdamW(model.parameters(), lr=lr)
    scaler = GradScaler()

    for epoch in range(epochs):
        train_dataloader = DataLoader(train_dataset, batch_size=6, shuffle=True)
        model.train()
        total_loss = 0

        for batch in tqdm(train_dataloader, desc=f"Epoch {epoch + 1}"):
            input_ids = batch['input_ids'].to(device)
            attention_mask = batch['attention_mask'].to(device)
            labels = batch['labels'].to(device)

            optimizer.zero_grad()

            with autocast():
                outputs = model(input_ids=input_ids, attention_mask=attention_mask, labels=labels)
                loss = outputs.loss

            scaler.scale(loss).backward()
            scaler.step(optimizer)
            scaler.update()

            # Clear memory cache after each batch
            torch.cuda.empty_cache()

            total_loss += loss.item()

        avg_loss = total_loss / len(train_dataloader)
        print(f"Epoch: {epoch + 1}, Average Loss: {avg_loss:.4f}")




In [None]:
# Move model to GPU if available
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')
print("PyTorch version:", torch.__version__)
print("CUDA available:", torch.cuda.is_available())
print("CUDA version:", torch.version.cuda)
if torch.cuda.is_available():
    print("GPU device name:", torch.cuda.get_device_name(0))

In [15]:
# Train the model
train_model(model)

  scaler = GradScaler()
  with autocast():
Epoch 1:   0%|                                                                                | 0/2456 [00:00<?, ?it/s]


RuntimeError: CUDA error: device-side assert triggered
CUDA kernel errors might be asynchronously reported at some other API call, so the stacktrace below might be incorrect.
For debugging consider passing CUDA_LAUNCH_BLOCKING=1
Compile with `TORCH_USE_CUDA_DSA` to enable device-side assertions.


In [5]:
# Save the model and tokenizer
# model.save_pretrained("save/model3")
# tokenizer.save_pretrained("save/model3")

In [1]:
# !pip install datasets

# !pip install evaluate absl-py nltk rouge_score



In [10]:
import evaluate

df_train = df_train.sample(n=300, random_state=42)
# Define the test dataset and dataloader
test_dataset = ConversationDataset(df_train['dialogue'].tolist(), df_train['summary'].tolist(), tokenizer)
test_dataloader = DataLoader(test_dataset, batch_size=6, shuffle=False)

# Set model to evaluation mode
model.eval()

# Load ROUGE metric
rouge = evaluate.load("rouge")

predictions = []
references = []

# Generate predictions and collect references
with torch.no_grad():
    for batch in tqdm(test_dataloader, desc="Testing"):
        input_ids = batch['input_ids'].to(device)
        attention_mask = batch['attention_mask'].to(device)

        # Generate predictions
        outputs = model.generate(input_ids=input_ids, attention_mask=attention_mask, max_length=150, num_beams=4)
        
        # Decode predictions and references
        decoded_preds = tokenizer.batch_decode(outputs, skip_special_tokens=True)
        decoded_refs = tokenizer.batch_decode(batch['labels'], skip_special_tokens=True)

        predictions.extend(decoded_preds)
        references.extend(decoded_refs)

# Compute ROUGE scores
results_training = rouge.compute(predictions=predictions, references=references, use_stemmer=True)

Testing: 100%|█████████████████████████████████████████████████████████████████████████| 50/50 [03:48<00:00,  4.57s/it]


In [11]:
test_dataset = ConversationDataset(df_test['dialogue'].tolist(), df_test['summary'].tolist(), tokenizer)
test_dataloader = DataLoader(test_dataset, batch_size=6, shuffle=False)

# Set model to evaluation mode
model.eval()

# Load ROUGE metric
rouge = evaluate.load("rouge")

predictions = []
references = []

# Generate predictions and collect references
with torch.no_grad():
    for batch in tqdm(test_dataloader, desc="Testing"):
        input_ids = batch['input_ids'].to(device)
        attention_mask = batch['attention_mask'].to(device)

        # Generate predictions
        outputs = model.generate(input_ids=input_ids, attention_mask=attention_mask, max_length=150, num_beams=4)
        
        # Decode predictions and references
        decoded_preds = tokenizer.batch_decode(outputs, skip_special_tokens=True)
        decoded_refs = tokenizer.batch_decode(batch['labels'], skip_special_tokens=True)

        predictions.extend(decoded_preds)
        references.extend(decoded_refs)

# Compute ROUGE scores
results_test = rouge.compute(predictions=predictions, references=references, use_stemmer=True)

Testing: 100%|███████████████████████████████████████████████████████████████████████| 137/137 [11:28<00:00,  5.03s/it]


In [12]:
for key, value in results_training.items():
    print(f"{key}: {value:.4f}")

rouge1: 0.5737
rouge2: 0.3470
rougeL: 0.4969
rougeLsum: 0.4973


In [13]:
for key, value in results_test.items():
    print(f"{key}: {value:.4f}")

rouge1: 0.4969
rouge2: 0.2462
rougeL: 0.4081
rougeLsum: 0.4080


In [21]:
conversations = [
    "A: Hi Tom, are you busy tomorrow’s afternoon?\r\nB: I’m pretty sure I am. What’s up?\r\nA: Can you go with me to the animal shelter?.\r\nB: What do you want to do?\r\nA: I want to get a puppy for my son.\r\nB: That will make him so happy.\r\nA: Yeah, we’ve discussed it many times. I think he’s ready now.\r\nB: That’s good. Raising a dog is a tough issue. Like having a baby ;-) \r\nA: I'll get him one of those little dogs.\r\nB: One that won't grow up too big;-)\r\nA: And eat too much;-))\r\nB: Do you know which one he would like?\r\nA: Oh, yes, I took him there last Monday. He showed me one that he really liked.\r\nB: I bet you had to drag him away.\r\nA: He wanted to take it home right away ;-).\r\nB: I wonder what he'll name it.\r\nA: He said he’d name it after his dead hamster – Lemmy  - he's  a great Motorhead fan :-)))",
    "Emma: I’ve just fallen in love with this advent calendar! Awesome! I wanna one for my kids!\r\nRob: I used to get one every year as a child! Loved them! \r\nEmma: Yeah, i remember! they were filled with chocolates!\r\nLauren: they are different these days! much more sophisticated! Haha!\r\nRob: yeah, they can be fabric/ wooden, shop bought/ homemade, filled with various stuff\r\nEmma: what do you fit inside?\r\nLauren: small toys, Christmas decorations, creative stuff, hair bands & clips, stickers, pencils & rubbers, small puzzles, sweets\r\nEmma: WOW! That’s brill! X\r\nLauren: i add one more very special thing as well- little notes asking my children to do something nice for someone else\r\nRob: i like that! My sister adds notes asking her kids questions about christmas such as What did the 3 wise men bring? etc\r\nLauren: i reckon it prepares them for Christmas \r\nEmma: and makes it more about traditions and being kind to other people\r\nLauren: my children get very excited every time they get one!\r\nEmma: i can see why! :)",
]

# Tokenize and generate summaries
model.eval()  # Set the model to evaluation mode
summaries = []
for convo in conversations:
    inputs = tokenizer(convo, max_length=1024, truncation=True, return_tensors="pt").to(device)
    with torch.no_grad():
        summary_ids = model.generate(inputs["input_ids"], num_beams=4, max_length=150, early_stopping=True)
    # Decode the summary and add it to the list
    summaries.append(tokenizer.decode(summary_ids[0], skip_special_tokens=True))


for i, conversation in enumerate(conversations):
    print(f" Conversation {i + 1} \n{conversation}\n")

 Conversation 1 
A: Hi Tom, are you busy tomorrow’s afternoon?
B: I’m pretty sure I am. What’s up?
A: Can you go with me to the animal shelter?.
B: What do you want to do?
A: I want to get a puppy for my son.
B: That will make him so happy.
A: Yeah, we’ve discussed it many times. I think he’s ready now.
B: That’s good. Raising a dog is a tough issue. Like having a baby ;-) 
A: I'll get him one of those little dogs.
B: One that won't grow up too big;-)
A: And eat too much;-))
B: Do you know which one he would like?
A: Oh, yes, I took him there last Monday. He showed me one that he really liked.
B: I bet you had to drag him away.
A: He wanted to take it home right away ;-).
B: I wonder what he'll name it.
A: He said he’d name it after his dead hamster – Lemmy  - he's  a great Motorhead fan :-)))

 Conversation 2 
Emma: I’ve just fallen in love with this advent calendar! Awesome! I wanna one for my kids!
Rob: I used to get one every year as a child! Loved them! 
Emma: Yeah, i remember! th

In [26]:
print("------SUMMARY RESULTS------")
for i, summary in enumerate(summaries):
    print(f"Conversation {i + 1} \n Summary: {summary} \n")

------SUMMARY RESULTS------
Conversation 1 
 Summary: B will go with A to the animal shelter tomorrow afternoon. A wants to get a puppy for her son. A took him there last Monday. He liked the one that he liked. He will name it Lemmy after his dead hamster. 

Conversation 2 
 Summary: Emma wants an advent calendar for her kids. Rob used to get one every year as a child. Lauren adds notes for her children. 

