## 1. Importing the dependencies

In [1]:
import os
import csv
import math
import random
import torch
import pandas as pd
import numpy as np
from torch import nn
from torch.utils.data import Dataset, DataLoader, random_split
from transformers import BartTokenizer, BartForConditionalGeneration
from tqdm import tqdm

2025-09-17 16:27:49.273146: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:477] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1758126469.295833     167 cuda_dnn.cc:8310] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1758126469.302636     167 cuda_blas.cc:1418] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered


## 2. Load Model and Tokenizer

In [3]:
# Model weights and tokenizer are both pulled from the same checkpoint name.
PRETRAINED_CHECKPOINT = 'facebook/bart-large'
tokenizer = BartTokenizer.from_pretrained(PRETRAINED_CHECKPOINT, add_prefix_space=True)
model = BartForConditionalGeneration.from_pretrained(PRETRAINED_CHECKPOINT)

## 3. Configuration

In [4]:
# --- Multi-GPU setup ---
# Use CUDA when it is available, otherwise fall back to the CPU.
DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")
gpu_count = torch.cuda.device_count()
if gpu_count > 1:
    print(f"Using {gpu_count} GPUs")
    # Wrap only once. Re-running this cell would otherwise nest one
    # DataParallel inside another, and the checkpoint code (which unwraps a
    # single level through `.module`) would then save "module."-prefixed keys.
    if not isinstance(model, torch.nn.DataParallel):
        model = torch.nn.DataParallel(model)
model = model.to(DEVICE)

# --- CSV setup ---
# Metrics file that the training loop appends one row per epoch to.
log_file = "training_log.csv"
header = ["epoch", "train_loss", "val_loss"]
# Write the header row only when the file is not there yet; an existing log is
# left untouched so later rows keep being appended to it.
if not os.path.exists(log_file):
    with open(log_file, mode='w', newline='') as log_fh:
        csv.writer(log_fh).writerow(header)

Using 2 GPUs


In [6]:
# Paths and hyperparameters shared by the dataset and the training loop.

# Excel file; expected to provide 'source'/'target' (or 'data'/'summary') columns.
DATA_FILE = "/kaggle/input/FD_IN-Abs_CSM_512.xlsx"

# Tokenization lengths (sequences are padded/truncated to these sizes).
MAX_INPUT = 1024   # source texts
MAX_TARGET = 512   # target texts

# Optimization settings.
BATCH_SIZE = 4     # batch size of both DataLoaders
EPOCHS = 3         # number of passes of the training loop
LR = 5e-5          # learning rate handed to the optimizer

## 4. Dataset

In [7]:
class SummaryDataset(Dataset):
    """Dataset of (source, target) text pairs that are tokenized on access.

    Every item is a dict of 1-D tensors:
      * 'input_ids'      -- source token ids, padded/truncated to `max_input`
      * 'attention_mask' -- attention mask the tokenizer returned for the source
      * 'labels'         -- target token ids, padded/truncated to `max_target`,
                            with pad positions replaced by -100

    Args:
        tokenizer: callable tokenizer exposing `pad_token_id`. It is invoked with
            `max_length`, `padding='max_length'`, `truncation=True` and
            `return_tensors='pt'`.
        sources: indexable collection of source texts.
        targets: indexable collection of target texts, aligned with `sources`.
        max_input: length for source sequences; when None the module-level
            MAX_INPUT is used.
        max_target: length for target sequences; when None the module-level
            MAX_TARGET is used.
    """

    def __init__(self, tokenizer, sources, targets, max_input=None, max_target=None):
        self.tokenizer = tokenizer
        self.sources = sources
        self.targets = targets
        # Resolve the fallbacks here (not in the signature) so the class can be
        # defined before the configuration constants exist.
        self.max_input = MAX_INPUT if max_input is None else max_input
        self.max_target = MAX_TARGET if max_target is None else max_target

    def __len__(self):
        """Return the number of source texts."""
        return len(self.sources)

    def __getitem__(self, idx):
        """Tokenize pair `idx` and return its input ids, mask and labels."""
        src_enc = self.tokenizer(self.sources[idx], max_length=self.max_input,
                                 padding='max_length', truncation=True,
                                 return_tensors='pt')
        tgt_enc = self.tokenizer(self.targets[idx], max_length=self.max_target,
                                 padding='max_length', truncation=True,
                                 return_tensors='pt')

        # With return_tensors='pt' a single text comes back with a leading batch
        # axis of size 1. Remove exactly that axis: a bare squeeze() would also
        # drop the sequence axis whenever its length is 1 and return a 0-d
        # tensor instead of a 1-D sequence.
        labels = tgt_enc['input_ids'].squeeze(0)
        # ignore pad in loss
        labels = labels.masked_fill(labels == self.tokenizer.pad_token_id, -100)

        return {
            'input_ids': src_enc['input_ids'].squeeze(0),
            'attention_mask': src_enc['attention_mask'].squeeze(0),
            'labels': labels
        }

In [8]:
# Read the raw table from the configured Excel file.
df = pd.read_excel(DATA_FILE)
# Files that use 'data'/'summary' headers are mapped to 'source'/'target'.
if 'data' in df.columns:
    df = df.rename(columns={'data': 'source', 'summary': 'target'})

# Fail right here with a descriptive message rather than with a bare KeyError
# when the columns are first accessed further down.
missing_cols = [col for col in ('source', 'target') if col not in df.columns]
if missing_cols:
    raise KeyError(f"{DATA_FILE} is missing required column(s): {missing_cols}")

# Blank cells are read as NaN and numeric cells as numbers; neither is text the
# tokenizer can be given, so drop incomplete rows and store both columns as str.
df = df.dropna(subset=['source', 'target'])
df = df.astype({'source': str, 'target': str}).reset_index(drop=True)

In [9]:
# Build the dataset from the two text columns of the loaded frame.
dataset = SummaryDataset(
    tokenizer=tokenizer,
    sources=df['source'].tolist(),
    targets=df['target'].tolist(),
)

In [10]:
# 80/20 train/validation split. The split is drawn from a seeded generator so
# that the same examples land in the validation set on every run; without it
# the membership changes after each kernel restart and validation losses are
# not comparable between runs.
SPLIT_SEED = 42
train_len = int(0.8 * len(dataset))
val_len = len(dataset) - train_len
train_ds, val_ds = random_split(
    dataset,
    [train_len, val_len],
    generator=torch.Generator().manual_seed(SPLIT_SEED),
)
# Only the training batches are shuffled; validation keeps a fixed order.
train_loader = DataLoader(train_ds, batch_size=BATCH_SIZE, shuffle=True)
val_loader = DataLoader(val_ds, batch_size=BATCH_SIZE)

## 5. Training Loop

In [11]:
# AdamW over all model parameters, with the learning rate from the config cell.
optimizer = torch.optim.AdamW(params=model.parameters(), lr=LR)

In [None]:
# --- Training loop ---
def _batch_loss(net, batch):
    """Run `net` on one batch moved to DEVICE and return a scalar loss tensor.

    When `net` is wrapped in DataParallel, each replica reports its own loss and
    the gathered result has one entry per GPU, so it is averaged here. Both the
    training and the validation pass go through this helper; calling `.item()`
    on the un-averaged multi-GPU loss raises an error.
    """
    outputs = net(input_ids=batch['input_ids'].to(DEVICE),
                  attention_mask=batch['attention_mask'].to(DEVICE),
                  labels=batch['labels'].to(DEVICE))
    loss = outputs.loss
    return loss.mean() if loss.dim() > 0 else loss


def _mean_or_nan(total, count):
    """Return total / count, or NaN when there were no batches to average."""
    return total / count if count else float('nan')


for epoch in range(1, EPOCHS + 1):
    # --- Training ---
    model.train()
    total_loss = 0.0

    for batch in train_loader:
        optimizer.zero_grad()
        loss = _batch_loss(model, batch)
        loss.backward()
        optimizer.step()
        total_loss += loss.item()

    avg_train_loss = _mean_or_nan(total_loss, len(train_loader))
    print(f"Epoch {epoch}: train loss {avg_train_loss:.4f}")

    # --- Validation ---
    model.eval()
    val_loss = 0.0
    with torch.no_grad():
        for batch in val_loader:
            val_loss += _batch_loss(model, batch).item()

    avg_val_loss = _mean_or_nan(val_loss, len(val_loader))
    print(f"Epoch {epoch}: val loss {avg_val_loss:.4f}")

    # --- Save metrics to CSV ---
    with open(log_file, mode='a', newline='') as f:
        csv.writer(f).writerow([epoch, avg_train_loss, avg_val_loss])

    # --- Save model checkpoint ---
    checkpoint_path = f"model_epoch_{epoch}.pt"
    # Save the underlying module's weights so the state-dict keys carry no
    # DataParallel "module." prefix.
    net_to_save = model.module if isinstance(model, torch.nn.DataParallel) else model
    torch.save(net_to_save.state_dict(), checkpoint_path)

    print(f"Saved model checkpoint: {checkpoint_path}")

