In [None]:
#  STEP 1: Imports
import os, random, json, csv, time, gc
import pandas as pd, numpy as np
import matplotlib.pyplot as plt, seaborn as sns
import torch, chardet
from sklearn.model_selection import StratifiedKFold
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, classification_report, confusion_matrix
from transformers import AutoTokenizer, AutoModelForSequenceClassification, get_scheduler
from transformers import get_cosine_schedule_with_warmup
from torch.utils.data import Dataset, DataLoader
from torch.optim import AdamW
from tqdm import tqdm
from torch.cuda.amp import GradScaler, autocast
import torch.nn.functional as F

#  STEP 2: Reproducibility
# Feed one shared seed to every random number generator used below (Python's
# `random`, NumPy, and PyTorch on CPU and CUDA) so runs start from the same state.
seed = 42
for seed_fn in (
    random.seed,
    np.random.seed,
    torch.manual_seed,
    torch.cuda.manual_seed_all,
):
    seed_fn(seed)

# STEP 3: Paths
input_csv = "/content/drive/MyDrive/FIRE/QnA-TRAIN.csv"
base_output_path = "/content/drive/MyDrive/FIRE/fold_outputs"
os.makedirs(base_output_path, exist_ok=True)
# Build the cleaned file name from the extension only. Unlike str.replace this
# touches just the suffix and can never yield the input path itself (which
# would truncate the input when it is opened for writing in STEP 4).
csv_root, csv_ext = os.path.splitext(input_csv)
cleaned_csv = f"{csv_root}-CLEANED{csv_ext}"

#  STEP 4: Clean CSV
# Copy the raw CSV to `cleaned_csv` as UTF-8, keeping only rows that have the
# same number of fields as the header row.
with open(input_csv, 'rb') as raw_file:
    # chardet reports None when it cannot guess; fall back to UTF-8 explicitly
    # instead of silently inheriting the platform default encoding.
    encoding = chardet.detect(raw_file.read())['encoding'] or 'utf-8'

# newline='' lets the csv module handle line endings itself, so newlines
# embedded in quoted fields are preserved. Undecodable bytes are dropped on
# purpose (errors='ignore') as a best-effort cleanup.
with open(input_csv, 'r', newline='', encoding=encoding, errors='ignore') as infile, \
        open(cleaned_csv, 'w', newline='', encoding='utf-8') as outfile:
    reader, writer = csv.reader(infile), csv.writer(outfile)
    header = next(reader, None)
    if header is None:
        raise ValueError(f"{input_csv} is empty; no header row to read")
    writer.writerow(header)
    # Rows with a different field count than the header are malformed; skip them.
    writer.writerows(row for row in reader if len(row) == len(header))

#  STEP 5: Preprocess
# Load the cleaned CSV, drop rows missing any text field or the label, join the
# four text fields (space-separated) into `input_text`, and expose a two-column
# frame `df` (input_text, label) with integer labels.
text_columns = ["title", "selftext", "MAIN", "comment_body"]
raw_df = pd.read_csv(cleaned_csv).dropna(subset=text_columns + ["relevance"])

combined_text = raw_df[text_columns[0]].astype(str)
for column in text_columns[1:]:
    combined_text = combined_text + " " + raw_df[column].astype(str)
raw_df["input_text"] = combined_text

df = (
    raw_df[["input_text", "relevance"]]
    .rename(columns={"relevance": "label"})
    .astype({"label": int})
)

#  STEP 6: Dataset Class
# Checkpoint identifier shared by the tokenizer here and by the per-fold
# AutoModelForSequenceClassification.from_pretrained call in STEP 8.
model_name = "microsoft/deberta-v3-small"
# Module-level tokenizer: CustomDataset calls it in __init__, and STEP 8 saves
# it next to each fold's best model.
tokenizer = AutoTokenizer.from_pretrained(model_name)
class CustomDataset(Dataset):
    """Dataset of pre-tokenized texts paired with their labels.

    All texts are tokenized once at construction time with the module-level
    ``tokenizer`` (truncated and padded to 256 tokens). Each item is a dict
    with ``input_ids``, ``attention_mask`` and ``labels`` tensors.
    """

    def __init__(self, texts, labels):
        """Tokenize ``texts`` and keep ``labels`` (one label per text)."""
        self.labels = labels
        self.encodings = tokenizer(
            texts,
            max_length=256,
            padding='max_length',
            truncation=True,
        )

    def __len__(self):
        """Return the number of samples, taken from the label list."""
        return len(self.labels)

    def __getitem__(self, idx):
        """Return sample ``idx`` as a dict of freshly built tensors."""
        sample = {
            field: torch.tensor(self.encodings[field][idx])
            for field in ('input_ids', 'attention_mask')
        }
        sample['labels'] = torch.tensor(self.labels[idx])
        return sample

#  STEP 7: Focal Loss
class FocalLoss(torch.nn.Module):
    """Focal loss for multi-class logits.

    Computes ``alpha * (1 - p_t) ** gamma * CE`` per sample, where ``CE`` is the
    cross-entropy and ``p_t = exp(-CE)`` is the probability assigned to the
    true class. ``alpha`` is a single scalar applied to every sample (it is
    not a per-class weight).

    Args:
        alpha: Scalar multiplier on the loss.
        gamma: Focusing exponent; 0 reduces the loss to ``alpha * CE``.
        reduction: ``'mean'``, ``'sum'`` or ``'none'`` (per-sample losses).

    Raises:
        ValueError: If ``reduction`` is not one of the supported values.
    """

    _REDUCTIONS = ('mean', 'sum', 'none')

    def __init__(self, alpha=0.75, gamma=1.5, reduction='mean'):
        super().__init__()
        # Reject unknown modes up front; previously anything other than 'mean'
        # (including 'none' and typos) was silently treated as 'sum'.
        if reduction not in self._REDUCTIONS:
            raise ValueError(
                f"reduction must be one of {self._REDUCTIONS}, got {reduction!r}"
            )
        self.alpha = alpha
        self.gamma = gamma
        self.reduction = reduction

    def forward(self, inputs, targets):
        """Return the focal loss of logits ``inputs`` against class indices ``targets``."""
        ce_loss = F.cross_entropy(inputs, targets, reduction='none')
        # CE = -log(p_t), so exp(-CE) recovers the true-class probability.
        pt = torch.exp(-ce_loss)
        focal_loss = self.alpha * (1 - pt) ** self.gamma * ce_loss
        if self.reduction == 'mean':
            return focal_loss.mean()
        if self.reduction == 'sum':
            return focal_loss.sum()
        return focal_loss

#  STEP 8: Stratified K-Fold Training
# For each stratified fold: fine-tune a fresh model with focal loss and mixed
# precision, sweep the decision threshold on the validation split after every
# epoch, checkpoint the best epoch, and write that epoch's classification
# report and confusion matrix into the fold directory.
# NOTE(review): the threshold is tuned and scored on the same validation split,
# so the reported validation F1 is optimistic.
skf = StratifiedKFold(n_splits=5, shuffle=True, random_state=seed)
all_texts = df["input_text"].tolist()
all_labels = df["label"].tolist()

# The device and the AMP switch are the same for every fold; resolve them once.
use_amp = torch.cuda.is_available()
device = torch.device("cuda" if use_amp else "cpu")

for fold, (train_idx, val_idx) in enumerate(skf.split(all_texts, all_labels)):
    print(f"\n🌀 Fold {fold+1}")
    fold_path = os.path.join(base_output_path, f"fold_{fold+1}")
    os.makedirs(fold_path, exist_ok=True)

    train_texts = [all_texts[i] for i in train_idx]
    train_labels = [all_labels[i] for i in train_idx]
    val_texts = [all_texts[i] for i in val_idx]
    val_labels = [all_labels[i] for i in val_idx]

    train_dataset = CustomDataset(train_texts, train_labels)
    val_dataset = CustomDataset(val_texts, val_labels)

    model = AutoModelForSequenceClassification.from_pretrained(model_name, num_labels=2)
    model.to(device)

    loss_fn = FocalLoss()
    train_loader = DataLoader(train_dataset, batch_size=16, shuffle=True)
    val_loader = DataLoader(val_dataset, batch_size=16)
    optimizer = AdamW(model.parameters(), lr=2e-5, weight_decay=0.01)
    scaler = GradScaler(enabled=use_amp)
    num_epochs = 10
    scheduler = get_cosine_schedule_with_warmup(
        optimizer, num_warmup_steps=500, num_training_steps=num_epochs * len(train_loader)
    )

    # State of the best epoch so far. best_f1 starts below any reachable F1 so
    # the first epoch always produces a checkpoint and a report.
    best_f1 = -1.0
    best_epoch = None
    best_threshold = 0.5
    best_val_preds, best_val_labels = None, None
    patience = 2
    patience_counter = 0

    for epoch in range(num_epochs):
        start = time.time()
        model.train()
        all_preds, all_labels_epoch = [], []

        for batch in tqdm(train_loader, desc=f"Epoch {epoch+1} Training"):
            batch = {k: v.to(device) for k, v in batch.items()}
            optimizer.zero_grad()

            with autocast(enabled=use_amp):
                outputs = model(input_ids=batch['input_ids'], attention_mask=batch['attention_mask'])
                loss = loss_fn(outputs.logits, batch['labels'])

            scaler.scale(loss).backward()
            # Unscale before clipping so the norm threshold applies to true gradients.
            scaler.unscale_(optimizer)
            torch.nn.utils.clip_grad_norm_(model.parameters(), 1.0)
            scale_before = scaler.get_scale()
            scaler.step(optimizer)
            scaler.update()
            # GradScaler skips optimizer.step() on inf/NaN gradients and then
            # lowers its scale; only advance the LR schedule after a real step.
            if scaler.get_scale() >= scale_before:
                scheduler.step()

            all_preds += torch.argmax(outputs.logits, axis=1).tolist()
            all_labels_epoch += batch['labels'].tolist()

        train_f1 = f1_score(all_labels_epoch, all_preds, average='weighted')
        train_acc = accuracy_score(all_labels_epoch, all_preds)

        #  Validation
        model.eval()
        val_logits, val_labels_actual = [], []
        with torch.no_grad():
            for batch in val_loader:
                batch = {k: v.to(device) for k, v in batch.items()}
                outputs = model(input_ids=batch['input_ids'], attention_mask=batch['attention_mask'])
                val_logits += outputs.logits.tolist()
                val_labels_actual += batch['labels'].tolist()

        # Probability of the positive class (label 1) for every validation sample.
        probs = torch.softmax(torch.tensor(val_logits), dim=1)[:, 1].numpy()

        # Sweep thresholds 0.30..0.70 and keep the first one with the highest
        # weighted F1 for this epoch.
        epoch_threshold, val_f1 = 0.5, -1.0
        for threshold in np.arange(0.3, 0.71, 0.01):
            preds = (probs >= threshold).astype(int)
            f1 = f1_score(val_labels_actual, preds, average='weighted')
            if f1 > val_f1:
                val_f1, epoch_threshold = f1, float(threshold)

        val_preds = (probs >= epoch_threshold).astype(int)
        val_acc = accuracy_score(val_labels_actual, val_preds)

        print(f"\n Epoch {epoch+1} |  {(time.time()-start)/60:.2f} min | Train F1: {train_f1:.4f}, Acc: {train_acc:.4f} | Val F1: {val_f1:.4f}, Acc: {val_acc:.4f} | Best Thresh: {epoch_threshold:.2f}")

        if val_f1 > best_f1:
            # Remember everything that belongs to the checkpoint being saved so
            # the final report describes this model, not a later, worse epoch.
            best_f1 = val_f1
            best_epoch = epoch + 1
            best_threshold = epoch_threshold
            best_val_preds = val_preds.tolist()
            best_val_labels = list(val_labels_actual)
            patience_counter = 0
            model.save_pretrained(os.path.join(fold_path, "best_model"))
            tokenizer.save_pretrained(os.path.join(fold_path, "best_model"))
        else:
            patience_counter += 1
            if patience_counter >= patience:
                print(" Early stopping triggered.")
                break

    #  Final Report
    # Built from the best epoch's stored predictions, i.e. the same model and
    # threshold that were written to best_model.
    y_true, y_pred = best_val_labels, best_val_preds

    report = classification_report(y_true, y_pred, output_dict=True)
    # Extra keys read by the fold-summary cell.
    report["best_f1"] = float(best_f1)
    report["best_threshold"] = float(best_threshold)
    report["epoch"] = best_epoch
    with open(os.path.join(fold_path, "final_report.json"), "w") as f:
        json.dump(report, f, indent=2)

    # Fix the label set so the matrix is always 2x2 and matches the tick labels,
    # even if one class is absent from both y_true and y_pred.
    cm = confusion_matrix(y_true, y_pred, labels=[0, 1])
    plt.figure(figsize=(6, 5))
    sns.heatmap(cm, annot=True, fmt="d", cmap="Blues", xticklabels=["Not Relevant", "Relevant"], yticklabels=["Not Relevant", "Relevant"])
    plt.title("Confusion Matrix")
    plt.xlabel("Predicted")
    plt.ylabel("Actual")
    plt.tight_layout()
    plt.savefig(os.path.join(fold_path, "confusion_matrix.png"))
    plt.close()

    #  Clear GPU Cache
    # The optimizer and scheduler also reference the model's parameters, so drop
    # them too; collect garbage first, then release the cached CUDA blocks.
    del model, optimizer, scheduler
    gc.collect()
    torch.cuda.empty_cache()


In [None]:
import os
import json
import pandas as pd
from tabulate import tabulate

# Base path
base_path = "/content/drive/MyDrive/FIRE/Task2/fold_outputs"


def extract_fold_metrics(report):
    """Return ``(f1, threshold, epoch)`` from one fold's report dict.

    Prefers the explicit ``best_f1`` / ``best_threshold`` / ``epoch`` keys.
    When ``best_f1`` is absent, falls back to the ``"weighted avg"`` ->
    ``"f1-score"`` entry of a classification-report dict instead of silently
    reporting 0. Threshold defaults to 0 and epoch to ``"N/A"`` when missing.
    """
    f1 = report.get("best_f1")
    if f1 is None:
        f1 = report.get("weighted avg", {}).get("f1-score", 0)
    threshold = report.get("best_threshold", 0)
    return round(f1, 4), round(threshold, 4), report.get("epoch", "N/A")


def discover_fold_numbers(root, default=(1, 2, 3)):
    """Return the sorted fold numbers of the ``fold_<n>`` entries under ``root``.

    Falls back to ``default`` when ``root`` is missing or holds no such entry,
    so the per-fold "Not found" messages are still printed in that case.
    """
    if not os.path.isdir(root):
        return list(default)
    found = []
    for entry in os.listdir(root):
        prefix, _, suffix = entry.partition("_")
        if prefix == "fold" and suffix.isdigit():
            found.append(int(suffix))
    return sorted(found) or list(default)


# Setup
summary_rows = []
thresholds = []

print("\nReading Fold Results from JSON Files...\n")

# Loop through every fold directory that exists (not a hard-coded count), so
# the summary covers however many folds were actually trained.
for fold in discover_fold_numbers(base_path):
    print(f"\n{'=' * 60}")
    print(f"Fold {fold} Report")

    report_path = os.path.join(base_path, f"fold_{fold}", "final_report.json")

    if not os.path.exists(report_path):
        print(f"Not found: {report_path}")
        continue

    with open(report_path, "r") as f:
        report = json.load(f)

    best_f1, best_threshold, epoch = extract_fold_metrics(report)

    print(f"F1 Score      : {best_f1}")
    print(f"Threshold     : {best_threshold}")
    print(f"Best Epoch    : {epoch}")

    summary_rows.append({
        "Fold": fold,
        "F1 Score": best_f1,
        "Threshold": best_threshold,
        "Epoch": epoch
    })

    thresholds.append(best_threshold)

# Display Summary
if summary_rows:
    # NOTE(review): this rebinds the notebook-level name `df`; if the training
    # cell defined a `df` earlier in the same session it is overwritten here.
    df = pd.DataFrame(summary_rows)
    print("\nFold-wise Summary:\n")
    print(tabulate(df, headers="keys", tablefmt="github", showindex=False))

    print("\nOverall:")
    print(f"Avg F1 Score   : {df['F1 Score'].mean():.4f}")
    print(f"Avg Threshold  : {sum(thresholds) / len(thresholds):.4f}")
else:
    print("\nNo fold data found.")

# Optional: Ensemble Results
ensemble_path = os.path.join(base_path, "final_ensemble_report.json")

if os.path.exists(ensemble_path):
    with open(ensemble_path, "r") as f:
        result = json.load(f)

    print("\nEnsemble Final Report:")
    print(f"Ensemble F1 Score  : {result['final_f1']:.4f}")
    print(f"Ensemble Accuracy  : {result['final_accuracy']:.4f}")
    print(f"Threshold Used     : {result['final_threshold']:.4f}")
else:
    print("\nNo 'final_ensemble_report.json' found.")



🔍 Reading Fold Results from JSON Files...


📁 Fold 1 Report
✅ F1 Score      : 0.8556
✅ Threshold     : 0.7
✅ Best Epoch    : 3

📁 Fold 2 Report
✅ F1 Score      : 0.8565
✅ Threshold     : 0.7
✅ Best Epoch    : 2

📁 Fold 3 Report
✅ F1 Score      : 0.8573
✅ Threshold     : 0.7
✅ Best Epoch    : 4

📊 Fold-wise Summary:

|   Fold |   F1 Score |   Threshold |   Epoch |
|--------|------------|-------------|---------|
|      1 |     0.8556 |         0.7 |       3 |
|      2 |     0.8565 |         0.7 |       2 |
|      3 |     0.8573 |         0.7 |       4 |

📌 Overall:
🎯 Avg F1 Score   : 0.8565
🎯 Avg Threshold  : 0.7000

ℹ️ No `final_ensemble_report.json` found.
