In [3]:
import torch
import os
import matplotlib.pyplot as plt
from data_loader import CrossModalDatasetLoader
from models.multimodal_model import CrossModalModel
from trainer.train import Trainer
from evaluation.captioning_evaluator import CaptioningEvaluator
from evaluation.vqa_evaluator import VQAEvaluator
from evaluation.efficiency_evaluator import EfficiencyEvaluator
from utils.config_loader import load_config

# Clear GPU cache
torch.cuda.empty_cache()
print("✅ GPU cache cleared.")

# Load config yaml (adjust path if needed)
config = load_config('./configs/config.yaml')
print(f"Using device: {torch.cuda.current_device() if torch.cuda.is_available() else 'cpu'}")

# Device setup
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Load datasets
dataset_loader = CrossModalDatasetLoader(config)
train_dataset = dataset_loader.load_coco(split="train")
val_dataset = dataset_loader.load_coco(split="val")
print(f"Train dataset size: {len(train_dataset)}")
print(f"Validation dataset size: {len(val_dataset)}")

# Initialize model and trainer
model = CrossModalModel(device=device, rank_k=config['rank_k']).to(device)
trainer = Trainer(model=model, dataset=train_dataset, config=config, device=device)

# Run training
trainer.train()

# Save final model checkpoint
save_path = "./results/final_model.pth"
torch.save(model.state_dict(), save_path)
print(f"✅ Model saved at {save_path}")

# Run Captioning Evaluation
caption_eval = CaptioningEvaluator(
    ground_truth_path=os.path.join(config['datasets_path'], "nocaps/ground_truth_coco_val2017.json"),
    predictions_path="./results/generated_captions_coco_val2017.json",
    save_dir="./results/captioning/"
)
caption_eval.evaluate()

# Run VQA Evaluation
vqa_eval = VQAEvaluator(
    ground_truth_path=os.path.join(config['datasets_path'], "vqa2/ground_truth.json"),
    predictions_path="./results/vqa_predictions.json",
    save_dir="./results/vqa/"
)
vqa_eval.evaluate()

# Run Efficiency Evaluation
efficiency_eval = EfficiencyEvaluator(model, save_dir="./results/efficiency/")
efficiency_eval.evaluate()

# Example: Plot training loss curve if Trainer records it (assuming trainer.losses list)
if hasattr(trainer, 'losses'):
    plt.plot(trainer.losses)
    plt.title("Training Loss Curve")
    plt.xlabel("Batch #")
    plt.ylabel("Loss")
    plt.show()


[nltk_data] Downloading package wordnet to /home/loom/nltk_data...
[nltk_data]   Package wordnet is already up-to-date!
[nltk_data] Downloading package punkt to /home/loom/nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package omw-1.4 to /home/loom/nltk_data...
[nltk_data]   Package omw-1.4 is already up-to-date!


✅ GPU cache cleared.
Using device: 0
Loaded 591753 valid COCO samples, skipped 0 missing images.
Loaded 25014 valid COCO samples, skipped 0 missing images.
Train dataset size: 591753
Validation dataset size: 25014


  self.scaler = GradScaler()
  with autocast():
                                                                                

Epoch [1] finished. Average Loss: 0.693152


                                                                                

Epoch [2] finished. Average Loss: 0.693148


                                                                                

Epoch [3] finished. Average Loss: 0.693147


                                                                                

Epoch [4] finished. Average Loss: 0.693147


                                                                                

Epoch [5] finished. Average Loss: 0.693147


                                                                                

Epoch [6] finished. Average Loss: 0.693147


                                                                                

Epoch [7] finished. Average Loss: 0.693147


                                                                                

Epoch [8] finished. Average Loss: 0.693147


                                                                                

Epoch [9] finished. Average Loss: 0.693147


                                                                                

Epoch [10] finished. Average Loss: 0.693147
✅ Model successfully saved after training.
✅ Model saved at ./results/final_model.pth




TypeError: __init__() got an unexpected keyword argument 'predictions_path'

In [None]:
# Cell 2: Configuration and Data Setup

# Load config yaml or set manually
import yaml
with open('configs/config.yaml', 'r') as f:
    config = yaml.safe_load(f)

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(f"Using device: {device}")

# Instantiate dataset and model
train_dataset = CustomDataset(config['dataset']['train_path'])
model = MultimodalModel(config['model']).to(device)

# Trainer
trainer = Trainer(model=model, dataset=train_dataset, config=config['training'], device=device)

print("✅ Config and dataset/model setup complete.")


In [None]:
# Cell 3: Run Training and Save Model

trainer.train()

# Model saved inside trainer.train() - confirm path or save again if needed
print("✅ Training complete and model saved.")


In [None]:
# Cell 4: Run Captioning Evaluation

caption_eval = CaptioningEvaluator(
    ground_truth_path="./datasets/nocaps/ground_truth_coco_val2017.json",
    generated_captions_path="./results/generated_captions_coco_val2017.json",
    save_dir="./results/captioning/"
)
caption_eval.evaluate()
print("✅ Captioning evaluation done.")


In [None]:
# Cell 5: Run VQA Evaluation

vqa_eval = VQAEvaluator(
    ground_truth_path="./datasets/vqa2/ground_truth.json",
    predictions_path="./results/vqa_predictions.json",
    save_dir="./results/vqa/"
)
vqa_eval.evaluate()
print("✅ VQA evaluation done.")


In [None]:
# Cell 6: Run Efficiency Evaluation

eff_eval = EfficiencyEvaluator(model=model, save_dir="./results/efficiency/")
eff_eval.evaluate()
print("✅ Efficiency evaluation done.")


In [None]:
# Cell 7: Run Ablation Study (optional, long runtime)

runner = AblationRunner()
runner.run_ablation(rank_list=[16, 32, 64, 128, 256])
print("✅ Ablation studies completed.")


In [None]:
# Cell 8: Visualize and Summarize Results (you can expand this with matplotlib/seaborn)

import json
import matplotlib.pyplot as plt

def load_json_results(file_path):
    with open(file_path, 'r') as f:
        return json.load(f)

caption_results = load_json_results('./results/captioning/captioning_results.json')
vqa_results = load_json_results('./results/vqa/vqa_results.json')
efficiency_results = load_json_results('./results/efficiency/efficiency_results.json')

print("Captioning Results:", caption_results)
print("VQA Results:", vqa_results)
print("Efficiency Results:", efficiency_results)

# Example plotting BLEU from captioning results
plt.bar(caption_results.keys(), caption_results.values())
plt.title("Captioning Evaluation Metrics")
plt.show()


In [None]:
# Cell 9: Final summary

print("""
Project Evaluation Summary:

- Training completed and model saved.
- Captioning, VQA, efficiency, and ablation evaluations executed.
- Results saved to ./results/ folders.
- Summary table and plots generated.

You can export this notebook as HTML or PDF to include in your paper draft.

Next Steps:
- Analyze ablation study CSV/log files for detailed insights.
- Customize plots for paper figures.
- Prepare manuscript with methodology and results sections.

""")
