## Step 1: Verify GPU Availability

In [None]:
import torch

# Ask CUDA once and reuse the answer for both the report and the branch below.
cuda_ready = torch.cuda.is_available()
print("GPU Available:", cuda_ready)

if not cuda_ready:
    # Point the user at the Colab menu entry that switches the runtime type.
    print(" No GPU detected!")
    print("Go to Runtime → Change runtime type → GPU")
else:
    # Device index 0 is the only device inspected here.
    print("GPU Device:", torch.cuda.get_device_name(0))
    device_props = torch.cuda.get_device_properties(0)
    # total_memory is divided by 1e9 to report decimal gigabytes.
    print("GPU Memory:", device_props.total_memory / 1e9, "GB")

## Step 2: Clone Repository and Install Dependencies

In [None]:
!git clone https://github.com/AlexSkogum/image-captioning-app.git
%cd image-captioning-app
!pwd

In [None]:
!pip install -q torch torchvision fastapi gradio pillow pandas numpy requests nltk pyyaml
print("Dependencies installed")

## Step 3: Configure Kaggle API and Download Dataset

In [None]:
import json
import os
from getpass import getpass
from pathlib import Path

KAGGLE_DIR = Path('/root/.kaggle')
KAGGLE_JSON = KAGGLE_DIR / 'kaggle.json'
KAGGLE_DIR.mkdir(parents=True, exist_ok=True)

# Option A: Upload kaggle.json from your computer
# from google.colab import files
# files.upload()  # Select kaggle.json
# !mv kaggle.json /root/.kaggle/
# !chmod 600 /root/.kaggle/kaggle.json

# Option B: Provide credentials at run time.
# Credentials are read from the KAGGLE_USERNAME / KAGGLE_KEY environment
# variables when set, otherwise prompted for. They are never typed into the
# notebook source, so they cannot be committed or shared by accident.
if KAGGLE_JSON.exists():
    # A file placed by Option A (or a previous run) is reused, not overwritten.
    kaggle_config = json.loads(KAGGLE_JSON.read_text())
    print(f"Using existing {KAGGLE_JSON}")
else:
    kaggle_config = {
        "username": os.environ.get("KAGGLE_USERNAME") or input("Kaggle username: ").strip(),
        "key": os.environ.get("KAGGLE_KEY") or getpass("Kaggle API key: ").strip(),
    }

# Fail here, with a clear message, instead of later inside the kaggle CLI.
if not kaggle_config.get("username") or not kaggle_config.get("key"):
    raise ValueError("Kaggle credentials are incomplete: both 'username' and 'key' are required")

if not KAGGLE_JSON.exists():
    # Create the file with owner-only permissions from the start so the key is
    # never briefly readable by other users.
    fd = os.open(KAGGLE_JSON, os.O_WRONLY | os.O_CREAT | os.O_TRUNC, 0o600)
    with os.fdopen(fd, 'w') as f:
        json.dump(kaggle_config, f)
    print(f"Kaggle credentials written to {KAGGLE_JSON}")

# Enforce owner-only permissions on an uploaded file as well.
KAGGLE_JSON.chmod(0o600)
print("Kaggle configured")

In [None]:
# Download and extract Flickr8k dataset
!mkdir -p data
!kaggle datasets download -d shadabhussain/flickr8k -p data/ --unzip
print( Flickr8k dataset downloaded")

## Step 4: Prepare Dataset and Build Vocabulary

In [None]:
# Prepare the dataset (creates captions.csv)
!python scripts/prepare_flickr8k.py
print("Dataset prepared")

In [None]:
# Build vocabulary from captions
!python scripts/build_vocab.py
print("Vocabulary built")

In [None]:
# Sanity-check the CSV written by the dataset preparation step
import pandas as pd

captions_csv = 'data/captions.csv'

if not os.path.exists(captions_csv):
    # Nothing to inspect; report it and leave `df` undefined.
    print("  captions.csv not found")
else:
    df = pd.read_csv(captions_csv)
    # Report the shape and column names, then a short preview.
    print(f"Dataset shape: {df.shape}")
    print(f"Columns: {list(df.columns)}")
    print("\nFirst 3 rows:")
    print(df.head(3))

## Step 5: Train the Model

In [None]:
# Train the model on GPU
!python -m src.train --config configs/config.yaml
print("Training complete!")

## Step 6: Download the Trained Checkpoint

In [None]:
import os
from google.colab import files

# Path, relative to the current working directory, of the checkpoint to fetch.
checkpoint_path = 'checkpoints/best.pth'

if not os.path.exists(checkpoint_path):
    print(f" Checkpoint not found at {checkpoint_path}")
else:
    # Hand the file to the Colab download helper.
    files.download(checkpoint_path)
    print(f" Downloaded {checkpoint_path}")

## Next Steps: Use the Trained Model Locally

1. **Download** the checkpoint file (`best.pth`) by running the Step 6 cell above
2. **Place it** in your local `checkpoints/` folder
3. **Restart** your local API and Gradio UI:

```bash
# Terminal 1: Start API
python -m uvicorn src.api.main:app --reload --port 8000

# Terminal 2: Start Gradio UI
python web/gradio_app.py
```

Then open: **http://localhost:7860**