## 1) Verify GPU

In [None]:
import torch, sys

# Report whether CUDA is usable and stop the notebook early when it is not,
# so later cells do not fail in less obvious ways.
has_gpu = torch.cuda.is_available()
print('GPU Available:', has_gpu)
if not has_gpu:
    raise SystemExit('No GPU detected. Go to Runtime → Change runtime type → GPU')

# Device 0 is the only device queried here.
gpu_props = torch.cuda.get_device_properties(0)
print('Device:', torch.cuda.get_device_name(0))
print('Memory (GB):', round(gpu_props.total_memory/1e9, 2))

## 2) Clone repo and install dependencies

In [None]:
import os
!git clone https://github.com/AlexSkogum/image-captioning-app.git /content/app 2>/dev/null || echo 'Repo exists'
os.chdir('/content/app')
print('CWD:', os.getcwd())
# Keep torch as provided by Colab; install project extras
!pip install -q kaggle pandas pillow numpy requests nltk pyyaml
import nltk; nltk.download('punkt', quiet=True)
print('✓ Dependencies installed & NLTK punkt downloaded')

## 3) Configure Kaggle credentials

In [None]:
import os, json

# Location where the Kaggle CLI is expected to find its credentials.
kaggle_dir = os.path.expanduser('~/.kaggle')
os.makedirs(kaggle_dir, exist_ok=True)
kaggle_json = os.path.join(kaggle_dir, 'kaggle.json')

# Option A: Upload kaggle.json
# from google.colab import files
# uploaded = files.upload()
# for fn in uploaded:
#     if fn.endswith('kaggle.json'):
#         !mv {fn} {kaggle_dir}/kaggle.json
#         !chmod 600 {kaggle_dir}/kaggle.json
#         print('✓ kaggle.json configured')

# Option B: Inline credentials (edit below)
kaggle_config = { 'username': 'YOUR_KAGGLE_USERNAME', 'key': 'YOUR_KAGGLE_API_KEY' }

# Only write the inline values once the placeholders have been replaced;
# otherwise a kaggle.json uploaded through Option A would be overwritten with
# unusable placeholder credentials.
inline_edited = not any(str(value).startswith('YOUR_KAGGLE_') for value in kaggle_config.values())

if inline_edited:
    # Create the file with mode 0600 from the start so the key is never
    # readable by other users, then chmod in case the file already existed.
    fd = os.open(kaggle_json, os.O_WRONLY | os.O_CREAT | os.O_TRUNC, 0o600)
    with os.fdopen(fd, 'w') as f:
        json.dump(kaggle_config, f)
    os.chmod(kaggle_json, 0o600)
    print('✓ kaggle.json written from inline credentials')
elif os.path.exists(kaggle_json):
    os.chmod(kaggle_json, 0o600)
    print('✓ Using existing kaggle.json')
else:
    print('⚠️  Replace YOUR_KAGGLE_USERNAME and YOUR_KAGGLE_API_KEY if you did not upload kaggle.json')

## 4) Download Flickr8k (~5–10 min)

In [None]:
import os, time
!mkdir -p data
print('Downloading Flickr8k...')
t0=time.time(); !kaggle datasets download -d shadabhussain/flickr8k -p data --unzip -q
print(f'✓ Downloaded in {time.time()-t0:.1f}s')
imgs = os.listdir('data/Images') if os.path.exists('data/Images') else []
print('Images:', len(imgs)); assert len(imgs)>0, 'No images found; check Kaggle credentials.'

## 5) Prepare dataset and build vocabulary

In [None]:
# Prepare captions.csv
!python scripts/prepare_flickr8k.py
import os, pandas as pd
assert os.path.exists('data/captions.csv'), 'captions.csv missing'
df = pd.read_csv('data/captions.csv'); print('Rows:', len(df)); display(df.head(3))
# Build vocab
!python scripts/build_vocab.py
import pickle
with open('data/vocab.pkl','rb') as f: vocab = pickle.load(f)
print('Vocab size:', len(vocab))

## 6) (Optional) Speed tweaks to config

In [None]:
import yaml

path='configs/config.yaml'

# Parse the training config into plain Python objects.
with open(path) as config_file:
    cfg=yaml.safe_load(config_file)

# Collects a (key path, new value) entry for every setting that gets modified.
changed=[]
def setk(keys,val,config=None,log=None):
    """Overwrite an existing nested config value and record the change.

    The value is only replaced when every key along ``keys`` already exists
    and the current value differs from ``val``; missing keys are never created.

    Args:
        keys: Sequence of keys forming the path to the setting,
            e.g. ``['training', 'max_epochs']``.
        val: Value to store at that path.
        config: Mapping to modify. Defaults to the notebook-level ``cfg``.
        log: List that receives a ``(keys, val)`` tuple for each change.
            Defaults to the notebook-level ``changed``.

    Returns:
        None. ``config`` and ``log`` are modified in place.
    """
    d = cfg if config is None else config
    log = changed if log is None else log
    if not keys:
        return
    # Walk down to the parent of the final key; bail out on any missing link.
    for k in keys[:-1]:
        if not (isinstance(d, dict) and k in d):
            return
        d = d[k]
    leaf = keys[-1]
    # The parent may be None or a scalar (e.g. an empty `training:` entry in
    # the YAML); the isinstance guard avoids a TypeError from `leaf in d`.
    if isinstance(d, dict) and leaf in d and d[leaf] != val:
        d[leaf] = val
        log.append((keys, val))
# Apply the tweaks; setk only touches keys that already exist in the config.
setk(['training','max_epochs'],5)
setk(['training','batch_size'],64)

# Switch to resnet50 when the configured encoder is not one of the listed
# ResNet variants. The isinstance checks cover an empty YAML file (cfg is None)
# and a non-mapping `model` entry, both of which would otherwise raise TypeError.
model_cfg = cfg.get('model') if isinstance(cfg, dict) else None
if isinstance(model_cfg, dict) and 'encoder' in model_cfg and model_cfg['encoder'] not in ('resnet18','resnet34','resnet50'):
    model_cfg['encoder']='resnet50'
    changed.append((['model','encoder'],'resnet50'))

# Rewrite the file only when something changed: a YAML round-trip drops
# comments and formatting, so an unchanged config is left untouched on disk.
if changed:
    with open(path,'w') as f:
        yaml.safe_dump(cfg,f,sort_keys=False)
print('Changed:', changed if changed else 'None')

## 7) Train

In [None]:
import time, os
t0=time.time()
!python -m src.train --config configs/config.yaml
mins=round((time.time()-t0)/60,1); print('Training time (min):', mins)
print('Checkpoint exists:', os.path.exists('checkpoints/best.pth'))
if os.path.exists('checkpoints/best.pth'):
    print('Checkpoint size (MB):', round(os.path.getsize('checkpoints/best.pth')/1e6,2))

## 8) Download checkpoint

In [None]:
import os
from google.colab import files

# Hand the checkpoint to the browser for download when it exists.
ck='checkpoints/best.pth'
if not os.path.exists(ck):
    print('best.pth not found. Check training logs above.')
else:
    print('Downloading best.pth ...')
    files.download(ck)