# GhanaSegNet Colab Training Notebook

This notebook sets up your environment, installs dependencies, and runs the baseline training script for UNet, DeepLabV3+, and SegFormer-B0 on a Colab GPU.

## Setup Instructions:
1. Run each cell in order
2. Make sure a GPU is enabled: Runtime > Change runtime type > Hardware accelerator > GPU
3. Your data should be uploaded to Google Drive or included in your GitHub repo (direct upload to Colab and Kaggle download are offered as alternatives in the cells below)

In [None]:
# Attach Google Drive at /content/drive (needed when the dataset lives on Drive)
from google.colab import drive
drive.mount('/content/drive')

# Report whether PyTorch can see a CUDA device, and which one
import torch
cuda_ok = torch.cuda.is_available()
print(f"CUDA available: {cuda_ok}")
print(
    f"GPU: {torch.cuda.get_device_name(0)}"
    if cuda_ok
    else "No GPU detected - switch to GPU runtime!"
)

In [None]:
# Clone your GitHub repo
!git clone https://github.com/EricBaidoo/GhanaSegNet.git
%cd GhanaSegNet

# Check if we have the expected files
!ls -la

In [None]:
# Download and extract data from Google Drive
# First, upload your data.tar.gz to Google Drive, then update the path below

# Option 1: If you uploaded data.tar.gz to Drive
# !cp "/content/drive/MyDrive/data.tar.gz" .
# !tar -xzf data.tar.gz

# Option 2: If you uploaded the data folder directly to Drive
!cp -r "/content/drive/MyDrive/data" .

# Verify data is present
!ls -la data/
!echo "Train images:" && ls data/train/images/ | wc -l
!echo "Train masks:" && ls data/train/masks/ | wc -l

In [None]:
# Alternative: Upload data directly to Colab (slower for large datasets)
# This whole cell is optional and disabled by default.
# Uncomment the lines below if you prefer direct upload.
# Uploaded .zip and .tar.gz archives are extracted into the current directory.

# from google.colab import files
# import zipfile
# import os

# # Upload your data.zip file
# uploaded = files.upload()
# 
# # Extract the uploaded file
# for filename in uploaded.keys():
#     if filename.endswith('.zip'):
#         with zipfile.ZipFile(filename, 'r') as zip_ref:
#             zip_ref.extractall('.')
#     elif filename.endswith('.tar.gz'):
#         !tar -xzf {filename}

# print("Upload complete!")

In [None]:
# Option 3: Download from Kaggle (after uploading your dataset there)
# This whole cell is optional and disabled by default.
# 1. Upload your data to Kaggle as a public dataset
# 2. Get your Kaggle API credentials (kaggle.json) from kaggle.com/account
# 3. Uncomment and run the code below
# NOTE: kaggle.json holds your API credentials - keep it out of version control.

# !pip install kaggle
# from google.colab import files
# 
# # Upload your kaggle.json file
# uploaded = files.upload()  # Upload kaggle.json
# !mkdir -p ~/.kaggle
# !cp kaggle.json ~/.kaggle/
# !chmod 600 ~/.kaggle/kaggle.json
# 
# # Download your dataset (replace with your dataset path)
# !kaggle datasets download yourusername/ghanasegnet-data
# !unzip ghanasegnet-data.zip

In [None]:
# Install dependencies
!pip install -r requirements.txt

# Install additional packages that might be needed
!pip install transformers albumentations

# Verify installations
import torch
import torchvision
import transformers
print(f"PyTorch: {torch.__version__}")
print(f"Torchvision: {torchvision.__version__}")
print(f"Transformers: {transformers.__version__}")

In [None]:
# Run training - choose one of the following:

# Train all models (this will take a long time)
# !python scripts/train_baselines.py --model all --epochs 1

# Train individual models:
# UNet only
# !python scripts/train_baselines.py --model unet --epochs 1

# DeepLabV3+ only  
!python scripts/train_baselines.py --model deeplabv3plus --epochs 1

# SegFormer only
# !python scripts/train_baselines.py --model segformer --epochs 1

In [None]:
# Save results and checkpoints to Google Drive
import glob
import os
import shutil
from datetime import datetime

# Where Colab exposes the user's Drive once the Drive mount cell has been run
DRIVE_ROOT = "/content/drive/MyDrive"


def save_results(save_dir, checkpoints_dir="checkpoints", json_dir=".", drive_root=DRIVE_ROOT):
    """Copy the checkpoints folder and top-level JSON files into ``save_dir``.

    Args:
        save_dir: Destination directory; created if it does not exist.
        checkpoints_dir: Directory holding the training checkpoints.
        json_dir: Directory whose ``*.json`` files are copied alongside.
        drive_root: Directory that must already exist for the save to proceed.
            Guards against writing to the ephemeral VM disk when Drive is not
            mounted.

    Returns:
        True when the copy was performed, False when it was skipped because
        Drive or the checkpoints directory is missing.

    Raises:
        OSError / shutil.Error: if a copy fails part-way, so a failed save is
            never reported as a success.
    """
    if not os.path.isdir(drive_root):
        print(f"Google Drive not found at {drive_root} - run the Drive mount cell first")
        return False
    if not os.path.isdir(checkpoints_dir):
        print("No checkpoints directory found - training may have failed")
        return False

    os.makedirs(save_dir, exist_ok=True)

    # Keep the folder name (e.g. "checkpoints") under save_dir, like `cp -r` did
    checkpoints_name = os.path.basename(os.path.normpath(checkpoints_dir))
    shutil.copytree(checkpoints_dir, os.path.join(save_dir, checkpoints_name), dirs_exist_ok=True)

    json_files = [
        path for path in sorted(glob.glob(os.path.join(json_dir, "*.json")))
        if os.path.isfile(path)
    ]
    if json_files:
        for path in json_files:
            shutil.copy2(path, save_dir)
    else:
        print("No JSON files to copy")

    print(f"Results saved to: {save_dir}")
    return True


# Create timestamped folder name so repeated runs never overwrite each other
timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
save_dir = f"{DRIVE_ROOT}/GhanaSegNet_results_{timestamp}"

# Copy checkpoints and results, then list what was saved
if save_results(save_dir):
    print("Saved items:")
    for name in sorted(os.listdir(save_dir)):
        print(f"  {name}")
else:
    print("Save directory not created")

In [None]:
# Check training results
import json
import os

# Check if training summary exists
if os.path.exists("checkpoints/training_summary.json"):
    with open("checkpoints/training_summary.json", "r") as f:
        results = json.load(f)
    print("Training Summary:")
    for model, result in results.items():
        print(f"{model.upper()}: IoU={result['best_iou']:.4f} ({result['status']})")
else:
    print("No training summary found yet")

# List checkpoint directories
if os.path.exists("checkpoints"):
    print("\nCheckpoint directories:")
    !ls -la checkpoints/
else:
    print("No checkpoints directory found")