<a href="https://colab.research.google.com/github/K-Opoku/opokuml-geosight/blob/main/full_notebook.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# Mount Google Drive into the Colab filesystem at /content/drive so the
# dataset and model checkpoints stored under MyDrive are reachable by path.
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [None]:
import os
# Project folder on Google Drive that holds the EuroSAT archive, the extracted
# images and the exported model files used by the cells below.
dataset_path='/content/drive/MyDrive/opokuml_geosight'
# exist_ok=True makes this cell safe to re-run when the folder is already there.
os.makedirs(dataset_path,exist_ok=True)


In [None]:
# NOTE(review): the triple-quoted string below holds the earlier, unguarded
# wget/unzip commands. It is only a string literal, kept for reference; the
# guarded version further down is what actually performs the work.
'''!wget --no-check-certificate -P /content/drive/MyDrive/opokuml_geosight https://madm.dfki.de/files/sentinel/EuroSAT.zip
!unzip /content/drive/MyDrive/opokuml_geosight/EuroSAT.zip -d /content/drive/MyDrive/opokuml_geosight'''
import os

# Archive location on Drive and the folder the archive expands into.
zip_path = "/content/drive/MyDrive/opokuml_geosight/EuroSAT.zip"
dataset_dir = "/content/drive/MyDrive/opokuml_geosight/2750"

# Download only if zip doesn't exist
# NOTE(review): --no-check-certificate turns off TLS certificate verification
# for this download; confirm that is still required for this host.
# NOTE(review): the existence check cannot tell a complete archive from a
# partially downloaded one.
if not os.path.exists(zip_path):
    !wget --no-check-certificate -P /content/drive/MyDrive/opokuml_geosight https://madm.dfki.de/files/sentinel/EuroSAT.zip
else:
    print("EuroSAT.zip already exists, skipping download.")

# Unzip only if dataset folder doesn't exist
if not os.path.exists(dataset_dir):
    !unzip /content/drive/MyDrive/opokuml_geosight/EuroSAT.zip -d /content/drive/MyDrive/opokuml_geosight
else:
    print("EuroSAT dataset already extracted, skipping unzip.")


EuroSAT.zip already exists, skipping download.
EuroSAT dataset already extracted, skipping unzip.


In [None]:
import os
import shutil

# --- CONFIGURATION ---
# 1. SLOW Google Drive path: the "2750" folder the download cell above
#    extracted EuroSAT.zip into. (The previous placeholder path,
#    '/content/drive/MyDrive/data/eurosat', never existed for this project,
#    so this cell could only ever print the "could not find" error.)
source_dir = '/content/drive/MyDrive/opokuml_geosight/2750'

# 2. FAST local path. The "2750" level is kept on purpose: the cells that
#    gather image paths read from '/content/eurosat_fast/2750'.
dest_dir = '/content/eurosat_fast/2750'

# --- THE COPY LOGIC ---
# Copy once per runtime; local disk reads are much faster than Drive reads.
if os.path.exists(dest_dir):
    print(f"‚úÖ Data already exists at {dest_dir}. Skipping copy.")
else:
    print(f"üöÄ Copying data from Drive to Local Disk... (This takes 1-2 mins)")
    try:
        # copytree creates any missing parent folders of dest_dir itself.
        shutil.copytree(source_dir, dest_dir)
        print(f"‚úÖ Done! Data is now ready at: {dest_dir}")
    except FileNotFoundError:
        # Best-effort by design: report the problem and let the notebook go on.
        print(f"‚ùå ERROR: Could not find your source folder: {source_dir}")
        print("Please check your Google Drive path!")


‚úÖ Data already exists at /content/eurosat_fast. Skipping copy.


In [None]:
from sklearn.model_selection import train_test_split
from torchvision import transforms
from PIL import Image
from torch.utils.data import Dataset, DataLoader

In [None]:


class OpokuEuroSAT(Dataset):
    """Map-style dataset that serves (image, label) pairs from file paths.

    Args:
        image_paths: Sequence of image file paths.
        labels: Sequence of labels, aligned index-for-index with ``image_paths``.
        transform: Optional callable applied to the RGB PIL image before it is
            returned.

    Raises:
        ValueError: If ``image_paths`` and ``labels`` differ in length.
    """

    def __init__(self, image_paths, labels, transform=None):
        # A length mismatch would otherwise surface much later as an
        # IndexError inside a DataLoader worker, or silently drop labels.
        if len(image_paths) != len(labels):
            raise ValueError(
                f"image_paths and labels must have the same length, "
                f"got {len(image_paths)} and {len(labels)}"
            )
        self.image_paths = image_paths   # plural, consistent everywhere
        self.labels = labels
        self.transform = transform

    def __len__(self):
        """Return the number of samples."""
        return len(self.image_paths)

    def __getitem__(self, idx):
        """Load sample ``idx`` and return ``(image, label)``.

        The image is opened, converted to RGB and, if a transform was given,
        passed through it.
        """
        img_path = self.image_paths[idx]
        label = self.labels[idx]

        # Image.open keeps the file handle open until the pixel data is read;
        # the context manager closes it deterministically once convert() has
        # produced an independent in-memory copy.
        with Image.open(img_path) as img:
            image = img.convert('RGB')

        if self.transform:
            image = self.transform(image)

        return image, label


In [None]:
# Defining transforms
# Channel statistics used for normalisation in both pipelines.
imagenet_mean = [0.485, 0.456, 0.406]
imagenet_std = [0.229, 0.224, 0.225]

# Training pipeline: resize, light augmentation, then tensor + normalisation.
train_transform = transforms.Compose([
    transforms.Resize((224, 224), interpolation=transforms.InterpolationMode.BICUBIC),
    transforms.RandomHorizontalFlip(),
    transforms.RandomRotation(15),
    transforms.ColorJitter(brightness=0.1, contrast=0.1),
    transforms.ToTensor(),
    transforms.Normalize(mean=imagenet_mean, std=imagenet_std),
])

# Deterministic pipeline (no augmentation) for validation and test data.
# NOTE(review): this Resize uses the library's default interpolation while the
# training pipeline requests BICUBIC - confirm the difference is intended.
clean_transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize(mean=imagenet_mean, std=imagenet_std),
])

In [None]:
import shutil
import os

# Extracted EuroSAT images on Drive, and the local folder the later cells read.
source_dir = "/content/drive/MyDrive/opokuml_geosight/2750"
dest_dir = "/content/eurosat_fast/2750"

# Guard first: without the Drive folder there is nothing to copy.
if not os.path.exists(source_dir):
    print("‚ùå Error: I can't find the '2750' folder in your Drive path.")
else:
    print(f"üöÄ Copying files from Drive to Local Runtime...")
    # dirs_exist_ok=True lets the copy run again over an existing destination.
    shutil.copytree(source_dir, dest_dir, dirs_exist_ok=True)
    print("‚úÖ Files copied! Your original code will work now.")

üöÄ Copying files from Drive to Local Runtime...
‚úÖ Files copied! Your original code will work now.


In [None]:
# Organising data paths
root_dir = '/content/eurosat_fast/2750'  # <--- The fast local path

# File extensions (compared case-insensitively) that count as images.
IMAGE_EXTENSIONS = ('.jpg', '.png', '.jpeg')

# Fail fast: continuing with empty lists would only crash later, with a much
# less helpful error, inside train_test_split.
if not os.path.isdir(root_dir):
    raise FileNotFoundError(f'The path {root_dir} does not exist')

# One class per sub-folder. Only directories are enumerated so that a stray
# file in root_dir cannot take up a class index.
classes = sorted(
    entry for entry in os.listdir(root_dir)
    if os.path.isdir(os.path.join(root_dir, entry))
)
class_to_idx = {cls_name: i for i, cls_name in enumerate(classes)}

all_image_paths = []
all_labels = []

print('Gathering files')
for cls_name in classes:
    cls_folder = os.path.join(root_dir, cls_name)
    # os.listdir returns entries in arbitrary order. Sorting makes the input
    # to train_test_split identical on every runtime, so random_state=42
    # really does reproduce the same train/val/test membership. Without it, a
    # checkpoint trained in one session could be "tested" on images it was
    # trained on in another.
    for img_name in sorted(os.listdir(cls_folder)):
        if img_name.lower().endswith(IMAGE_EXTENSIONS):
            all_image_paths.append(os.path.join(cls_folder, img_name))
            all_labels.append(class_to_idx[cls_name])

# Split into train/val/test (70% / 15% / 15%), stratified by class.
train_paths, temp_paths, train_labels, temp_labels = train_test_split(
    all_image_paths,
    all_labels,
    test_size=0.3,
    stratify=all_labels,
    random_state=42
)

val_paths, test_paths, val_labels, test_labels = train_test_split(
    temp_paths,
    temp_labels,
    test_size=0.5,
    stratify=temp_labels,
    random_state=42
)

print(f"Total Images: {len(all_image_paths)}")
print(f"Train Set:    {len(train_paths)}")
print(f"Val Set:      {len(val_paths)}")
print(f"Test Set:     {len(test_paths)}")


Gathering files
Total Images: 27000
Train Set:    18900
Val Set:      4050
Test Set:     4050


In [None]:
import torch
from torch.utils.data import DataLoader

# 1. Wrap each split in a dataset object.
# Only the training split gets the augmenting transform; validation and test
# use the deterministic one.
train_dataset = OpokuEuroSAT(image_paths=train_paths, labels=train_labels, transform=train_transform)
val_dataset = OpokuEuroSAT(image_paths=val_paths, labels=val_labels, transform=clean_transform)
test_dataset = OpokuEuroSAT(image_paths=test_paths, labels=test_labels, transform=clean_transform)

# 2. Build the loaders.
# All three share batch size and worker count; only the training loader shuffles.
print("‚öôÔ∏è Building DataLoaders...")
loader_kwargs = {'batch_size': 32, 'num_workers': 2}
train_loader = DataLoader(train_dataset, shuffle=True, **loader_kwargs)
val_loader = DataLoader(val_dataset, shuffle=False, **loader_kwargs)
test_loader = DataLoader(test_dataset, shuffle=False, **loader_kwargs)

print("‚úÖ Data is ready to train!")

‚öôÔ∏è Building DataLoaders...
‚úÖ Data is ready to train!


In [None]:
import torch.nn as nn
import torchvision.models as models

class EuroSATConvNeXt_Unfrozen(nn.Module):
    """ConvNeXt-Tiny feature extractor with a fresh linear classification head.

    Every backbone parameter is left trainable, so the whole network is
    fine-tuned rather than only the head.

    Args:
        num_classes: Number of output logits produced by the classifier.
    """

    def __init__(self, num_classes=10):
        super().__init__()

        # Keep only the convolutional feature stack of the pretrained network.
        self.features = models.convnext_tiny(weights='DEFAULT').features

        # Explicitly mark every backbone weight as trainable.
        for weight in self.features.parameters():
            weight.requires_grad = True

        # Head: global average pool -> flatten -> LayerNorm -> linear logits.
        self.avgpool = nn.AdaptiveAvgPool2d(1)
        self.norm = nn.LayerNorm(768)
        self.flatten = nn.Flatten()
        self.classifier = nn.Linear(768, num_classes)

    def forward(self, x):
        """Return the class logits for an image batch ``x``."""
        pooled = self.flatten(self.avgpool(self.features(x)))
        return self.classifier(self.norm(pooled))

In [None]:
import torch.nn as nn
import torchvision.models as models

# NOTE: this cell repeats the class defined in the previous cell. Being the
# later definition, it is the one every subsequent cell actually uses.
class EuroSATConvNeXt_Unfrozen(nn.Module):
    """ConvNeXt-Tiny backbone, fully trainable, with a linear classifier head.

    Args:
        num_classes: Number of output logits.
        pretrained: When True (the default, matching the previous behaviour)
            the backbone starts from the torchvision 'DEFAULT' weights. Pass
            False when the weights are about to be overwritten by
            ``load_state_dict`` anyway, which avoids downloading them.
    """

    def __init__(self, num_classes=10, pretrained=True):
        super().__init__()

        # Load the backbone, with or without the pretrained weights.
        backbone = models.convnext_tiny(weights='DEFAULT' if pretrained else None)
        self.features = backbone.features

        # UNFREEZE: allow the whole model to learn.
        for param in self.features.parameters():
            param.requires_grad = True

        # The head (classifier). 768 is the width the head expects from the
        # feature stack after pooling and flattening.
        self.avgpool = nn.AdaptiveAvgPool2d(1)
        self.norm = nn.LayerNorm(768)
        self.flatten = nn.Flatten()
        self.classifier = nn.Linear(768, num_classes)

    def forward(self, x):
        """Return class logits for the image batch ``x``."""
        x = self.features(x)
        x = self.avgpool(x)
        x = self.flatten(x)
        x = self.norm(x)
        x = self.classifier(x)
        return x

In [None]:
import torch.optim as optim
import sys
import copy

def make_model(learning_rate=0.0001, device='cuda'):
    """Build a 10-class EuroSAT network on ``device`` together with its AdamW optimizer.

    Returns:
        A ``(model, optimizer)`` tuple; the optimizer covers all model
        parameters with the given learning rate and a weight decay of 1e-4.
    """
    net = EuroSATConvNeXt_Unfrozen(num_classes=10)
    net = net.to(device)
    adamw = optim.AdamW(net.parameters(), lr=learning_rate, weight_decay=1e-4)
    return net, adamw

def train_and_evaluate(model, optimizer, train_loader, val_loader, criterion, num_epochs, device,
                       save_path='best_eurosat_model.pth'):
    """Train ``model`` and validate it after every epoch.

    The weights with the highest validation accuracy are kept in memory,
    written to ``save_path`` each time they improve, and loaded back into
    ``model`` before returning.

    Args:
        model: Network to train (modified in place).
        optimizer: Optimizer already bound to ``model``'s parameters.
        train_loader: Loader yielding ``(inputs, labels)`` training batches.
        val_loader: Loader yielding ``(inputs, labels)`` validation batches.
        criterion: Loss returning the batch-mean loss for ``(outputs, labels)``.
        num_epochs: Number of passes over ``train_loader``.
        device: Device the batches are moved to.
        save_path: Checkpoint file for the best weights. Defaults to the
            previously hard-coded 'best_eurosat_model.pth'; pass None to skip
            writing a file.

    Returns:
        dict with the keys 'train_loss', 'train_acc', 'val_loss' and
        'val_acc', each holding one float per epoch.

    Raises:
        ValueError: If either loader's dataset is empty.
    """
    # An empty dataset used to end in a bare ZeroDivisionError after the epoch.
    if len(train_loader.dataset) == 0 or len(val_loader.dataset) == 0:
        raise ValueError("train_loader and val_loader must each contain at least one sample")

    history = {'train_loss': [], 'train_acc': [], 'val_loss': [], 'val_acc': []}
    print(f"üöÄ Starting Training for {num_epochs} Epochs on {device}...")

    best_acc = 0.0
    best_model_wts = copy.deepcopy(model.state_dict())
    num_batches = len(train_loader)

    for epoch in range(num_epochs):
        # --- Training Phase ---
        model.train()
        running_loss = 0.0
        correct = 0
        total = 0

        print(f"\nEpoch {epoch+1}/{num_epochs}")
        print("-" * 40)

        for i, (inputs, labels) in enumerate(train_loader):
            inputs, labels = inputs.to(device), labels.to(device)

            optimizer.zero_grad()
            outputs = model(inputs)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()

            # criterion returns a batch mean; weight by batch size so the
            # epoch figure is a true per-sample average.
            running_loss += loss.item() * inputs.size(0)
            predicted = outputs.argmax(dim=1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()

            # Visual Progress Bar (Updates every 50 batches)
            if (i+1) % 50 == 0 or (i+1) == num_batches:
                sys.stdout.write(f"\r   >> Batch {i+1}/{num_batches} | Loss: {loss.item():.4f}")
                sys.stdout.flush()

        # Divide by the samples actually seen: identical to len(dataset) for a
        # normal loader, and still correct when the loader drops a last batch.
        epoch_loss = running_loss / max(total, 1)
        epoch_acc = correct / max(total, 1)

        # --- Validation Phase ---
        model.eval()
        val_correct = 0
        val_total = 0
        val_loss = 0.0

        with torch.no_grad():
            for inputs, labels in val_loader:
                inputs, labels = inputs.to(device), labels.to(device)
                outputs = model(inputs)
                loss = criterion(outputs, labels)
                val_loss += loss.item() * inputs.size(0)
                predicted = outputs.argmax(dim=1)
                val_total += labels.size(0)
                val_correct += (predicted == labels).sum().item()

        val_loss /= max(val_total, 1)
        val_acc = val_correct / max(val_total, 1)

        print(f"\n   ‚úÖ Result: Train Acc: {epoch_acc:.4f} | Val Acc: {val_acc:.4f}")

        history['train_loss'].append(epoch_loss)
        history['train_acc'].append(epoch_acc)
        history['val_loss'].append(val_loss)
        history['val_acc'].append(val_acc)

        if val_acc > best_acc:
            print(f"   üèÜ New Best Accuracy! ({best_acc:.4f} --> {val_acc:.4f}) Saving...")
            best_acc = val_acc
            # Deep copy: state_dict() returns references to the live tensors,
            # which keep changing as training continues.
            best_model_wts = copy.deepcopy(model.state_dict())
            if save_path:
                torch.save(best_model_wts, save_path)

    print(f"\nüèÅ Training Complete. Best Validation Accuracy: {best_acc:.4f}")
    model.load_state_dict(best_model_wts)
    return history

In [None]:
import os
from PIL import Image
from sklearn.model_selection import train_test_split
from torch.utils.data import DataLoader

# --- 1. DELETE THE BAD FILE ---
bad_file = '/content/eurosat_fast/2750/PermanentCrop/PermanentCrop_670.jpg'

if os.path.exists(bad_file):
    os.remove(bad_file)
    print(f"‚úÖ FOUND and DELETED the bad file: {bad_file}")
else:
    print("‚ö†Ô∏è The bad file is already gone.")

# --- 2. REFRESH THE DATA LISTS (Must do this to remove the bad path) ---
root_dir = '/content/eurosat_fast/2750'
all_image_paths = []
all_labels = []
# Only sub-folders are classes; a stray file must not take up a class index.
classes = sorted(
    entry for entry in os.listdir(root_dir)
    if os.path.isdir(os.path.join(root_dir, entry))
)
class_to_idx = {cls_name: i for i, cls_name in enumerate(classes)}

# Re-scan folders (now without the bad file).
# The listing is sorted because os.listdir order is arbitrary: with a fixed
# input order, random_state=42 reproduces the same split on every runtime.
for cls_name in classes:
    cls_folder = os.path.join(root_dir, cls_name)
    for img_name in sorted(os.listdir(cls_folder)):
        if img_name.lower().endswith(('.jpg', '.png', '.jpeg')):
            all_image_paths.append(os.path.join(cls_folder, img_name))
            all_labels.append(class_to_idx[cls_name])

# --- 3. RE-SPLIT DATA ---
train_paths, temp_paths, train_labels, temp_labels = train_test_split(
    all_image_paths, all_labels, test_size=0.3, stratify=all_labels, random_state=42
)
val_paths, test_paths, val_labels, test_labels = train_test_split(
    temp_paths, temp_labels, test_size=0.5, stratify=temp_labels, random_state=42
)

# --- 4. RE-BUILD LOADERS ---
print("‚öôÔ∏è Updating DataLoaders...")
# (Assuming OpokuEuroSAT class still exists in memory)
train_dataset = OpokuEuroSAT(train_paths, train_labels, transform=train_transform)
val_dataset   = OpokuEuroSAT(val_paths, val_labels, transform=clean_transform)
# The test split changed as well, so its dataset and loader must be rebuilt
# too. Otherwise the final evaluation keeps using the previous split, which
# can overlap the new training set and may still list the deleted file.
test_dataset  = OpokuEuroSAT(test_paths, test_labels, transform=clean_transform)

train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True, num_workers=2)
val_loader   = DataLoader(val_dataset, batch_size=32, shuffle=False, num_workers=2)
test_loader  = DataLoader(test_dataset, batch_size=32, shuffle=False, num_workers=2)

print("üöÄ FIXED! You can now run the Training Cell below.")

‚úÖ FOUND and DELETED the bad file: /content/eurosat_fast/2750/PermanentCrop/PermanentCrop_670.jpg
‚öôÔ∏è Updating DataLoaders...
üöÄ FIXED! You can now run the Training Cell below.


In [None]:
"""# 1. Setup
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
criterion = nn.CrossEntropyLoss()
# 2. Make Model
model, optimizer = make_model(learning_rate=0.0001, device=device)

# 3. Train
history = train_and_evaluate(
    model=model,
    optimizer=optimizer,
    train_loader=train_loader,
    val_loader=val_loader,
    criterion=criterion,
    num_epochs=10,
    device=device
)"""

'# 1. Setup\ndevice = torch.device("cuda" if torch.cuda.is_available() else "cpu")\ncriterion = nn.CrossEntropyLoss()\n# 2. Make Model\nmodel, optimizer = make_model(learning_rate=0.0001, device=device)\n\n# 3. Train\nhistory = train_and_evaluate(\n    model=model,\n    optimizer=optimizer,\n    train_loader=train_loader,\n    val_loader=val_loader,\n    criterion=criterion,\n    num_epochs=10,\n    device=device\n)'

In [None]:
import shutil
import os

# Where the checkpoint lives locally and where its backup goes on Drive
# (the same Drive folder that holds the dataset archive).
drive_folder = '/content/drive/MyDrive/opokuml_geosight'
source_file = 'best_eurosat_model.pth'
destination_file = os.path.join(drive_folder, 'eurosat_97_acc.pth')

# Nothing to back up until a checkpoint has been written locally.
if not os.path.exists(source_file):
    print("‚ö†Ô∏è Error: Could not find the model file yet. Did training start?")
else:
    print(f"üöÄ Saving model to Google Drive: {destination_file}...")
    shutil.copy(source_file, destination_file)
    print("‚úÖ SAVED! Your model is safe. You can close the tab now.")

‚ö†Ô∏è Error: Could not find the model file yet. Did training start?


In [None]:
# Use the GPU when one is available, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")

In [None]:
import torch

# 1. Setup device
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Rebuild the architecture and restore the fine-tuned weights from Drive.
# map_location remaps the stored tensors onto the current device, so a
# checkpoint saved on a GPU runtime also loads on a CPU-only runtime (the
# export cell further down loads the same file the same way).
model = EuroSATConvNeXt_Unfrozen(num_classes=10)
state_dict = torch.load(
    '/content/drive/MyDrive/opokuml_geosight/eurosat_97_acc.pth',
    map_location=device
)
model.load_state_dict(state_dict)
model.to(device)
model.eval()  # inference mode for layers that behave differently in training


# 2. Check on Test Data
correct = 0
total = 0
print("üìù Running Final Exam on Test Set...")

with torch.no_grad():  # no gradients needed for evaluation
    for inputs, labels in test_loader: # Note: We use test_loader here
        inputs, labels = inputs.to(device), labels.to(device)
        outputs = model(inputs)
        predicted = outputs.argmax(dim=1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

final_acc = correct / total
print("-" * 30)
print(f"üèÜ OFFICIAL TEST ACCURACY: {final_acc:.4f} ({final_acc*100:.2f}%)")
print("-" * 30)

üìù Running Final Exam on Test Set...
------------------------------
üèÜ OFFICIAL TEST ACCURACY: 0.9886 (98.86%)
------------------------------


Converting the model to ONNX

In [None]:
!pip install onnx onnxscript onnxruntime

Collecting onnx
  Downloading onnx-1.20.0-cp312-abi3-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl.metadata (8.4 kB)
Collecting onnxscript
  Downloading onnxscript-0.5.7-py3-none-any.whl.metadata (13 kB)
Collecting onnxruntime
  Downloading onnxruntime-1.23.2-cp312-cp312-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl.metadata (5.1 kB)
Collecting onnx_ir<2,>=0.1.12 (from onnxscript)
  Downloading onnx_ir-0.1.13-py3-none-any.whl.metadata (3.2 kB)
Collecting coloredlogs (from onnxruntime)
  Downloading coloredlogs-15.0.1-py2.py3-none-any.whl.metadata (12 kB)
Collecting humanfriendly>=9.1 (from coloredlogs->onnxruntime)
  Downloading humanfriendly-10.0-py2.py3-none-any.whl.metadata (9.2 kB)
Downloading onnx-1.20.0-cp312-abi3-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl (18.1 MB)
[2K   [90m‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ[0m [32m18.1/18.1 MB[0m [31m100.6 MB/s[0m eta [36m0:00:00[0m
[?25hDo

In [None]:
# Rebuild the network and restore the fine-tuned weights before exporting.
model = EuroSATConvNeXt_Unfrozen(num_classes=10)
model.load_state_dict(torch.load(
    '/content/drive/MyDrive/opokuml_geosight/eurosat_97_acc.pth',
    map_location=device   # ensures weights load correctly on CPU/GPU
))
model.to(device)
model.eval()


# Dummy input with the shape the model was trained on: one 3x224x224 image,
# created directly on the target device.
dummy_input = torch.randn(1, 3, 224, 224, device=device)

# Export with torch.onnx.export.
# opset_version=18: with the exporter used in this environment, requesting
# opset 11 made it build an opset-18 graph and then fail to convert it back
# down (see the logged traceback), so the file ended up at opset 18 anyway.
# Asking for 18 directly gives the same model without the failed conversion.
# The result is bound to a name so the notebook does not print the whole
# exported graph as the cell output.
onnx_program = torch.onnx.export(
    model,
    dummy_input,
    'eurosat.onnx',
    export_params=True,
    opset_version=18,
    do_constant_folding=True,
    input_names=['input'],
    output_names=['output'],
    # Leave the batch dimension symbolic on both input and output.
    dynamic_axes={'input': {0: 'batch_size'}, 'output': {0: 'batch_size'}},
)


  torch.onnx.export(model,
W1226 16:07:35.857000 177 torch/onnx/_internal/exporter/_compat.py:114] Setting ONNX exporter to use operator set version 18 because the requested opset_version 11 is a lower version than we have implementations for. Automatic version conversion will be performed, which may not be successful at converting to the requested version. If version conversion is unsuccessful, the opset version of the exported model will be kept at 18. Please consider setting opset_version >=18 to leverage latest ONNX features


[torch.onnx] Obtain model graph for `EuroSATConvNeXt_Unfrozen([...]` with `torch.export.export(..., strict=False)`...
[torch.onnx] Obtain model graph for `EuroSATConvNeXt_Unfrozen([...]` with `torch.export.export(..., strict=False)`... ‚úÖ
[torch.onnx] Run decomposition...




[torch.onnx] Run decomposition... ‚úÖ
[torch.onnx] Translate the graph into ONNX...
[torch.onnx] Translate the graph into ONNX... ‚úÖ


Traceback (most recent call last):
  File "/usr/local/lib/python3.12/dist-packages/onnxscript/version_converter/__init__.py", line 127, in call
    converted_proto = _c_api_utils.call_onnx_api(
                      ^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/usr/local/lib/python3.12/dist-packages/onnxscript/version_converter/_c_api_utils.py", line 65, in call_onnx_api
    result = func(proto)
             ^^^^^^^^^^^
  File "/usr/local/lib/python3.12/dist-packages/onnxscript/version_converter/__init__.py", line 122, in _partial_convert_version
    return onnx.version_converter.convert_version(
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/usr/local/lib/python3.12/dist-packages/onnx/version_converter.py", line 39, in convert_version
    converted_model_str = C.convert_version(model_str, target_version)
                          ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
RuntimeError: /github/workspace/onnx/version_converter/adapters/axes_input_to_attribute.h:65: adapt: Asserti

ONNXProgram(
    model=
        <
            ir_version=10,
            opset_imports={'': 18},
            producer_name='pytorch',
            producer_version='2.9.0+cu126',
            domain=None,
            model_version=None,
        >
        graph(
            name=main_graph,
            inputs=(
                %"input"<FLOAT,[s77,3,224,224]>
            ),
            outputs=(
                %"output"<FLOAT,[1,10]>
            ),
            initializers=(
                %"features.0.0.bias"<FLOAT,[96]>{TorchTensor(...)},
                %"features.0.1.weight"<FLOAT,[96]>{TorchTensor(...)},
                %"features.0.1.bias"<FLOAT,[96]>{TorchTensor(...)},
                %"features.1.0.layer_scale"<FLOAT,[96,1,1]>{TorchTensor(...)},
                %"features.1.0.block.0.bias"<FLOAT,[96]>{TorchTensor(...)},
                %"features.1.0.block.2.weight"<FLOAT,[96]>{TorchTensor(...)},
                %"features.1.0.block.2.bias"<FLOAT,[96]>{TorchTensor(...)},
        

In [None]:
import onnx
# Load the exported file back from disk and run ONNX's model checker on it.
# 'succeeded' is printed only if the check returns without raising.
onnx_model=onnx.load('eurosat.onnx')
onnx.checker.check_model(onnx_model)
print('succeeded')

succeeded


In [None]:
# Back up the exported ONNX file to the project folder on Google Drive.
# NOTE(review): the export cell writes the relative path 'eurosat.onnx' while
# this copy reads '/content/eurosat.onnx'; the two match only when the working
# directory is /content - confirm.
# Relies on `shutil` having been imported by an earlier cell.
shutil.copy('/content/eurosat.onnx','/content/drive/MyDrive/opokuml_geosight/eurosat.onnx')
print('save completed')

save completed


In [None]:
# NOTE(review): redundant - onnxruntime is already installed by the earlier
# install cell that precedes the ONNX export.
!pip install onnxruntime



In [None]:
import onnxruntime as ort
import numpy as np

# Open the exported model with ONNX Runtime.
session = ort.InferenceSession("/content/eurosat.onnx")

# Pull a single batch from the test loader and keep only its first sample.
test_image, test_label = next(iter(test_loader))
test_tensor = test_image[0:1]            # shape (1,3,224,224)
true_label = test_label[0].item()

# Reference prediction from the PyTorch model.
model.eval()
with torch.no_grad():
    pytorch_output = model(test_tensor.to(device))

print("‚úÖ True label:", true_label)
print("PyTorch prediction:", pytorch_output.argmax(dim=1).item())

# Same sample through ONNX Runtime, which takes NumPy arrays keyed by the
# input name chosen at export time.
onnx_inputs = {"input": test_tensor.cpu().numpy()}
onnx_output = session.run(None, onnx_inputs)[0]

print("ONNX prediction:", np.argmax(onnx_output, axis=1)[0])



‚úÖ True label: 6
PyTorch prediction: 6
ONNX prediction: 6
