In [139]:
# Imports and Configuration

import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader
import pandas as pd
import numpy as np
import os
from skimage import io
import albumentations as A
from albumentations.pytorch import ToTensorV2
from sklearn.cluster import KMeans
from efficientnet_pytorch import EfficientNet

# Configuration
# Shared settings read by the data-loading, model-loading and prediction cells.
CONFIG = {
    # Run on the GPU when CUDA is available, otherwise fall back to the CPU.
    "device": torch.device("cuda" if torch.cuda.is_available() else "cpu"),
    # Checkpoint passed to torch.load() by load_model().
    # NOTE(review): ".pthpth" looks like an accidentally doubled extension —
    # confirm this matches the actual file name on disk.
    "model_path": "Trained_Models/EfficientNet.pthpth",
    # CSV listing the samples; read by UnlabeledForestDataset.
    "csv_file": "SarawakDataset/2017-2018.csv",
    # Directory that contains one sub-folder per sample.
    "root_dir": "SarawakDataset",
    # DataLoader batch size.
    "batch_size": 16,
    # NOTE(review): get_data_loader() hard-codes num_workers=0 and never reads
    # this key, so changing it currently has no effect.
    "num_workers": 4,
    # Passed to EffnetMLP as the class count and to KMeans as n_clusters.
    "num_classes": 4,
    # Destination CSV written by predict_and_save_results().
    "output_file": "SarawakDataset/predictions.csv",
}

print("Configuration and imports complete.")

Configuration and imports complete.


In [140]:
import torch
from torch.utils.data import Dataset
import numpy as np
import os
from skimage import io


class UnlabeledForestDataset(Dataset):
    """Unlabeled samples made of four yearly image crops and one slope crop.

    The CSV is read positionally: column 0 is the latitude, column 1 the
    longitude, column 2 the event year and column 3 an example path whose
    last "/"-separated component names the sample folder under ``root_dir``.
    """

    def __init__(self, csv_file, root_dir, transform=None):
        """Read the sample table and remember where the sample folders live.

        Args:
            csv_file: Path of the CSV describing the samples.
            root_dir: Directory containing one sub-folder per sample.
            transform: Optional callable invoked as ``transform(image=array)``
                that returns a mapping with an ``"image"`` entry.
        """
        self.data = pd.read_csv(csv_file)
        self.root_dir = root_dir
        self.transform = transform

    def __len__(self):
        """Number of rows in the sample table."""
        return len(self.data)

    def _load_year_image(self, folder, prefix, year):
        """Load, optionally transform, and crop the image of one year.

        Raises:
            FileNotFoundError: If the PNG for ``year`` does not exist.
        """
        path = os.path.join(self.root_dir, folder, "images", f"{prefix}_{year}.png")
        if not os.path.exists(path):
            raise FileNotFoundError(f"Image file not found: {path}")

        frame = io.imread(path)
        if self.transform:
            frame = self.transform(image=frame)["image"]

        # Without a tensor-producing transform the array is still
        # height x width x channels; convert it and move channels first.
        if not isinstance(frame, torch.Tensor):
            frame = torch.from_numpy(frame).type(torch.float).permute(2, 0, 1)

        # Keep rows/columns 86..245 (a 160 x 160 window).
        return frame[:, 86:246, 86:246]

    def __getitem__(self, idx):
        """Return ``(images, slope, (lat, long, event_year, folder))`` for one row.

        ``images`` stacks the crops of the four years following the event
        year; ``slope`` is the matching crop of ``auxiliary/srtm.npy``.

        Raises:
            FileNotFoundError: If a yearly image or the slope file is missing.
        """
        if torch.is_tensor(idx):
            idx = idx.tolist()

        cells = self.data.iloc
        lat, long = cells[idx, 0], cells[idx, 1]
        event_year = int(cells[idx, 2])
        folder = cells[idx, 3].rsplit("/", 1)[-1]
        prefix = folder.replace(".", "_")

        first_year = event_year + 1
        images = torch.stack(
            [
                self._load_year_image(folder, prefix, year)
                for year in range(first_year, first_year + 4)
            ]
        )  # Shape: [4, 3, 160, 160]

        slope_path = os.path.join(self.root_dir, folder, "auxiliary", "srtm.npy")
        if not os.path.exists(slope_path):
            raise FileNotFoundError(f"Slope file not found: {slope_path}")

        slope = torch.from_numpy(np.load(slope_path)).type(torch.float)
        slope = slope[86:246, 86:246]

        return images, slope, (lat, long, event_year, folder)

In [141]:
import albumentations as A
from albumentations.pytorch import ToTensorV2


def get_data_loader(config, augment=False):
    """Build the DataLoader used to run the model over the unlabeled dataset.

    By default only the deterministic preprocessing (normalisation and tensor
    conversion) is applied. The dataset calls the transform once per yearly
    image and never on the slope raster, so random flips/rotations/elastic
    warps would differ between the four years of one sample, would not match
    the slope crop, and would make predictions change from run to run.

    Args:
        config: Mapping with the keys ``"csv_file"``, ``"root_dir"``,
            ``"batch_size"`` and ``"device"``.
        augment: When True, prepend the random flip / rotate / elastic
            augmentations that this function used to apply unconditionally.

    Returns:
        A ``torch.utils.data.DataLoader`` that iterates the dataset in CSV
        order (``shuffle=False``).
    """
    steps = []
    if augment:
        steps += [
            A.HorizontalFlip(p=0.5),
            A.Rotate(limit=270),
            A.ElasticTransform(p=0.4, alpha=120, sigma=120 * 0.05, alpha_affine=None),
        ]
    steps += [
        A.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
        ToTensorV2(),
    ]
    transform = A.Compose(steps)

    dataset = UnlabeledForestDataset(
        csv_file=config["csv_file"], root_dir=config["root_dir"], transform=transform
    )

    # Pinned host memory only helps host-to-GPU copies; requesting it on a
    # CPU-only run just produces a warning.
    use_pinned_memory = torch.device(config["device"]).type == "cuda"

    return torch.utils.data.DataLoader(
        dataset,
        batch_size=config["batch_size"],
        shuffle=False,
        num_workers=0,  # Set to 0 to run in the main process
        pin_memory=use_pinned_memory,
    )

In [142]:
# Data shape check
# Pull the first three batches from a fresh loader and print their tensor
# shapes as a quick sanity check before running the model.
data_loader = get_data_loader(CONFIG)
dataiter = iter(data_loader)

# Each next() call yields one *batch* (the printed label says "Sample").
# next() raises StopIteration if the loader holds fewer than three batches.
for i in range(3):  # Check first 3 samples
    print(f"\nSample {i+1}:")
    images, slopes, metadata = next(dataiter)
    print(f"Batch image shape: {images.shape}")
    print(f"Batch slope shape: {slopes.shape}")
    # print(f"Metadata: {metadata}")

# Summary of the last batch fetched above; metadata is the collated
# (lat, long, event_year, folder) tuple returned by the dataset.
print("\nDataLoader returns:")
print(f"images shape: {images.shape}")
print(f"slopes shape: {slopes.shape}")
print(f"metadata length: {len(metadata)}")


Sample 1:
Batch image shape: torch.Size([16, 4, 3, 160, 160])
Batch slope shape: torch.Size([16, 160, 160])

Sample 2:
Batch image shape: torch.Size([16, 4, 3, 160, 160])
Batch slope shape: torch.Size([16, 160, 160])

Sample 3:
Batch image shape: torch.Size([16, 4, 3, 160, 160])
Batch slope shape: torch.Size([16, 160, 160])

DataLoader returns:
images shape: torch.Size([16, 4, 3, 160, 160])
slopes shape: torch.Size([16, 160, 160])
metadata length: 4


In [143]:
# Model Loading
from model.effnet import *

def load_model(config):
    """Create an EffnetMLP, restore its checkpoint and return it in eval mode.

    Args:
        config: Mapping with the keys ``"num_classes"``, ``"model_path"`` and
            ``"device"``.

    Returns:
        The model, moved to ``config["device"]`` and switched to eval mode.
    """
    net = EffnetMLP("efficientnet-b2", config["num_classes"])

    checkpoint_path = config["model_path"]
    target_device = config["device"]
    # NOTE(review): weights_only=False lets torch.load unpickle arbitrary
    # objects, so only point "model_path" at checkpoints you trust.
    weights = torch.load(
        checkpoint_path, map_location=target_device, weights_only=False
    )
    net.load_state_dict(state_dict=weights)

    # .to() and .eval() both return the module, so the calls can be chained.
    return net.to(config["device"]).eval()


print("Model loading function defined.")

Model loading function defined.


In [146]:
import torch
import torch.nn.functional as F
import numpy as np
import pandas as pd
from sklearn.cluster import KMeans


def _embed_batch(model, images, slopes):
    """Return the year-averaged ``model.linear_fin`` output for one batch."""
    # The slope branch does not depend on the year, so it is evaluated once
    # per batch instead of once per year. This assumes the layers are
    # deterministic in eval mode (e.g. standard dropout / batch-norm modules)
    # — TODO confirm against the EffnetMLP definition.
    s = model.flatten(slopes)
    s = model.batch1d_n(s)
    s = model.dropout(s)
    s = model.batch1d(F.leaky_relu(model.linear1(s)))
    s = F.leaky_relu(model.linear2(s))

    per_year = []
    for year in range(images.shape[1]):
        x = model.extract(images[:, year, :, :, :]).squeeze(-1).squeeze(-1)
        x = torch.flatten(x, 1)
        x = model.linear_img(x)
        per_year.append(model.linear_fin(torch.cat((x, s), dim=1)))

    # Average across the year axis.
    return torch.stack(per_year).mean(dim=0)


def _metadata_rows(metadata):
    """Turn the collated (lat, long, year, folder) batch into one dict per sample."""
    return [
        {
            "latitude": float(lat),
            "longitude": float(long),
            "year": int(year),
            "folder": str(folder),
        }
        for lat, long, year, folder in zip(
            metadata[0], metadata[1], metadata[2], metadata[3]
        )
    ]


def predict_and_save_results(model, data_loader, config):
    """Embed every sample, cluster the embeddings and write the labels to CSV.

    For each batch the four yearly images are pushed through the model's
    image and slope branches, the ``linear_fin`` outputs are averaged over the
    years, and the averages are clustered with K-means. One CSV row per
    sample (latitude, longitude, year, folder, predicted_label) is written to
    ``config["output_file"]``.

    A batch that raises is reported and skipped as a whole, so the feature
    matrix and the result rows always stay aligned.

    Args:
        model: Module exposing ``extract``, ``linear_img``, ``flatten``,
            ``batch1d_n``, ``dropout``, ``linear1``, ``batch1d``, ``linear2``
            and ``linear_fin``.
        data_loader: Iterable yielding ``(images, slopes, metadata)`` batches.
        config: Mapping with the keys ``"device"``, ``"num_classes"`` and
            ``"output_file"``.

    Raises:
        ValueError: If no batch could be processed (empty loader or every
            batch failed).
    """
    results = []
    all_features = []
    failed_batches = 0

    model.eval()
    with torch.no_grad():
        for batch_idx, (images, slopes, metadata) in enumerate(data_loader):
            try:
                images = images.to(config["device"])  # Shape: [B, 4, 3, 160, 160]
                slopes = slopes.to(config["device"])  # Shape: [B, 160, 160]

                batch_features = _embed_batch(model, images, slopes).cpu().numpy()
                batch_rows = _metadata_rows(metadata)
                if len(batch_rows) != len(batch_features):
                    raise ValueError(
                        f"got {len(batch_features)} feature rows for "
                        f"{len(batch_rows)} metadata rows"
                    )
            except Exception as e:
                failed_batches += 1
                print(f"Error processing batch {batch_idx}: {str(e)}")
                continue

            # Commit features and rows together, and only once the whole batch
            # succeeded; otherwise later labels would be attached to the
            # wrong samples.
            all_features.append(batch_features)
            results.extend(batch_rows)

            if batch_idx % 10 == 0:
                print(f"Processed {len(results)} samples")

    if failed_batches:
        print(f"Skipped {failed_batches} batch(es) because of errors")

    if not all_features:
        raise ValueError(
            f"No samples were processed successfully ({failed_batches} batch(es) "
            "failed); nothing to cluster or save."
        )

    # Concatenate all features
    features = np.concatenate(all_features, axis=0)

    # Use K-means clustering to assign labels
    kmeans = KMeans(n_clusters=config["num_classes"], random_state=42)
    predicted_labels = kmeans.fit_predict(features)

    # Map cluster numbers to class names
    # NOTE(review): K-means cluster indices are arbitrary, so nothing ties
    # cluster 0 to "Grassland shrubland" etc. If linear_fin already produces
    # per-class scores, an argmax over them may be what is intended — confirm
    # against the model definition before trusting these names.
    label_map = {
        0: "Grassland shrubland",
        1: "Other",
        2: "Plantation",
        3: "Smallholder agriculture",
    }

    # Add predictions to results
    for result, cluster_id in zip(results, predicted_labels):
        result["predicted_label"] = label_map[int(cluster_id)]

    # Save results to CSV
    df = pd.DataFrame(results)
    df.to_csv(config["output_file"], index=False, float_format="%.6f")
    print(f"Results saved to {config['output_file']}")

### Main Execution

In [147]:
# End-to-end run: sanity-check the dataset layout, build the loader, load the
# model, then cluster the embeddings and write the predictions CSV.
try:
    # Check dataset structure: the first CSV row must have its first yearly
    # image on disk. The sub-folder name is lowercase "images" to match the
    # path UnlabeledForestDataset builds; a differently-cased name would fail
    # this check on case-sensitive filesystems even when the data is fine.
    sample_row = pd.read_csv(CONFIG["csv_file"]).iloc[0]
    folder_name = sample_row["example_path"].split("/")[-1]
    filename_prefix = folder_name.replace(".", "_")
    event_year = int(sample_row["year"])
    sample_path = os.path.join(
        CONFIG["root_dir"],
        folder_name,
        "images",
        f"{filename_prefix}_{event_year + 1}.png",
    )
    if not os.path.exists(sample_path):
        raise FileNotFoundError(
            f"Sample image not found: {sample_path}. Please check your dataset structure and CONFIG settings."
        )
    print("Dataset structure verified.")

    # Load data
    data_loader = get_data_loader(CONFIG)
    print("Data loader created successfully")

    # Load model
    model = load_model(CONFIG)
    print("Model loaded successfully")

    # Reload the raw checkpoint into `state_dict` for manual inspection.
    # Nothing in this cell reads or prints it.
    # NOTE(review): weights_only=False unpickles arbitrary objects; only use
    # trusted checkpoint files.
    state_dict = torch.load(
        CONFIG["model_path"], map_location=CONFIG["device"], weights_only=False
    )

    # Make predictions and save results
    predict_and_save_results(model, data_loader, CONFIG)

    print("Prediction process completed.")
except Exception as e:
    # Report the failure with a full traceback but keep the notebook running.
    print(f"An error occurred during execution: {str(e)}")
    import traceback

    traceback.print_exc()

Dataset structure verified.
Data loader created successfully
Loaded pretrained weights for efficientnet-b2
Model loaded successfully
Processed 0 samples
Results saved to SarawakDataset/predictions2_output.csv
Prediction process completed.
