In [None]:
import os
import pandas as pd
from PIL import Image

import torch
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms

from src.model import get_model


In [None]:
# Locations of the test split inside the Kaggle input directory.
TEST_IMAGES = "/kaggle/input/soil-classification/soil_classification-2025/test"
TEST_CSV = "/kaggle/input/soil-classification/soil_classification-2025/test_ids.csv"

# Table of test image IDs; the dataset class below reads its 'image_id' column.
test_df = pd.read_csv(filepath_or_buffer=TEST_CSV)


In [None]:
# Side length (in pixels) every test image is resized to.
IMG_SIZE = 224

# Deterministic preprocessing for inference: resize, convert to a tensor,
# then normalize each channel.
# NOTE(review): the mean/std values look like the usual ImageNet statistics;
# confirm they match the preprocessing used during training.
test_transform = transforms.Compose(
    [
        transforms.Resize(size=(IMG_SIZE, IMG_SIZE)),
        transforms.ToTensor(),
        transforms.Normalize(
            mean=[0.485, 0.456, 0.406],
            std=[0.229, 0.224, 0.225],
        ),
    ]
)

class SoilTestDataset(Dataset):
    """Dataset of unlabeled test images addressed through an ``image_id`` column.

    Each item is an ``(image, image_id)`` pair, so predictions can be matched
    back to the file they were computed from.

    Args:
        df: DataFrame with an ``image_id`` column whose values are image file
            names relative to ``img_dir``.
        img_dir: Directory that contains the image files.
        transform: Optional callable applied to the RGB PIL image. When it is
            ``None`` the PIL image is returned unchanged.
    """

    def __init__(self, df, img_dir, transform=None):
        # Renumber rows 0..len-1 so the positional index received by
        # __getitem__ is always a valid .loc label, whatever index df had.
        self.df = df.reset_index(drop=True)
        self.img_dir = img_dir
        self.transform = transform

    def __len__(self):
        """Return the number of rows (images) in the dataset."""
        return len(self.df)

    def __getitem__(self, idx):
        """Load the image for row ``idx``.

        Returns:
            Tuple ``(image, image_id)``: the RGB image (transformed when a
            transform was given) and the ID read from the ``image_id`` column.
        """
        img_id = self.df.loc[idx, 'image_id']
        img_path = os.path.join(self.img_dir, img_id)
        # Close the underlying file deterministically; convert() returns an
        # independent in-memory copy, so it stays usable after the file closes.
        with Image.open(img_path) as raw:
            image = raw.convert('RGB')
        # Compare against None explicitly so a callable that happens to be
        # falsy (e.g. an empty container-like pipeline) is still applied.
        if self.transform is not None:
            image = self.transform(image)
        return image, img_id

# Wrap the test IDs in a dataset and batch them in file order; shuffle stays
# off so predictions come out in the same order as the rows of test_df.
test_dataset = SoilTestDataset(
    df=test_df,
    img_dir=TEST_IMAGES,
    transform=test_transform,
)
test_loader = DataLoader(dataset=test_dataset, batch_size=32, shuffle=False)


In [None]:
# Run on the GPU when PyTorch can see one, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
num_classes = 5  # same as during training

# Path to the saved model weights.
MODEL_PATH = "best_model.pth"

# Build the architecture, then fill it from the checkpoint.
# NOTE(review): pretrained=False presumably skips fetching pretrained weights,
# which the checkpoint would overwrite anyway; confirm against get_model.
model = get_model(num_classes=num_classes, pretrained=False)

# weights_only=True restricts unpickling to tensors and plain containers, so a
# tampered checkpoint file cannot execute arbitrary code while being loaded.
# map_location remaps stored tensors onto `device`, so a checkpoint saved on a
# GPU still loads on a CPU-only machine.
model.load_state_dict(
    torch.load(MODEL_PATH, map_location=device, weights_only=True)
)
model.to(device)
# Switch to evaluation mode for inference; as the last expression of the cell
# this also displays the model.
model.eval()


In [None]:
# Accumulators for predicted class indices and the matching image IDs,
# filled batch by batch in loader order.
all_preds, all_ids = [], []

# Gradients are not needed for inference, so disable autograd tracking.
with torch.no_grad():
    for images, img_ids in test_loader:
        images = images.to(device)
        outputs = model(images)
        # Highest-scoring class per sample, moved to the CPU as a NumPy array.
        preds = outputs.argmax(dim=1).cpu().numpy()
        all_preds += list(preds)
        all_ids += list(img_ids)


In [None]:
# Class index -> soil name lookup; it has to mirror the label encoding that
# was used when the model was trained.
idx2soil = {0: 'Black', 1: 'Laterite', 2: 'Red', 3: 'Alluvial', 4: 'Desert'}

# One row per test image: its ID and the predicted soil name.
submission = pd.DataFrame({"image_id": all_ids})
submission["soil_type"] = [idx2soil[pred] for pred in all_preds]

# Write the submission file without the DataFrame index, then preview it.
submission.to_csv("submission.csv", index=False)
submission.head()
