In [None]:
# Inference notebook: load trained model & predict on test set

import os
import warnings

import pandas as pd
import torch
import torchvision.transforms as T
from PIL import Image
from torch.utils.data import Dataset, DataLoader
from torchvision import models

# Paths & parameters
# Root of the competition data; the test images and the id list are both
# resolved relative to it.
BASE_INPUT_PATH = "/kaggle/input/soil-classification/soil_classification-2025"
TEST_DIR = os.path.join(BASE_INPUT_PATH, "test")
TEST_IDS_CSV = os.path.join(BASE_INPUT_PATH, "test_ids.csv")

# Checkpoint that gets loaded, and the directory the submission is written to.
MODEL_PATH = "/kaggle/working/best_effnet_b1.pth"
OUTPUT_DIR = "/kaggle/working/"

# Side length images are resized to, and the width of the classifier head.
IMG_SIZE = 240
NUM_CLASSES = 4

# Use the GPU when PyTorch can see one, otherwise fall back to the CPU.
if torch.cuda.is_available():
    DEVICE = torch.device("cuda")
else:
    DEVICE = torch.device("cpu")

# Table of test images; its 'image_id' column is read by the dataset class.
df_test = pd.read_csv(TEST_IDS_CSV)

# Transforms
# Per-channel normalisation constants (these are the values commonly quoted
# as the ImageNet statistics).
# NOTE(review): presumably identical to the training-time preprocessing --
# confirm against the training notebook.
mean = [0.485, 0.456, 0.406]
std = [0.229, 0.224, 0.225]

# Deterministic pipeline: resize to a fixed square, convert to a tensor,
# then normalise each channel.
test_transforms = T.Compose(
    [
        T.Resize(size=(IMG_SIZE, IMG_SIZE)),
        T.ToTensor(),
        T.Normalize(mean=mean, std=std),
    ]
)

class TestDataset(Dataset):
    """Dataset yielding ``(transformed_image, image_id)`` pairs for inference.

    Args:
        df: Table with an ``image_id`` column. Each value is joined onto
            ``image_dir`` to build the path of the image to load.
        image_dir: Directory that contains the image files.
        transforms: Callable applied to the loaded RGB PIL image.
    """

    def __init__(self, df: pd.DataFrame, image_dir: str, transforms):
        self.df = df
        self.dir = image_dir
        self.transforms = transforms

    def __len__(self) -> int:
        """Return the number of rows in the id table."""
        return len(self.df)

    def __getitem__(self, idx: int):
        """Load, convert and transform the image at row position ``idx``.

        If the file cannot be opened or decoded, a black
        ``IMG_SIZE`` x ``IMG_SIZE`` RGB placeholder is used instead so that
        every id still produces a sample, and a ``RuntimeWarning`` naming the
        file is emitted.

        Returns:
            A ``(image, image_id)`` tuple, where ``image`` is whatever
            ``self.transforms`` returns.
        """
        img_id = self.df.iloc[idx]['image_id']
        path = os.path.join(self.dir, img_id)
        try:
            # The context manager closes the underlying file as soon as the
            # pixel data has been copied out by convert().
            with Image.open(path) as handle:
                img = handle.convert('RGB')
        except Exception as err:
            # Deliberately broad so the best-effort fallback is kept for any
            # ordinary read/decode failure, but -- unlike a bare ``except:``
            # -- KeyboardInterrupt and SystemExit still propagate.
            warnings.warn(
                f"Could not read image {path!r} ({err!r}); "
                "substituting a black placeholder.",
                RuntimeWarning,
            )
            img = Image.new('RGB', (IMG_SIZE, IMG_SIZE), color='black')
        img = self.transforms(img)
        return img, img_id

# Wrap the test ids in a dataset and batch them without shuffling, so the
# output rows keep the order of the id table.
test_ds = TestDataset(df_test, TEST_DIR, test_transforms)
test_loader = DataLoader(
    test_ds,
    batch_size=32,
    shuffle=False,
    num_workers=2,
    # Page-locked host buffers only speed up host-to-GPU copies; on a
    # CPU-only session they bring no benefit (and recent PyTorch versions
    # warn about the request), so pin only when the target is CUDA.
    pin_memory=(DEVICE.type == "cuda"),
)

# Load model
# Build the EfficientNet-B1 architecture without pretrained weights and swap
# the second element of its classifier head for a NUM_CLASSES-way linear
# layer, so the module layout matches what the checkpoint is loaded into.
model = models.efficientnet_b1(weights=None)
in_f = model.classifier[1].in_features
model.classifier[1] = torch.nn.Linear(in_features=in_f, out_features=NUM_CLASSES)

# NOTE(review): torch.load unpickles the file. If the checkpoint could ever
# come from an untrusted source, consider passing weights_only=True (needs a
# PyTorch version that supports that argument) -- confirm before changing.
model.load_state_dict(torch.load(MODEL_PATH, map_location=DEVICE))

# Move the parameters to the target device and switch to evaluation mode.
model = model.to(DEVICE)
model.eval()

# Predict
# Run every batch through the model with gradient tracking disabled and
# collect, per sample, the index of the highest-scoring class together with
# the image id the loader returned for it.
all_ids = []
all_preds = []
with torch.no_grad():
    for images, batch_ids in test_loader:
        scores = model(images.to(DEVICE))
        predicted = scores.argmax(dim=1)
        all_preds.extend(predicted.cpu().tolist())
        all_ids.extend(batch_ids)

# Map back to labels
# Class index i is mapped to the i-th distinct 'soil_type' value of the
# training labels, taken in sorted order.
# NOTE(review): this presumes the training notebook encoded the classes in
# the same sorted order -- confirm.
class_names = list(
    pd.read_csv(os.path.join(BASE_INPUT_PATH, 'train_labels.csv'))['soil_type'].unique()
)
class_names.sort()
int_to_label = dict(enumerate(class_names))
pred_labels = [int_to_label[class_idx] for class_idx in all_preds]

# Build submission
# One row per test image: its id and the predicted soil type. The output
# path is computed once and reused for both the write and the log line.
submission_path = os.path.join(OUTPUT_DIR, 'submission.csv')
submission = pd.DataFrame(
    {
        'image_id': all_ids,
        'soil_type': pred_labels,
    }
)
submission.to_csv(submission_path, index=False)
print("Submission written to", submission_path)
print(submission.head())
