In [None]:
import os
import pandas as pd
import numpy as np
from PIL import Image
from sklearn.metrics import f1_score
from sklearn.preprocessing import LabelEncoder
from torchvision import transforms, models
import torch
import torch.nn as nn
from torch.utils.data import DataLoader, Dataset
from tqdm import tqdm

In [None]:
# Every input lives under one Kaggle dataset folder; keep the root in a single
# place so the four derived paths cannot drift apart.
DATA_ROOT = "/kaggle/input/soil-classification-dataset/soil_classification-2025"

TRAIN_IMG_PATH = f"{DATA_ROOT}/train"            # directory of training images
TEST_IMG_PATH = f"{DATA_ROOT}/test"              # directory of test images
TRAIN_LABELS = f"{DATA_ROOT}/train_labels.csv"   # CSV read into train_df
TEST_IDS = f"{DATA_ROOT}/test_ids.csv"           # CSV read into test_df

In [None]:
# Load the training labels and append an integer `label` column derived from
# the textual `soil_type` column. LabelEncoder assigns codes 0..n_classes-1;
# the fitted encoder is kept so predictions can be mapped back to names later.
le = LabelEncoder()
train_df = pd.read_csv(TRAIN_LABELS).assign(
    label=lambda frame: le.fit_transform(frame['soil_type'])
)
class_names = le.classes_  # class_names[i] is the soil_type encoded as i

In [None]:
class SoilDataset(Dataset):
    """Map-style dataset of soil images listed in a DataFrame.

    Args:
        df: DataFrame with one row per image. Must contain an ``image_id``
            column (file name relative to ``img_dir``); a ``label`` column is
            additionally read when ``test`` is False.
        img_dir: Directory that contains the image files.
        transform: Optional callable applied to the RGB PIL image.
        test: When True, items are ``(image, image_id)``; otherwise
            ``(image, label)``.
    """

    def __init__(self, df, img_dir, transform=None, test=False):
        self.df = df
        self.img_dir = img_dir
        self.transform = transform
        self.test = test

    def __len__(self):
        """Return the number of rows (images) in the backing DataFrame."""
        return len(self.df)

    def __getitem__(self, idx):
        """Load the image at positional index ``idx``.

        Returns:
            ``(image, image_id)`` in test mode, ``(image, label)`` otherwise.
        """
        # Fetch the row once; every .iloc call builds a fresh Series.
        row = self.df.iloc[idx]
        image_id = row['image_id']

        # Image.open is lazy and may keep the underlying file open. convert()
        # returns an independent image, so the source file can be released as
        # soon as the conversion is done.
        with Image.open(os.path.join(self.img_dir, image_id)) as raw_image:
            image = raw_image.convert("RGB")

        if self.transform:
            image = self.transform(image)

        if self.test:
            return image, image_id
        return image, row['label']

In [None]:
# Per-channel statistics of ImageNet. The ResNet-18 used in this notebook is
# initialised from ImageNet-pretrained weights, so inputs are normalised the
# same way those weights were trained rather than with a generic 0.5/0.5.
IMAGENET_MEAN = [0.485, 0.456, 0.406]
IMAGENET_STD = [0.229, 0.224, 0.225]

# Shared preprocessing for training and test images: fixed 224x224 size,
# conversion to a float tensor, then per-channel normalisation.
transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize(IMAGENET_MEAN, IMAGENET_STD),
])

In [None]:
# Training data pipeline: labelled images, reshuffled on every pass, served in
# mini-batches of 32.
train_dataset = SoilDataset(
    df=train_df,
    img_dir=TRAIN_IMG_PATH,
    transform=transform,
)
train_loader = DataLoader(
    dataset=train_dataset,
    batch_size=32,
    shuffle=True,
)

In [None]:
# Run on the GPU when one is available, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Pretrained ResNet-18 via the `weights=` API (avoids the deprecation warning
# of the old `pretrained=True` flag). `models` is already imported at the top
# of the notebook, so no extra import is needed here.
model = models.resnet18(weights=models.ResNet18_Weights.DEFAULT)

# Replace the classification head. Its width follows the label encoder instead
# of a hard-coded 4, so it always matches the classes found in the labels CSV.
model.fc = nn.Linear(model.fc.in_features, len(class_names))
model = model.to(device)

In [None]:
# Fine-tune the network on the training set.
# torch, nn, tqdm and f1_score come from the import cell at the top of the
# notebook; `device` and `model` come from the model-setup cell.

# Optimisation setup: cross-entropy loss, Adam with a small learning rate, and
# a step schedule that halves the learning rate every 10 epochs.
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=1e-5)
scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=10, gamma=0.5)

epochs = 100
model.to(device)
model.train()

for epoch in range(epochs):
    running_loss = 0
    epoch_preds, epoch_targets = [], []

    for batch_images, batch_labels in tqdm(train_loader):
        batch_images = batch_images.to(device)
        batch_labels = batch_labels.to(device)

        # Standard optimisation step: clear gradients, forward, backward, update.
        optimizer.zero_grad()
        logits = model(batch_images)
        loss = criterion(logits, batch_labels)
        loss.backward()
        optimizer.step()

        # Book-keeping for the per-epoch report.
        running_loss += loss.item()
        epoch_preds.extend(logits.argmax(dim=1).tolist())
        epoch_targets.extend(batch_labels.tolist())

    scheduler.step()

    # Macro-F1 over the predictions made while training during this epoch.
    f1 = f1_score(epoch_targets, epoch_preds, average='macro')
    print(f"Epoch [{epoch+1}/{epochs}], Loss: {running_loss/len(train_loader):.4f}, F1 Score: {f1:.6f}")

    # Stop early once the training set is classified perfectly.
    if f1 == 1.0:
        print("Perfect F1 Score achieved! Stopping training.")
        break

In [None]:
# Score the trained network on the training set with the model in eval mode
# and gradient tracking disabled. f1_score is imported in the top cell.
model.eval()
all_preds, all_labels = [], []

with torch.no_grad():
    # NOTE: train_loader was built with shuffle=True, so batches arrive in
    # random order. Predictions and labels are collected pairwise per batch,
    # so the ordering should not affect the macro-F1 computed below.
    for batch_images, batch_labels in train_loader:
        logits = model(batch_images.to(device))
        all_preds.extend(logits.argmax(dim=1).tolist())
        all_labels.extend(batch_labels.tolist())

train_f1 = f1_score(all_labels, all_preds, average='macro')
print(f"F1 Score (Train - Full Evaluation): {train_f1:.10f}")

In [None]:
# Predict a soil type for every test image and write the submission file.
test_df = pd.read_csv(TEST_IDS)
test_dataset = SoilDataset(test_df, TEST_IMG_PATH, transform=transform, test=True)
# Keep the original order so predictions stay aligned with the image ids.
test_loader = DataLoader(test_dataset, batch_size=32, shuffle=False)

model.eval()
predictions = []
image_ids = []

with torch.no_grad():
    for images, ids in test_loader:
        # Move the batch to the device the model lives on. A hard-coded
        # .cuda() would fail on CPU-only machines even though the rest of the
        # notebook falls back to the CPU.
        images = images.to(device)
        outputs = model(images)
        preds = torch.argmax(outputs, dim=1).cpu().numpy()
        predictions.extend(preds)
        image_ids.extend(ids)

# Map the integer class codes back to the original soil_type names.
submission = pd.DataFrame({
    "image_id": image_ids,
    "soil_type": le.inverse_transform(predictions)
})

submission.to_csv("submission.csv", index=False)