In [15]:
import os
import warnings

import numpy as np
import pandas as pd
from sklearn.metrics import f1_score
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.utils.class_weight import compute_class_weight

import torch
from torch import nn
from torch.utils.data import DataLoader, Dataset
from torchvision import transforms, models
from PIL import Image
from tqdm import tqdm

In [16]:
# Dataset locations on Kaggle: the two image folders plus the CSV files that
# list the training labels and the test image ids.
TRAIN_DIR = "/kaggle/input/soil-classification-part-2/soil_competition-2025/train"
TEST_DIR = "/kaggle/input/soil-classification-part-2/soil_competition-2025/test"
TRAIN_CSV = "/kaggle/input/soil-classification-part-2/soil_competition-2025/train_labels.csv"
TEST_CSV = "/kaggle/input/soil-classification-part-2/soil_competition-2025/test_ids.csv"

# Load the training labels, strip stray whitespace from the header names and
# normalise the target column name to 'soil_type'. rename() returns a new
# frame, so the result is rebound instead of mutating in place.
df = pd.read_csv(TRAIN_CSV)
df.columns = df.columns.str.strip()
df = df.rename(columns={'label': 'soil_type'})

# Encode Labels
le = LabelEncoder()
df['soil_type'] = le.fit_transform(df['soil_type'])

# Sanity check: with fewer than two distinct labels the classifier has nothing
# to discriminate, and loss / validation F1 become trivially perfect.
if len(le.classes_) < 2:
    warnings.warn(
        f"Training labels contain only {len(le.classes_)} distinct class(es): "
        f"{list(le.classes_)}. A classifier trained on them will report "
        "near-zero loss and a perfect validation F1 without being able to "
        "tell classes apart."
    )

In [17]:
# Show the cleaned column names as a plain Python list.
print(list(df.columns))

['image_id', 'soil_type']


In [18]:
# Hold out 20% of the labelled rows for validation. The split is stratified on
# the encoded label and uses a fixed seed so it is repeatable.
train_df, val_df = train_test_split(
    df,
    test_size=0.2,
    random_state=42,
    stratify=df['soil_type'],
)

In [19]:
# Augmenting preprocessing pipeline: resize, random crop to 224x224, random
# flips / rotation / colour jitter, then tensor conversion and per-channel
# normalisation.
_augmentation_steps = [
    transforms.Resize(size=(256, 256)),
    transforms.RandomResizedCrop(size=224),
    transforms.RandomHorizontalFlip(p=0.5),
    transforms.RandomVerticalFlip(p=0.5),
    transforms.RandomRotation(degrees=20),
    transforms.ColorJitter(brightness=0.2, contrast=0.2, saturation=0.2),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
]
transform = transforms.Compose(_augmentation_steps)

In [20]:
class SoilDataset(Dataset):
    """Dataset that loads the images listed in a DataFrame from a directory.

    Every row of ``dataframe`` must provide an ``image_id`` column holding the
    file name relative to ``image_dir``. Unless ``test`` is true, rows must
    also provide a ``soil_type`` column holding the label.
    """

    def __init__(self, dataframe, image_dir, transform=None, test=False):
        """Store the index table and the loading options.

        Args:
            dataframe: table with one row per image (see class docstring).
            image_dir: directory that contains the image files.
            transform: optional callable applied to the loaded RGB PIL image.
            test: when true, items are ``(image, image_id)`` instead of
                ``(image, label)``.
        """
        self.data = dataframe
        self.image_dir = image_dir
        self.transform = transform
        self.test = test

    def __len__(self):
        """Return the number of rows (images) in the backing DataFrame."""
        return len(self.data)

    def __getitem__(self, idx):
        """Load the image at positional index ``idx``.

        Returns:
            ``(image, image_id)`` in test mode, otherwise ``(image, label)``
            where ``label`` is the row's ``soil_type`` value.
        """
        # Fetch the row once; it is reused for both the file name and the label.
        row = self.data.iloc[idx]
        img_id = row['image_id']
        img_path = os.path.join(self.image_dir, img_id)
        # Open inside a context manager so the file handle is released as soon
        # as the pixels are decoded; convert() returns an independent copy.
        with Image.open(img_path) as raw_image:
            image = raw_image.convert("RGB")
        if self.transform:
            image = self.transform(image)
        if self.test:
            return image, img_id
        return image, row['soil_type']

In [21]:
# Training batches: augmented with the random `transform` pipeline and shuffled.
train_loader = DataLoader(SoilDataset(train_df, TRAIN_DIR, transform), batch_size=32, shuffle=True)

# Validation must be deterministic: reusing the random training augmentations
# would score the model on randomly cropped / flipped / rotated / jittered
# images and make the metric change from run to run. Only resize, centre-crop
# and normalise here (same normalisation statistics as training).
val_transform = transforms.Compose([
    transforms.Resize((256, 256)),
    transforms.CenterCrop(224),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
])
val_loader = DataLoader(SoilDataset(val_df, TRAIN_DIR, val_transform), batch_size=32, shuffle=False)

In [22]:
# NOTE(review): an equivalent setup cell appears again further down (In [27])
# and rebinds device/model/criterion/optimizer/scheduler, so when the notebook
# is run top to bottom the objects created here are replaced before training.
import torch
import torch.nn as nn
import torch.optim as optim
from sklearn.metrics import f1_score
from torchvision.models import ResNet18_Weights, resnet18
from tqdm import tqdm

# Run on the GPU when one is visible, otherwise fall back to the CPU.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")

# ResNet18 with torchvision's default weights; the final fully connected layer
# is swapped for a fresh 4-output head before moving the model to the device.
model = resnet18(weights=ResNet18_Weights.DEFAULT)
model.fc = nn.Linear(in_features=model.fc.in_features, out_features=4)  # Change 4 to match your number of classes
model = model.to(device)

# Cross-entropy loss, Adam at lr=0.001, and a step schedule that multiplies
# the learning rate by 0.1 every 10 scheduler steps.
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(params=model.parameters(), lr=0.001)
scheduler = optim.lr_scheduler.StepLR(optimizer=optimizer, step_size=10, gamma=0.1)

In [27]:
import torch
import torch.nn as nn
import torch.optim as optim
from sklearn.metrics import f1_score
from tqdm import tqdm
from torchvision import models
from torchvision.models import ResNet18_Weights

# Run on the GPU when one is visible, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Size the classification head from the fitted label encoder instead of a
# hard-coded 4, so the head always matches the labels that were actually
# encoded (too few outputs would make CrossEntropyLoss fail on unseen targets).
num_classes = len(le.classes_)

# Load ResNet18 with updated weights syntax and replace its final layer.
model = models.resnet18(weights=ResNet18_Weights.DEFAULT)
model.fc = nn.Linear(model.fc.in_features, num_classes)
model = model.to(device)

# Unweighted cross-entropy; pass `weight=` only if class imbalance requires it.
criterion = nn.CrossEntropyLoss()

# Optimizer and scheduler: Adam at lr=0.001, learning rate multiplied by 0.1
# every 10 scheduler steps.
optimizer = optim.Adam(model.parameters(), lr=0.001)
scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=10, gamma=0.1)

In [28]:
# Start below the minimum possible F1 (0.0) so the first epoch always writes a
# checkpoint. With an initial value of 0 and a strict '>' comparison, a run
# whose validation F1 stays at 0.0 would never save "best_model.pth", and the
# later load would fail or pick up a stale file from an earlier run.
best_f1 = -1.0
epochs = 50

for epoch in range(epochs):
    # ---- Training pass ----
    model.train()
    train_loss = 0.0

    for images, labels in tqdm(train_loader, desc=f"Epoch {epoch+1}/{epochs}"):
        images, labels = images.to(device), labels.to(device)
        optimizer.zero_grad()
        outputs = model(images)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
        # Accumulate the per-batch mean loss; averaged over batches when printed.
        train_loss += loss.item()

    # ---- Validation pass (no gradients, eval-mode layers) ----
    model.eval()
    val_preds, val_labels = [], []
    with torch.no_grad():
        for images, labels in val_loader:
            images, labels = images.to(device), labels.to(device)
            outputs = model(images)
            # Predicted class = index of the largest logit.
            preds = outputs.argmax(dim=1)
            val_preds.extend(preds.cpu().numpy())
            val_labels.extend(labels.cpu().numpy())

    val_f1 = f1_score(val_labels, val_preds, average='weighted')
    print(f"Epoch {epoch+1}, Loss: {train_loss/len(train_loader):.4f}, Val F1: {val_f1:.4f}")

    # Keep only the weights of the best-scoring epoch so far.
    if val_f1 > best_f1:
        best_f1 = val_f1
        torch.save(model.state_dict(), "best_model.pth")

    # Advance the learning-rate schedule once per epoch.
    scheduler.step()

Epoch 1/50: 100%|██████████| 31/31 [00:18<00:00,  1.67it/s]


Epoch 1, Loss: 0.0734, Val F1: 1.0000


Epoch 2/50: 100%|██████████| 31/31 [00:12<00:00,  2.39it/s]


Epoch 2, Loss: 0.0000, Val F1: 1.0000


Epoch 3/50: 100%|██████████| 31/31 [00:13<00:00,  2.29it/s]


Epoch 3, Loss: 0.0000, Val F1: 1.0000


Epoch 4/50: 100%|██████████| 31/31 [00:12<00:00,  2.41it/s]


Epoch 4, Loss: 0.0000, Val F1: 1.0000


Epoch 5/50: 100%|██████████| 31/31 [00:13<00:00,  2.35it/s]


Epoch 5, Loss: 0.0000, Val F1: 1.0000


Epoch 6/50: 100%|██████████| 31/31 [00:12<00:00,  2.41it/s]


Epoch 6, Loss: 0.0000, Val F1: 1.0000


Epoch 7/50: 100%|██████████| 31/31 [00:13<00:00,  2.29it/s]


Epoch 7, Loss: 0.0000, Val F1: 1.0000


Epoch 8/50: 100%|██████████| 31/31 [00:12<00:00,  2.40it/s]


Epoch 8, Loss: 0.0000, Val F1: 1.0000


Epoch 9/50: 100%|██████████| 31/31 [00:12<00:00,  2.39it/s]


Epoch 9, Loss: 0.0000, Val F1: 1.0000


Epoch 10/50: 100%|██████████| 31/31 [00:12<00:00,  2.43it/s]


Epoch 10, Loss: 0.0000, Val F1: 1.0000


Epoch 11/50: 100%|██████████| 31/31 [00:13<00:00,  2.28it/s]


Epoch 11, Loss: 0.0000, Val F1: 1.0000


Epoch 12/50: 100%|██████████| 31/31 [00:12<00:00,  2.40it/s]


Epoch 12, Loss: 0.0000, Val F1: 1.0000


Epoch 13/50: 100%|██████████| 31/31 [00:13<00:00,  2.35it/s]


Epoch 13, Loss: 0.0000, Val F1: 1.0000


Epoch 14/50: 100%|██████████| 31/31 [00:12<00:00,  2.41it/s]


Epoch 14, Loss: 0.0000, Val F1: 1.0000


Epoch 15/50: 100%|██████████| 31/31 [00:13<00:00,  2.33it/s]


Epoch 15, Loss: 0.0000, Val F1: 1.0000


Epoch 16/50: 100%|██████████| 31/31 [00:12<00:00,  2.41it/s]


Epoch 16, Loss: 0.0000, Val F1: 1.0000


Epoch 17/50: 100%|██████████| 31/31 [00:13<00:00,  2.35it/s]


Epoch 17, Loss: 0.0000, Val F1: 1.0000


Epoch 18/50: 100%|██████████| 31/31 [00:13<00:00,  2.38it/s]


Epoch 18, Loss: 0.0000, Val F1: 1.0000


Epoch 19/50: 100%|██████████| 31/31 [00:13<00:00,  2.36it/s]


Epoch 19, Loss: 0.0000, Val F1: 1.0000


Epoch 20/50: 100%|██████████| 31/31 [00:12<00:00,  2.41it/s]


Epoch 20, Loss: 0.0000, Val F1: 1.0000


Epoch 21/50: 100%|██████████| 31/31 [00:13<00:00,  2.37it/s]


Epoch 21, Loss: 0.0000, Val F1: 1.0000


Epoch 22/50: 100%|██████████| 31/31 [00:12<00:00,  2.41it/s]


Epoch 22, Loss: 0.0000, Val F1: 1.0000


Epoch 23/50: 100%|██████████| 31/31 [00:13<00:00,  2.37it/s]


Epoch 23, Loss: 0.0000, Val F1: 1.0000


Epoch 24/50: 100%|██████████| 31/31 [00:12<00:00,  2.40it/s]


Epoch 24, Loss: 0.0000, Val F1: 1.0000


Epoch 25/50: 100%|██████████| 31/31 [00:12<00:00,  2.41it/s]


Epoch 25, Loss: 0.0000, Val F1: 1.0000


Epoch 26/50: 100%|██████████| 31/31 [00:13<00:00,  2.37it/s]


Epoch 26, Loss: 0.0000, Val F1: 1.0000


Epoch 27/50: 100%|██████████| 31/31 [00:13<00:00,  2.33it/s]


Epoch 27, Loss: 0.0000, Val F1: 1.0000


Epoch 28/50: 100%|██████████| 31/31 [00:12<00:00,  2.40it/s]


Epoch 28, Loss: 0.0000, Val F1: 1.0000


Epoch 29/50: 100%|██████████| 31/31 [00:13<00:00,  2.35it/s]


Epoch 29, Loss: 0.0000, Val F1: 1.0000


Epoch 30/50: 100%|██████████| 31/31 [00:12<00:00,  2.39it/s]


Epoch 30, Loss: 0.0000, Val F1: 1.0000


Epoch 31/50: 100%|██████████| 31/31 [00:13<00:00,  2.31it/s]


Epoch 31, Loss: 0.0000, Val F1: 1.0000


Epoch 32/50: 100%|██████████| 31/31 [00:13<00:00,  2.34it/s]


Epoch 32, Loss: 0.0000, Val F1: 1.0000


Epoch 33/50: 100%|██████████| 31/31 [00:13<00:00,  2.31it/s]


Epoch 33, Loss: 0.0000, Val F1: 1.0000


Epoch 34/50: 100%|██████████| 31/31 [00:12<00:00,  2.40it/s]


Epoch 34, Loss: 0.0000, Val F1: 1.0000


Epoch 35/50: 100%|██████████| 31/31 [00:14<00:00,  2.21it/s]


Epoch 35, Loss: 0.0000, Val F1: 1.0000


Epoch 36/50: 100%|██████████| 31/31 [00:13<00:00,  2.35it/s]


Epoch 36, Loss: 0.0000, Val F1: 1.0000


Epoch 37/50: 100%|██████████| 31/31 [00:13<00:00,  2.31it/s]


Epoch 37, Loss: 0.0000, Val F1: 1.0000


Epoch 38/50: 100%|██████████| 31/31 [00:13<00:00,  2.38it/s]


Epoch 38, Loss: 0.0000, Val F1: 1.0000


Epoch 39/50: 100%|██████████| 31/31 [00:13<00:00,  2.37it/s]


Epoch 39, Loss: 0.0000, Val F1: 1.0000


Epoch 40/50: 100%|██████████| 31/31 [00:13<00:00,  2.34it/s]


Epoch 40, Loss: 0.0000, Val F1: 1.0000


Epoch 41/50: 100%|██████████| 31/31 [00:12<00:00,  2.40it/s]


Epoch 41, Loss: 0.0000, Val F1: 1.0000


Epoch 42/50: 100%|██████████| 31/31 [00:12<00:00,  2.40it/s]


Epoch 42, Loss: 0.0000, Val F1: 1.0000


Epoch 43/50: 100%|██████████| 31/31 [00:13<00:00,  2.32it/s]


Epoch 43, Loss: 0.0000, Val F1: 1.0000


Epoch 44/50: 100%|██████████| 31/31 [00:12<00:00,  2.39it/s]


Epoch 44, Loss: 0.0000, Val F1: 1.0000


Epoch 45/50: 100%|██████████| 31/31 [00:13<00:00,  2.37it/s]


Epoch 45, Loss: 0.0000, Val F1: 1.0000


Epoch 46/50: 100%|██████████| 31/31 [00:12<00:00,  2.44it/s]


Epoch 46, Loss: 0.0000, Val F1: 1.0000


Epoch 47/50: 100%|██████████| 31/31 [00:13<00:00,  2.35it/s]


Epoch 47, Loss: 0.0000, Val F1: 1.0000


Epoch 48/50: 100%|██████████| 31/31 [00:12<00:00,  2.43it/s]


Epoch 48, Loss: 0.0000, Val F1: 1.0000


Epoch 49/50: 100%|██████████| 31/31 [00:13<00:00,  2.38it/s]


Epoch 49, Loss: 0.0000, Val F1: 1.0000


Epoch 50/50: 100%|██████████| 31/31 [00:12<00:00,  2.42it/s]


Epoch 50, Loss: 0.0000, Val F1: 1.0000


In [29]:
# Restore the best checkpoint written by the training loop. map_location lets
# a checkpoint saved on one device (e.g. GPU) be loaded on the current one.
model.load_state_dict(torch.load("best_model.pth", map_location=device))
model.eval()

test_df = pd.read_csv(TEST_CSV)
test_df.columns = test_df.columns.str.strip()

# Inference must be deterministic: the training `transform` applies random
# crops / flips / rotations / colour jitter, which would make predictions vary
# between runs. Only resize, centre-crop and normalise the test images (same
# normalisation statistics as training).
test_transform = transforms.Compose([
    transforms.Resize((256, 256)),
    transforms.CenterCrop(224),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
])
test_dataset = SoilDataset(test_df, TEST_DIR, test_transform, test=True)
test_loader = DataLoader(test_dataset, batch_size=32, shuffle=False)

# Collect the predicted class index and the matching image id for every test image.
predictions, image_ids = [], []
with torch.no_grad():
    for images, ids in tqdm(test_loader, desc="Predicting"):
        images = images.to(device)
        outputs = model(images)
        # Predicted class = index of the largest logit.
        preds = outputs.argmax(dim=1)
        predictions.extend(preds.cpu().numpy())
        image_ids.extend(ids)

# Map the integer class indices back to the original label values.
pred_labels = le.inverse_transform(predictions)

Predicting: 100%|██████████| 31/31 [00:11<00:00,  2.59it/s]


In [31]:
# Assemble the submission table (one row per test image id with its decoded
# label) and write it to disk without the index column.
# NOTE(review): the column is written as 'soil_type'; confirm this matches the
# header the competition's sample submission expects.
submission = pd.DataFrame(
    data=dict(image_id=image_ids, soil_type=pred_labels)
)
submission.to_csv(path_or_buf="submission.csv", index=False)
print("submission.csv generated successfully!")

submission.csv generated successfully!
