In [None]:
"""
Author: Annam.ai IIT Ropar
Team Members: Aman Sagar
Leaderboard Rank: 16

"""


# Mount Google Drive
# Colab-only step: exposes the user's Drive under /content/drive so the
# dataset paths defined below resolve. Requires interactive authorisation.
from google.colab import drive
drive.mount('/content/drive')

# Paths
# Everything lives under one competition folder on the mounted Drive.
import os

ROOT_DIR = "/content/drive/MyDrive/soil-classification-part-2/soil_competition-2025"

# Image folders for the two splits.
TRAIN_DIR, TEST_DIR = (
    os.path.join(ROOT_DIR, split_name) for split_name in ("train", "test")
)

# CSV manifests: training labels and the list of test image ids.
TRAIN_CSV, TEST_CSV = (
    os.path.join(ROOT_DIR, csv_name)
    for csv_name in ("train_labels.csv", "test_ids.csv")
)


Mounted at /content/drive


In [None]:
# Install required packages
!pip install -q torch torchvision pandas scikit-learn

# Imports
import pandas as pd
from PIL import Image
import torch
import torch.nn as nn
import torch.optim as optim
from torchvision import transforms, models
from torch.utils.data import Dataset, DataLoader
from sklearn.model_selection import train_test_split
from tqdm import tqdm


[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m363.4/363.4 MB[0m [31m3.9 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m13.8/13.8 MB[0m [31m50.2 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m24.6/24.6 MB[0m [31m32.3 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m883.7/883.7 kB[0m [31m53.3 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m664.8/664.8 MB[0m [31m1.3 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m211.5/211.5 MB[0m [31m5.6 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m56.3/56.3 MB[0m [31m13.2 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m127.9/127.9 MB[0m [31m7.3 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━

In [None]:
# Preprocessing
# Load the labelled training table and hold out 20% of it for validation,
# stratified on the label so both splits keep the same class balance.
df = pd.read_csv(TRAIN_CSV)
df_train, df_val = train_test_split(
    df,
    test_size=0.2,
    stratify=df['label'],
    random_state=42,
)

# Steps common to both pipelines: PIL image -> float tensor -> per-channel
# normalisation (these mean/std values are the usual ImageNet statistics
# used with torchvision's pretrained backbones).
normalize_step = transforms.Normalize(
    mean=[0.485, 0.456, 0.406],
    std=[0.229, 0.224, 0.225],
)
tensor_steps = [transforms.ToTensor(), normalize_step]

# Training: light augmentation (random crop covering 80-100% of the image,
# resized to 224x224, plus a random horizontal flip).
train_transforms = transforms.Compose([
    transforms.RandomResizedCrop(224, scale=(0.8, 1.0)),
    transforms.RandomHorizontalFlip(),
    *tensor_steps,
])

# Validation / test: deterministic resize to 224x224, no augmentation.
val_transforms = transforms.Compose([
    transforms.Resize((224, 224)),
    *tensor_steps,
])


In [None]:
# Dataset
class BinarySoilDataset(Dataset):
    """Image dataset driven by a DataFrame with an ``image_id`` column.

    Each item is loaded from ``img_dir/<image_id>``, converted to RGB and,
    when a transform is supplied, passed through it.

    Args:
        dataframe: table with an ``image_id`` column and, unless ``test`` is
            set, a ``label`` column.
        img_dir: directory containing the image files.
        transform: optional callable applied to the loaded PIL image.
        test: when true, items are ``(image, image_id)`` pairs; otherwise
            they are ``(image, label)`` pairs.
    """

    def __init__(self, dataframe, img_dir, transform=None, test=False):
        self.df, self.img_dir = dataframe, img_dir
        self.transform, self.test = transform, test

    def __len__(self):
        """Number of rows in the backing DataFrame."""
        return len(self.df)

    def __getitem__(self, idx):
        """Load the image at positional index ``idx`` with its id or label."""
        # Positional lookup, so the DataFrame index left over from
        # train_test_split does not need to be reset.
        row = self.df.iloc[idx]
        img_id = row['image_id']

        image = Image.open(os.path.join(self.img_dir, img_id)).convert('RGB')
        if self.transform:
            image = self.transform(image)

        # The label column is only touched in train/validation mode.
        return (image, img_id) if self.test else (image, row['label'])



In [None]:
# Loaders
BATCH_SIZE = 64

# The test manifest has image ids only (no labels).
test_df = pd.read_csv(TEST_CSV)

# Train and validation images both live in TRAIN_DIR; only the row subset and
# the transform differ. The test set reuses the deterministic validation
# transform and yields (image, image_id) pairs.
train_dataset = BinarySoilDataset(dataframe=df_train, img_dir=TRAIN_DIR, transform=train_transforms)
val_dataset = BinarySoilDataset(dataframe=df_val, img_dir=TRAIN_DIR, transform=val_transforms)
test_dataset = BinarySoilDataset(dataframe=test_df, img_dir=TEST_DIR, transform=val_transforms, test=True)

# Only the training loader shuffles; validation and test keep file order so
# predictions line up with the manifest.
train_loader = DataLoader(dataset=train_dataset, batch_size=BATCH_SIZE, shuffle=True)
val_loader = DataLoader(dataset=val_dataset, batch_size=BATCH_SIZE)
test_loader = DataLoader(dataset=test_dataset, batch_size=BATCH_SIZE)


In [None]:
# Model
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# `pretrained=True` is deprecated in torchvision (>= 0.13) in favour of the
# `weights` enum. IMAGENET1K_V1 is the same checkpoint the old flag fetched
# (resnet18-f37072fd.pth), so the starting weights are unchanged.
model = models.resnet18(weights=models.ResNet18_Weights.IMAGENET1K_V1)

# Fine-tune the whole backbone. Parameters are trainable by default; the loop
# just makes that choice explicit (set to False here to freeze the backbone).
for param in model.parameters():
    param.requires_grad = True

# Replace the 1000-class ImageNet head with a single-logit binary head.
model.fc = nn.Linear(model.fc.in_features, 1)
model = model.to(device)

# BCEWithLogitsLoss applies the sigmoid internally, so the model outputs raw logits.
criterion = nn.BCEWithLogitsLoss()
optimizer = optim.Adam(model.parameters(), lr=1e-4)


Downloading: "https://download.pytorch.org/models/resnet18-f37072fd.pth" to /root/.cache/torch/hub/checkpoints/resnet18-f37072fd.pth
100%|██████████| 44.7M/44.7M [00:00<00:00, 131MB/s]


In [None]:
# Train/Val functions
def train(model, loader, criterion, optimizer, device):
    """Run one training epoch and return ``(mean_loss, accuracy)``.

    Args:
        model: network producing one logit per sample (output compared
            against labels reshaped to ``(batch, 1)``).
        loader: iterable of ``(inputs, labels)`` batches.
        criterion: loss taking ``(logits, float_labels)``; assumed to return
            the batch mean, since it is re-weighted by batch size below.
        optimizer: optimizer stepping ``model``'s parameters.
        device: device the batches are moved to.

    Returns:
        Tuple of per-sample average loss and fraction of correct predictions
        (sigmoid > 0.5), both computed over the samples actually processed.

    Raises:
        ZeroDivisionError: if the loader yields no samples.
    """
    model.train()
    running_loss, correct, seen = 0.0, 0, 0
    for inputs, labels in tqdm(loader):
        inputs = inputs.to(device)
        labels = labels.to(device).float().unsqueeze(1)

        optimizer.zero_grad()
        outputs = model(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        batch_size = inputs.size(0)
        running_loss += loss.item() * batch_size
        preds = (torch.sigmoid(outputs) > 0.5).float()
        correct += (preds == labels).sum().item()
        seen += batch_size

    # Divide by the samples seen rather than len(loader.dataset): the two
    # differ when the loader drops the last batch or uses a subset sampler.
    return running_loss / seen, correct / seen

def evaluate(model, loader, criterion, device):
    """Evaluate ``model`` on ``loader`` without gradient tracking.

    Args:
        model: network producing one logit per sample (output compared
            against labels reshaped to ``(batch, 1)``).
        loader: iterable of ``(inputs, labels)`` batches.
        criterion: loss taking ``(logits, float_labels)``; assumed to return
            the batch mean, since it is re-weighted by batch size below.
        device: device the batches are moved to.

    Returns:
        Tuple of per-sample average loss and fraction of correct predictions
        (sigmoid > 0.5), both computed over the samples actually processed.

    Raises:
        ZeroDivisionError: if the loader yields no samples.
    """
    model.eval()
    running_loss, correct, seen = 0.0, 0, 0
    with torch.no_grad():
        for inputs, labels in loader:
            inputs = inputs.to(device)
            labels = labels.to(device).float().unsqueeze(1)

            outputs = model(inputs)
            loss = criterion(outputs, labels)

            batch_size = inputs.size(0)
            running_loss += loss.item() * batch_size
            preds = (torch.sigmoid(outputs) > 0.5).float()
            correct += (preds == labels).sum().item()
            seen += batch_size

    # Divide by the samples seen rather than len(loader.dataset): the two
    # differ when the loader drops the last batch or uses a subset sampler.
    return running_loss / seen, correct / seen

class EarlyStopping:
    """Flag when validation loss has stopped improving.

    Call the instance once per epoch with the validation loss. A loss counts
    as an improvement only if it is below ``best_loss - delta``. After
    ``patience`` consecutive non-improving calls, ``early_stop`` is set to
    True (and is never cleared again by this class).

    Attributes are public and read by the training loop:
        best_loss: lowest loss accepted as an improvement so far.
        counter: consecutive calls without improvement.
        early_stop: True once the patience budget is exhausted.
    """

    def __init__(self, patience=2, delta=0):
        self.patience, self.delta = patience, delta
        self.best_loss = float('inf')
        self.counter = 0
        self.early_stop = False

    def __call__(self, val_loss):
        improved = val_loss < self.best_loss - self.delta
        if improved:
            # New best: remember it and restart the patience window.
            self.best_loss, self.counter = val_loss, 0
            return

        self.counter += 1
        if self.counter >= self.patience:
            self.early_stop = True


In [None]:
# Training
# Train for up to EPOCHS epochs, stopping early once validation loss has
# failed to improve for `patience` consecutive epochs.
early_stopper = EarlyStopping(patience=2)
EPOCHS = 10

# Snapshot of the weights from the epoch with the lowest validation loss.
# Without this, early stopping would leave the model in the state of the
# last (non-improving) epoch rather than the best one.
best_val_loss = float("inf")
best_state = None

for epoch in range(EPOCHS):
    train_loss, train_acc = train(model, train_loader, criterion, optimizer, device)
    val_loss, val_acc = evaluate(model, val_loader, criterion, device)
    print(f"Epoch {epoch+1}: Train Loss={train_loss:.4f} Acc={train_acc:.4f} | Val Loss={val_loss:.4f} Acc={val_acc:.4f}")

    if val_loss < best_val_loss:
        best_val_loss = val_loss
        # state_dict() hands back references to the live tensors, so clone
        # them; otherwise later optimizer steps would overwrite the snapshot.
        best_state = {
            name: tensor.detach().clone()
            for name, tensor in model.state_dict().items()
        }

    early_stopper(val_loss)
    if early_stopper.early_stop:
        print("Early stopping triggered.")
        break

# Use the best-validation weights for everything that follows (inference).
if best_state is not None:
    model.load_state_dict(best_state)


100%|██████████| 16/16 [03:45<00:00, 14.10s/it]


Epoch 1: Train Loss=0.4317 Acc=0.8731 | Val Loss=0.6585 Acc=0.8653


100%|██████████| 16/16 [00:15<00:00,  1.05it/s]


Epoch 2: Train Loss=0.0977 Acc=0.9980 | Val Loss=0.1597 Acc=0.9673


100%|██████████| 16/16 [00:15<00:00,  1.01it/s]


Epoch 3: Train Loss=0.0433 Acc=0.9990 | Val Loss=0.0477 Acc=1.0000


100%|██████████| 16/16 [00:15<00:00,  1.05it/s]


Epoch 4: Train Loss=0.0245 Acc=1.0000 | Val Loss=0.0265 Acc=1.0000


100%|██████████| 16/16 [00:15<00:00,  1.04it/s]


Epoch 5: Train Loss=0.0202 Acc=1.0000 | Val Loss=0.0169 Acc=1.0000


100%|██████████| 16/16 [00:15<00:00,  1.04it/s]


Epoch 6: Train Loss=0.0184 Acc=1.0000 | Val Loss=0.0179 Acc=1.0000


100%|██████████| 16/16 [00:15<00:00,  1.04it/s]


Epoch 7: Train Loss=0.0166 Acc=1.0000 | Val Loss=0.0142 Acc=1.0000


100%|██████████| 16/16 [00:15<00:00,  1.02it/s]


Epoch 8: Train Loss=0.0120 Acc=1.0000 | Val Loss=0.0112 Acc=1.0000


100%|██████████| 16/16 [00:15<00:00,  1.01it/s]


Epoch 9: Train Loss=0.0104 Acc=1.0000 | Val Loss=0.0094 Acc=1.0000


100%|██████████| 16/16 [00:15<00:00,  1.01it/s]


Epoch 10: Train Loss=0.0091 Acc=1.0000 | Val Loss=0.0084 Acc=1.0000


In [None]:
# Inference
# Run the trained network over the test loader and collect one hard 0/1
# prediction per image, keeping the image ids in the same order.

model.eval()
all_preds, all_ids = [], []

with torch.no_grad():
    for batch_images, batch_ids in test_loader:
        logits = model(batch_images.to(device))
        # (batch, 1) logits -> (batch,) probabilities
        positive_prob = torch.sigmoid(logits).squeeze(1)
        # Adjust threshold if necessary
        batch_preds = (positive_prob > 0.5).long().cpu().numpy()
        all_preds.extend(batch_preds)
        all_ids.extend(batch_ids)

import cv2

def calculate_blur(image_path):
    """Return the variance of the Laplacian of the image at ``image_path``.

    The image is read as grayscale and filtered with a 64-bit float
    Laplacian; the variance of the response is returned. Callers in this
    notebook treat a low value as a sign of a blurry image.

    Args:
        image_path: path to an image file readable by OpenCV.

    Returns:
        Variance of the Laplacian response as a float.

    Raises:
        OSError: if the file is missing or cannot be decoded as an image.
    """
    image = cv2.imread(image_path, cv2.IMREAD_GRAYSCALE)
    # cv2.imread signals failure by returning None instead of raising, which
    # would otherwise surface as an opaque error inside cv2.Laplacian.
    if image is None:
        raise OSError(f"Could not read image (missing or not decodable): {image_path}")
    return cv2.Laplacian(image, cv2.CV_64F).var()

# Laplacian-variance cutoff below which an image is considered very blurry
# (tune if needed).
BLUR_THRESHOLD = 100

# Apply blur logic: a very blurry image is forced to class 0 (likely
# non-soil); otherwise the network's prediction is trusted as-is.
final_preds = [
    0 if calculate_blur(os.path.join(TEST_DIR, img_id)) < BLUR_THRESHOLD else resnet_pred
    for img_id, resnet_pred in zip(all_ids, all_preds)
]

# Save final predictions in the submission format (image_id, label).
submission = pd.DataFrame({"image_id": all_ids, "label": final_preds})
submission.to_csv("submission-part2_blur_corrected.csv", index=False)
print("✅ Saved: submission-part2_blur_corrected.csv")


✅ Saved: submission-part2_blur_corrected.csv


In [None]:
# Colab-only convenience: hand the submission CSV written by the inference
# cell to the browser as a file download.
from google.colab import files
files.download('submission-part2_blur_corrected.csv')


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

In [None]:
# Sanity check: print the blur score of the first few test images to help
# judge whether BLUR_THRESHOLD is on a sensible scale.
print("Blur score for sample images:")
# Slicing (rather than indexing range(5)) copes with fewer than five test ids.
for sample_id in all_ids[:5]:
    print(sample_id, calculate_blur(os.path.join(TEST_DIR, sample_id)))


           image_id  label
0  img_ed005410.jpg      1
1  img_0c5ecd2a.jpg      1
2  img_ed713bb5.jpg      1
3  img_12c58874.jpg      1
4  img_eff357af.jpg      1
label
1    1222
Name: count, dtype: int64


Unnamed: 0_level_0,count
label,Unnamed: 1_level_1
1,1222
