In [3]:
import torch
import torchvision
from torch import nn, optim
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms
from sklearn.metrics import f1_score
import pandas as pd
from PIL import Image
import os

# Check device
# Run on the GPU when CUDA reports one as available; otherwise stay on the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")

# Custom Dataset
class SoilDataset(Dataset):
    """Image-classification dataset backed by an image folder and a labels table.

    Rows of ``labels_df`` are read positionally: column 0 holds the image file
    name (joined onto ``img_dir``) and column 1 holds the class name, which
    must be one of ``self.classes``.

    Args:
        img_dir: Directory that contains the image files.
        labels_df: pandas DataFrame with one row per sample.
        transform: Optional callable applied to the loaded RGB PIL image.
    """

    def __init__(self, img_dir, labels_df, transform=None):
        self.img_dir = img_dir
        self.labels = labels_df
        self.transform = transform
        # Fixed class order; a class's position in this list is its integer label.
        self.classes = ['Alluvial soil', 'Black Soil', 'Clay soil', 'Red soil']
        self.class_to_idx = {c: i for i, c in enumerate(self.classes)}

    def __len__(self):
        """Return the number of rows in the labels table."""
        return len(self.labels)

    def __getitem__(self, idx):
        """Return ``(image, label)`` for row ``idx``, skipping unreadable images.

        When the image of a row cannot be loaded, the error is printed and the
        next row (wrapping around at the end) is returned instead, so a few
        corrupt files do not abort an epoch.

        Raises:
            IndexError: If ``idx`` is outside the labels table.
            KeyError: If the row's class name is not in ``self.classes``.
            RuntimeError: If no image in the whole dataset could be loaded.
        """
        # Try each row at most once. The previous implementation recursed
        # without a bound, so a dataset with only broken images ended in a
        # RecursionError after flooding the log.
        max_attempts = max(len(self), 1)
        current = idx
        for _ in range(max_attempts):
            img_path = os.path.join(self.img_dir, self.labels.iloc[current, 0])
            try:
                # The context manager closes the file handle as soon as the
                # pixel data has been copied into the converted image.
                with Image.open(img_path) as img:
                    image = img.convert('RGB')
            except Exception as e:
                # Deliberately broad: any decoding failure means "skip this file".
                print(f"Error loading image {img_path}: {e}")
                current = (current + 1) % len(self)  # Skip problematic image
                continue

            label = self.class_to_idx[self.labels.iloc[current, 1]]
            if self.transform:
                image = self.transform(image)
            return image, label

        raise RuntimeError(
            f"None of the {len(self)} images under {self.img_dir!r} could be loaded"
        )

# Transforms
# Evaluation pipeline: deterministic resize + centre crop, then tensor
# conversion and per-channel normalisation.
test_transform = transforms.Compose([
    transforms.Resize(size=256),
    transforms.CenterCrop(size=224),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
])

# Training pipeline: same output size and normalisation as above, but with a
# random resized crop and a random horizontal flip as augmentation.
train_transform = transforms.Compose([
    transforms.Resize(size=256),
    transforms.RandomResizedCrop(size=224),
    transforms.RandomHorizontalFlip(),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
])

# Load data
# The label table is read from CSV and paired with the image folder; the
# training transform pipeline is applied to every loaded image.
train_df = pd.read_csv(
    r"C:\Users\LENOVO FLEX\Downloads\soil-classification\soil_classification-2025\train_labels.csv"
)
train_dataset = SoilDataset(
    img_dir=r"C:\Users\LENOVO FLEX\Downloads\soil-classification\soil_classification-2025\train",
    labels_df=train_df,
    transform=train_transform,
)

# DataLoader with num_workers=0
# Shuffled mini-batches of 32 samples.
train_loader = DataLoader(
    dataset=train_dataset,
    batch_size=32,
    shuffle=True,
    num_workers=0,
)

# Class weights
# Inverse-frequency weighting: each class gets 1 / (its sample count), and the
# results are rescaled so that they sum to 1.
class_counts = train_df['soil_type'].value_counts()
class_weights = class_counts.rdiv(1.)
class_weights = class_weights.div(class_weights.sum())
# Emit the weights in the dataset's class-index order, then move them to the
# compute device.
weights = torch.tensor(
    [class_weights[class_name] for class_name in train_dataset.classes],
    dtype=torch.float,
).to(device)

# Model
# ResNet-50 whose final fully connected layer is replaced by a fresh linear
# classification head, then moved to the compute device.
# NOTE(review): recent torchvision releases deprecate `pretrained=` in favour
# of `weights=` — confirm the installed version before switching.
model = torchvision.models.resnet50(pretrained=True)
num_ftrs = model.fc.in_features
# Size the head from the dataset's class list instead of a literal 4, so the
# model and the label mapping cannot drift apart.
model.fc = nn.Linear(num_ftrs, len(train_dataset.classes))
model = model.to(device)

# Loss and optimizer
# Cross-entropy weighted by the per-class weights; Adam updates every model
# parameter with a learning rate of 1e-4.
criterion = nn.CrossEntropyLoss(weight=weights)
optimizer = optim.Adam(model.parameters(), lr=1e-4)

# Training loop with F1 score
def train_model(model, dataloader, criterion, optimizer, num_epochs=5):
    """Train ``model`` in place and print the loss and macro-F1 of every epoch.

    Args:
        model: Network to optimise; switched to training mode each epoch.
        dataloader: Iterable of ``(inputs, labels)`` batches; must support ``len()``.
        criterion: Loss callable invoked as ``criterion(outputs, labels)``.
        optimizer: Optimizer that updates the model's parameters.
        num_epochs: Number of passes over ``dataloader``.

    The reported loss is the mean of the per-batch losses, and the F1 score is
    computed over the predictions made during that same training pass.
    """
    for epoch in range(num_epochs):
        model.train()
        running_loss = 0.0
        seen_predictions, seen_targets = [], []

        for batch_images, batch_targets in dataloader:
            batch_images = batch_images.to(device)
            batch_targets = batch_targets.to(device)

            # Standard optimisation step: clear gradients, forward, backward, update.
            optimizer.zero_grad()
            logits = model(batch_images)
            batch_loss = criterion(logits, batch_targets)
            batch_loss.backward()
            optimizer.step()

            running_loss += batch_loss.item()

            # Index of the largest output along dim 1 is the predicted class.
            batch_predictions = torch.max(logits, 1)[1]
            seen_predictions.extend(batch_predictions.cpu().numpy())
            seen_targets.extend(batch_targets.cpu().numpy())

        epoch_f1 = f1_score(seen_targets, seen_predictions, average='macro')
        print(f"Epoch {epoch+1}/{num_epochs}, Loss: {running_loss/len(dataloader):.4f}, F1 Score: {epoch_f1:.4f}")

# Train the model
# Ten epochs over the training loader (the function's own default is 5).
train_model(
    model=model,
    dataloader=train_loader,
    criterion=criterion,
    optimizer=optimizer,
    num_epochs=10,
)

# Predict function
def predict(model, test_dir, test_ids, transform, classes=None):
    """Predict a class name for each image file in ``test_ids``.

    Args:
        model: Trained network; switched to evaluation mode before inference.
        test_dir: Directory that contains the test images.
        test_ids: Iterable of image file names relative to ``test_dir``.
        transform: Callable that turns an RGB PIL image into an input tensor.
        classes: Optional sequence mapping a predicted index to its class
            name. Defaults to ``train_dataset.classes``.

    Returns:
        A list with one entry per id, in input order: the predicted class
        name, or ``"Unknown"`` when the image could not be read or preprocessed.
    """
    if classes is None:
        # Backward-compatible default: the class order used for training.
        classes = train_dataset.classes

    model.eval()
    predictions = []
    with torch.no_grad():
        for img_id in test_ids:
            img_path = os.path.join(test_dir, img_id)
            # Only loading and preprocessing are treated as per-image,
            # recoverable failures. Errors from the model itself are systemic
            # and must surface instead of silently producing "Unknown" rows.
            try:
                with Image.open(img_path) as img:
                    image = transform(img.convert('RGB'))
            except Exception as e:
                print(f"Error reading {img_id}: {e}")
                predictions.append("Unknown")
                continue

            # Add a batch dimension of 1 and run the single image through the model.
            output = model(image.unsqueeze(0).to(device))
            _, pred = torch.max(output, 1)
            predictions.append(classes[pred.item()])
    return predictions

# Generate submission
# Read the test image ids, predict a soil type for each one with the
# evaluation transform, and write the id/prediction pairs to CSV.
test_ids = pd.read_csv(
    r"C:\Users\LENOVO FLEX\Downloads\soil-classification\soil_classification-2025\test_ids.csv"
)['image_id'].tolist()
preds = predict(
    model=model,
    test_dir=r"C:\Users\LENOVO FLEX\Downloads\soil-classification\soil_classification-2025\test",
    test_ids=test_ids,
    transform=test_transform,
)
submission = pd.DataFrame(data={'image_id': test_ids, 'soil_type': preds})
submission.to_csv(
    r"C:\Users\LENOVO FLEX\Downloads\soil-classification\soil_classification-2025\sample_submission.csv",
    index=False,
)




Epoch 1/10, Loss: 0.4742, F1 Score: 0.8166
Epoch 2/10, Loss: 0.1859, F1 Score: 0.9203
Epoch 3/10, Loss: 0.1730, F1 Score: 0.9383
Epoch 4/10, Loss: 0.1580, F1 Score: 0.9346
Epoch 5/10, Loss: 0.1595, F1 Score: 0.9380
Epoch 6/10, Loss: 0.1364, F1 Score: 0.9385
Epoch 7/10, Loss: 0.1029, F1 Score: 0.9648
Epoch 8/10, Loss: 0.0610, F1 Score: 0.9796
Epoch 9/10, Loss: 0.0714, F1 Score: 0.9716
Epoch 10/10, Loss: 0.0726, F1 Score: 0.9706
