In [None]:
import os
import torch
import torch.nn as nn
from torch.utils.data import DataLoader, random_split
from torchvision import transforms, datasets, models
from sklearn.metrics import accuracy_score, f1_score
from PIL import Image
import pandas as pd


In [None]:
# --------- Data Transforms ---------
# Training pipeline: resize every image to 224x224, apply light augmentation
# (random horizontal flip, random rotation of up to 15 degrees), convert to a
# tensor, then normalise per channel with the mean/std constants below (the
# values conventionally used with ImageNet-pretrained torchvision models).
_train_steps = [
    transforms.Resize(size=(224, 224)),
    transforms.RandomHorizontalFlip(),
    transforms.RandomRotation(degrees=15),
    transforms.ToTensor(),
    transforms.Normalize(
        mean=[0.485, 0.456, 0.406],
        std=[0.229, 0.224, 0.225],
    ),
]
train_transform = transforms.Compose(_train_steps)

In [None]:
# Validation pipeline: deterministic (no augmentation) -- resize, convert to a
# tensor and normalise with the same per-channel constants as training.
_eval_steps = [
    transforms.Resize(size=(224, 224)),
    transforms.ToTensor(),
    transforms.Normalize(
        mean=[0.485, 0.456, 0.406],
        std=[0.229, 0.224, 0.225],
    ),
]
val_transform = transforms.Compose(_eval_steps)

# The test set reuses the validation pipeline (same object, not a copy).
test_transform = val_transform

In [None]:
# --------- Paths (update these) ---------
# Both folders live under the same dataset root; change DATA_ROOT to relocate them.
DATA_ROOT = '/kaggle/input/binary-data-2'
train_dir = f'{DATA_ROOT}/Dataset2'  # Folder with only soil images
test_dir = f'{DATA_ROOT}/ttest3'  # Contains both soil and non-soil

In [None]:
# --------- Custom Dataset for Binary Classification ---------
class BinarySoilDataset(datasets.ImageFolder):
    """ImageFolder variant that yields binary labels: 1 = soil, 0 = non-soil.

    Args:
        root: Directory in ImageFolder layout (``root/<class_name>/<image>``).
        transform: Optional transform applied to each loaded image.
        is_train: When True, every sample is labelled 1 regardless of its
            class folder.
        soil_classes: Optional iterable of class-folder names that count as
            soil when ``is_train`` is False. When omitted, the label falls
            back to a path-substring check against ``self.classes``.

    Returns (from ``__getitem__``):
        ``(image, label)`` where ``label`` is a 0-dim ``torch.long`` tensor.
    """

    # Fixed: the special methods must be spelled with double underscores;
    # with ``_init_`` / ``_getitem_`` the overrides were never invoked.
    def __init__(self, root, transform=None, is_train=True, soil_classes=None):
        super().__init__(root, transform=transform)
        self.is_train = is_train
        self.soil_classes = None if soil_classes is None else frozenset(soil_classes)

    def __getitem__(self, index):
        img, _ = super().__getitem__(index)
        if self.is_train:
            label = 1
        else:
            path, class_idx = self.samples[index]
            if self.soil_classes is not None:
                # Label by the sample's own class folder name.
                label = int(self.classes[class_idx] in self.soil_classes)
            else:
                # NOTE(review): self.classes holds the sub-folder names of this
                # dataset's root and each sample path contains its own class
                # folder, so this check appears to yield 1 for every sample.
                # Pass ``soil_classes`` to obtain real 0/1 labels.
                label = 1 if any(soil_type in path for soil_type in self.classes) else 0
        return img, torch.tensor(label, dtype=torch.long)

In [None]:
# --------- Load Datasets ---------
# Training images are all labelled as soil (is_train=True); the test set has
# its labels derived per sample (is_train=False).
train_dataset = BinarySoilDataset(
    train_dir,
    transform=train_transform,
    is_train=True,
)
test_dataset = BinarySoilDataset(
    test_dir,
    transform=test_transform,
    is_train=False,
)

In [None]:
# Split train into train and val (80/20).
# Two dataset objects are built over the same folder so each subset keeps its
# own transform. The previous approach mutated the single dataset shared by
# both random_split subsets, which silently replaced the training
# augmentation with the validation transform as well.
SPLIT_SEED = 42  # fixed seed so the split is reproducible across runs
_train_view = BinarySoilDataset(root=train_dir, transform=train_transform, is_train=True)
_val_view = BinarySoilDataset(root=train_dir, transform=val_transform, is_train=True)

_num_samples = len(_train_view)
train_size = int(0.8 * _num_samples)
val_size = _num_samples - train_size

# One shared permutation guarantees the two subsets are disjoint.
_shuffled = torch.randperm(
    _num_samples, generator=torch.Generator().manual_seed(SPLIT_SEED)
).tolist()
train_dataset = torch.utils.data.Subset(_train_view, _shuffled[:train_size])
val_dataset = torch.utils.data.Subset(_val_view, _shuffled[train_size:])

In [None]:
# --------- DataLoaders ---------
# Settings shared by all three loaders; only the training loader shuffles.
_loader_kwargs = {'batch_size': 32, 'num_workers': 4}
train_loader = DataLoader(train_dataset, shuffle=True, **_loader_kwargs)
val_loader = DataLoader(val_dataset, shuffle=False, **_loader_kwargs)
test_loader = DataLoader(test_dataset, shuffle=False, **_loader_kwargs)


In [None]:
# --------- Device ---------
# Use the GPU when CUDA is available, otherwise fall back to the CPU.
_backend = 'cuda' if torch.cuda.is_available() else 'cpu'
device = torch.device(_backend)

In [None]:
# --------- ResNet50 Model for Binary Classification ---------
def get_resnet50_binary(pretrained=True, hidden_dim=512, dropout=0.5, num_classes=2):
    """Build a ResNet-50 with a small classification head for fine-tuning.

    The backbone is frozen except for ``layer4``; the original ``fc`` layer is
    replaced by Linear -> ReLU -> Dropout -> Linear.

    Args:
        pretrained: Load the ImageNet weights when True (default).
        hidden_dim: Width of the hidden layer in the new head.
        dropout: Dropout probability applied in the new head.
        num_classes: Number of output logits.

    Returns:
        The configured ``torch.nn.Module`` (on the CPU; move it with ``.to``).
    """
    # Newer torchvision releases deprecate ``pretrained=`` in favour of the
    # ``weights=`` enum; fall back to the legacy flag on older releases.
    weights_enum = getattr(models, "ResNet50_Weights", None)
    if weights_enum is not None:
        model = models.resnet50(weights=weights_enum.IMAGENET1K_V1 if pretrained else None)
    else:
        model = models.resnet50(pretrained=pretrained)

    # Freeze the whole backbone first ...
    for param in model.parameters():
        param.requires_grad = False

    # ... then attach a fresh head (new layers are trainable by default) ...
    num_ftrs = model.fc.in_features
    model.fc = nn.Sequential(
        nn.Linear(num_ftrs, hidden_dim),
        nn.ReLU(inplace=True),
        nn.Dropout(dropout),
        nn.Linear(hidden_dim, num_classes),
    )

    # ... and re-enable gradients for the last residual stage.
    for param in model.layer4.parameters():
        param.requires_grad = True
    return model

# Build the network, then move its parameters to the selected device.
model = get_resnet50_binary()
model = model.to(device)


In [None]:
# --------- Loss and Optimizer ---------
# Cross-entropy over the model's output logits; Adam with a learning rate of 1e-4.
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(params=model.parameters(), lr=1e-4)


In [None]:
# --------- Training Function ---------
def train_one_epoch(model, dataloader, criterion, optimizer, device):
    """Run one optimisation pass over ``dataloader``.

    Args:
        model: Network to train; switched to train mode.
        dataloader: Iterable yielding ``(images, labels)`` batches.
        criterion: Loss callable taking ``(outputs, labels)``.
        optimizer: Optimizer that updates ``model``'s parameters.
        device: Device the batches are moved to before the forward pass.

    Returns:
        Mean per-sample loss over the samples actually processed, or ``0.0``
        when the dataloader yields no batches.
    """
    model.train()
    running_loss = 0.0
    samples_seen = 0
    for images, labels in dataloader:
        images, labels = images.to(device), labels.to(device)
        optimizer.zero_grad()
        outputs = model(images)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
        batch_size = images.size(0)
        # Weight each batch's loss by its size so a smaller final batch is
        # averaged correctly.
        running_loss += loss.item() * batch_size
        samples_seen += batch_size
    # Divide by the number of samples seen rather than len(dataloader.dataset):
    # the two differ with drop_last=True or a custom sampler, and the latter
    # raises ZeroDivisionError for an empty dataset.
    if samples_seen == 0:
        return 0.0
    return running_loss / samples_seen

In [None]:
# --------- Evaluation Function ---------
def evaluate(model, dataloader, device):
    """Compute accuracy and binary F1 of ``model`` over ``dataloader``.

    The model is put in eval mode and gradients are disabled. The predicted
    class for each sample is the index of its largest output logit.

    Returns:
        ``(accuracy, f1)`` as computed by sklearn's ``accuracy_score`` and
        ``f1_score(average='binary')``.
    """
    model.eval()
    predicted_labels = []
    true_labels = []
    with torch.no_grad():
        for batch_images, batch_labels in dataloader:
            logits = model(batch_images.to(device))
            batch_preds = logits.argmax(dim=1)
            predicted_labels.extend(batch_preds.cpu().numpy())
            true_labels.extend(batch_labels.cpu().numpy())
    accuracy = accuracy_score(true_labels, predicted_labels)
    f1_binary = f1_score(true_labels, predicted_labels, average='binary')
    return accuracy, f1_binary

In [None]:
# --------- Main Training Loop ---------
num_epochs = 8
# Start below any reachable accuracy so the first epoch always writes a
# checkpoint; starting at 0.0 with a strict '>' would never save if the
# validation accuracy stayed at 0, and the later torch.load would then fail.
best_val_acc = float('-inf')

for epoch in range(num_epochs):
    train_loss = train_one_epoch(model, train_loader, criterion, optimizer, device)
    val_acc, val_f1 = evaluate(model, val_loader, device)
    print(f"Epoch [{epoch+1}/{num_epochs}], Loss: {train_loss:.4f}, Val Acc: {val_acc*100:.2f}%, Val F1: {val_f1:.4f}")
    # Keep only the weights of the best-performing epoch on the validation set.
    if val_acc > best_val_acc:
        best_val_acc = val_acc
        torch.save(model.state_dict(), 'best_soil_classifier.pth')


In [None]:
# Load best model for testing.
# map_location remaps the saved tensors onto the current device, so a
# checkpoint written on a GPU can still be loaded on a CPU-only machine.
model.load_state_dict(torch.load('best_soil_classifier.pth', map_location=device))


In [None]:
# --------- Test Evaluation ---------
# Score the model on the test loader and report both metrics.
_test_metrics = evaluate(model, test_loader, device)
test_acc, test_f1 = _test_metrics
print(f"\nFinal Test Accuracy: {test_acc*100:.2f}%")
print(f"Final Test F1 Score: {test_f1:.4f}")

In [None]:
# --------- Predict and Save to CSV ---------
# Inference mode for the model, the file path of every test sample, and an
# empty list that will collect (file name, predicted label) pairs.
model.eval()
image_paths = [sample_path for sample_path, _ in test_dataset.samples]
predictions = list()

In [None]:
# Start from an empty list inside this cell so that re-running it does not
# append a second copy of every prediction.
predictions = []
model.eval()
with torch.no_grad():
    for img_path in image_paths:
        # The context manager closes the file handle once the pixel data has
        # been converted; a bare Image.open leaves it open.
        with Image.open(img_path) as raw_img:
            rgb_img = raw_img.convert('RGB')
        # Add a leading batch dimension: the model is fed one image at a time.
        batch = test_transform(rgb_img).unsqueeze(0).to(device)
        output = model(batch)
        _, pred = torch.max(output, 1)
        predictions.append((os.path.basename(img_path), pred.item()))


In [None]:
# Create and save CSV: one row per test image (file name, predicted label).
df = pd.DataFrame(predictions, columns=["image_id", "label"])
df.to_csv("test_predictions.csv", index=False)
print("\nPredictions saved to test_predictions.csv")
# TODO: generate an image of the model architecture (this note was previously
# stray text after the print call, which made the cell a syntax error).