In [1]:
import torch.nn as nn
import torch.optim as optim
from torchvision import datasets, transforms
from torch.utils.data import DataLoader, WeightedRandomSampler
import timm

In [2]:
import torch
import os
from sklearn.metrics import classification_report
import numpy as np

In [3]:
data_augmenter = transforms.Compose([
    transforms.Resize((256,256)),
    transforms.RandomHorizontalFlip(p=0.5),
    transforms.RandomVerticalFlip(p=0.5),
    transforms.ColorJitter(brightness=0.2,contrast=0.2,saturation=0.2),
    transforms.GaussianBlur(3) ,
    transforms.ToTensor(),
    transforms.Normalize([0.485, 0.456, 0.406],
                         [0.229, 0.224, 0.225])
])

test_transformer = transforms.Compose([
    transforms.Resize((256,256)),
    transforms.ToTensor(),
    transforms.Normalize([0.485, 0.456, 0.406],
                         [0.229, 0.224, 0.225])
])

In [4]:
import csv
with open(r"/content/drive/MyDrive/Annam ai/soil_classification-2025/train_labels.csv") as f:
  image = {}
  for i in csv.reader(f):
    try: image[i[1]] += 1
    except: image[i[1]] = 1
  print(image)

{'soil_type': 1, 'Alluvial soil': 528, 'Clay soil': 199, 'Red soil': 264, 'Black Soil': 231}


In [6]:
from sklearn.model_selection import train_test_split
ifiles,images = {},[]
with open(r"/content/drive/MyDrive/Annam ai/soil_classification-2025/train_labels.csv") as f:
  for row in csv.reader(f):
    ifiles[row[0]] = row[1]
    images.append(row[0])
  train,val = train_test_split(images[1:],test_size=0.2)
  try:
    for i in ["Alluvial","Red","Black","Clay"]:
      os.makedirs(f"/content/drive/MyDrive/Annam ai/data/train/{i}")
      os.makedirs(f"/content/drive/MyDrive/Annam ai/data/val/{i}")

    for i in train:
      type_s = ifiles[i].split()[0]
      try :
        if type_s in ["Alluvial","Red","Black","Clay"]:
          os.rename(f"/content/drive/MyDrive/Annam ai/soil_classification-2025/train/{i}",f"/content/drive/MyDrive/Annam ai/data/train/{type_s}/{i}")
      except : pass

    for i in val:
      try :
        type_s = ifiles[i].split()[0]
        if type_s in ["Alluvial","Red","Black","Clay"]:
          os.rename(f"/content/drive/MyDrive/Annam ai/soil_classification-2025/train/{i}",f"/content/drive/MyDrive/Annam ai/data/val/{type_s}/{i}")
      except : pass
  except : pass


In [41]:
with open("/content/drive/MyDrive/Annam ai/data/test_ans.csv",'w') as f:
  f.write("image_id,label\n")
  f.writelines([f"{i},{ifiles[i].split()[0]}\n" for i in val])

In [7]:
data_dir = r"/content/drive/MyDrive/Annam ai/data/train"

train_dataset = datasets.ImageFolder(root=data_dir, transform=data_augmenter)
targets = [label for _, label in train_dataset]
class_counts = [targets.count(i) for i in range(4)]
print(class_counts)
weights = 1. / torch.tensor(class_counts, dtype=torch.float)
samples_weights = weights[targets]
sampler = WeightedRandomSampler(samples_weights, len(samples_weights), replacement=True)

train_loader = DataLoader(train_dataset, batch_size=32, sampler=sampler)

[506, 219, 190, 247]


In [9]:
data_dir = r"/content/drive/MyDrive/Annam ai/data/val"

val_dataset = datasets.ImageFolder(root=data_dir, transform=test_transformer)
val_loader = DataLoader(val_dataset, batch_size=32)

In [8]:
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

model = timm.create_model('efficientnet_b3', pretrained=True, num_classes=4)
model = model.to(device)

class_weights = torch.tensor(weights).to(device)
criterion = nn.CrossEntropyLoss(weight=class_weights)
optimizer = optim.Adam(model.parameters(), lr=1e-4)

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


model.safetensors:   0%|          | 0.00/49.3M [00:00<?, ?B/s]

  class_weights = torch.tensor(weights).to(device)


In [10]:
def train(model, loader, optimizer, criterion):
    model.train()
    running_loss = 0.0
    correct, total = 0, 0

    for images, labels in loader:
        images, labels = images.to(device), labels.to(device)
        optimizer.zero_grad()
        outputs = model(images)

        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        running_loss += loss.item()
        _, preds = torch.max(outputs, 1)
        correct += (preds == labels).sum().item()
        total += labels.size(0)

    acc = correct / total
    return running_loss / len(loader), acc

In [35]:
def evaluate(model, loader):
    model.eval()
    all_preds, all_labels = [], []

    with torch.no_grad():
        for images, labels in loader:
            images = images.to(device)
            outputs = model(images)
            _, preds = torch.max(outputs, 1)
            all_preds.extend(preds.cpu().numpy())
            all_labels.extend(labels.numpy())

    print(classification_report(all_labels, all_preds, target_names=train_dataset.classes))

In [37]:
epochs = 50
for epoch in range(epochs):
    train_loss, train_acc = train(model, train_loader, optimizer, criterion)
    print(f"Epoch {epoch+1}/{epochs} - Loss: {train_loss:.4f}, Acc: {train_acc:.4f}")

Epoch 1/15 - Loss: 0.0119, Acc: 0.9948
Epoch 2/15 - Loss: 0.0115, Acc: 0.9940
Epoch 3/15 - Loss: 0.0159, Acc: 0.9957
Epoch 4/15 - Loss: 0.0116, Acc: 0.9966
Epoch 5/15 - Loss: 0.0086, Acc: 0.9966
Epoch 6/15 - Loss: 0.0518, Acc: 0.9923
Epoch 7/15 - Loss: 0.0302, Acc: 0.9888
Epoch 8/15 - Loss: 0.0394, Acc: 0.9897
Epoch 9/15 - Loss: 0.0160, Acc: 0.9923
Epoch 10/15 - Loss: 0.0182, Acc: 0.9957
Epoch 11/15 - Loss: 0.0203, Acc: 0.9948
Epoch 12/15 - Loss: 0.0141, Acc: 0.9957
Epoch 13/15 - Loss: 0.0152, Acc: 0.9948
Epoch 14/15 - Loss: 0.0076, Acc: 0.9966
Epoch 15/15 - Loss: 0.0118, Acc: 0.9948


In [40]:
evaluate(model, val_loader)

              precision    recall  f1-score   support

    Alluvial       1.00      0.91      0.95        22
       Black       0.92      1.00      0.96        12
        Clay       0.90      1.00      0.95         9
         Red       1.00      1.00      1.00        15

    accuracy                           0.97        58
   macro avg       0.96      0.98      0.96        58
weighted avg       0.97      0.97      0.97        58



In [20]:
torch.save(model.state_dict(), '/content/drive/MyDrive/Annam ai/soil_classification-2025/model1.pth')