In [None]:
import torch
import torch.nn as nn
from torch.utils.data import random_split
from torchvision.datasets import ImageFolder
import torchvision.models as models
import torchvision.transforms as transforms
import matplotlib.pyplot as plt
from pathlib import Path
import time

# Choose the compute backend: Apple-Silicon MPS is probed first, then CUDA,
# and the CPU is the fallback when neither accelerator is reported available.
if torch.backends.mps.is_available():
    device = "mps"
else:
    # CUDA is only queried once MPS has been ruled out.
    device = "cuda" if torch.cuda.is_available() else "cpu"
print(f"Using: {device}")

In [None]:
# --- Filesystem layout (all paths are relative to the notebook's working dir) ---
DATA_PATH = Path("../data/raw/soil-classification/Orignal-Dataset")
OUTPUTS_PATH = Path("../outputs")
CHECKPOINT_PATH = OUTPUTS_PATH.joinpath("checkpoints")
# Create the checkpoint directory (and any missing parents) up front;
# re-running this cell is harmless because an existing directory is accepted.
CHECKPOINT_PATH.mkdir(parents=True, exist_ok=True)

# --- Image preprocessing constants (ImageNet convention) ---
IMG_DEFAULT_SIZE = 256  # size passed to Resize before cropping
IMG_CROP_SIZE = 224     # side length of the crop fed to the network
IMAGENET_MEAN = [0.485, 0.456, 0.406]  # per-channel mean used by Normalize
IMAGENET_STD = [0.229, 0.224, 0.225]   # per-channel std used by Normalize

# --- Training hyperparameters ---
NUM_EPOCHS = 10
BATCH_SIZE = 32
LEARNING_RATE = 1e-3
NUM_CLASSES = 7

In [None]:
# Training pipeline: resize, then a random crop and a random horizontal flip
# as augmentation, followed by tensor conversion and ImageNet normalization.
train_steps = [
    transforms.Resize(size=IMG_DEFAULT_SIZE),
    transforms.RandomCrop(size=IMG_CROP_SIZE),
    transforms.RandomHorizontalFlip(),
    transforms.ToTensor(),
    transforms.Normalize(mean=IMAGENET_MEAN, std=IMAGENET_STD),
]
train_transform = transforms.Compose(train_steps)

# Validation pipeline: same resize and normalization, but a fixed center crop
# and no flip, so evaluation sees the same pixels on every pass.
val_steps = [
    transforms.Resize(size=IMG_DEFAULT_SIZE),
    transforms.CenterCrop(size=IMG_CROP_SIZE),
    transforms.ToTensor(),
    transforms.Normalize(mean=IMAGENET_MEAN, std=IMAGENET_STD),
]
val_transform = transforms.Compose(val_steps)

In [None]:
# Two views of the same image folder, one per preprocessing pipeline.
#
# random_split returns Subset objects that all wrap the SAME dataset instance,
# so assigning `val_dataset.dataset.transform = val_transform` would also
# replace the transform used by the training subset and silently turn off the
# random crop / flip augmentation. Keeping a separate ImageFolder for
# validation lets each subset apply its own transform.
full_dataset = ImageFolder(root=DATA_PATH, transform=train_transform)
val_source = ImageFolder(root=DATA_PATH, transform=val_transform)

# Both instances scan the same directory; make sure index i refers to the same
# file in each before sharing split indices between them.
assert full_dataset.samples == val_source.samples, "dataset views are misaligned"

print(f"Total images: {len(full_dataset)}")
print(f"Classes: {full_dataset.classes}")

# 80 / 20 train / validation split.
train_size = int(0.8 * len(full_dataset))
val_size = len(full_dataset)-train_size

# A dedicated, seeded generator keeps the split identical across kernel
# restarts (so a resumed checkpoint is never validated on images it trained
# on) without touching the global RNG state.
SPLIT_SEED = 42
split_generator = torch.Generator().manual_seed(SPLIT_SEED)

train_dataset, val_split = random_split(
    full_dataset, [train_size, val_size], generator=split_generator
)
# Re-point the validation indices at the view that carries val_transform.
val_dataset = torch.utils.data.Subset(val_source, val_split.indices)

print(f"Training set: {len(train_dataset)}")
print(f"Validation set: {len(val_dataset)}")