In [2]:
import torch
import torchvision
from torchvision import datasets, transforms
from torch.utils.data import DataLoader
import torch.nn as nn
import torch.optim as optim

In [5]:
# ============================================
# STEP 1: DATA PREPARATION
# ============================================

# Define transformations (preprocessing)
transform = transforms.Compose([
    transforms.Resize((224, 224)),      # Resize to 224x224 (ResNet input size)
    transforms.ToTensor(),               # Convert to PyTorch tensor (0-1 range)
    transforms.Normalize(                # Normalize using ImageNet stats
        mean=[0.485, 0.456, 0.406],     # Standard for pre-trained models
        std=[0.229, 0.224, 0.225]
    )
])

# Load dataset using ImageFolder
# PyTorch automatically:
# 1. Reads folder structure
# 2. Assigns labels: cloudy=0, desert=1, green_area=2, water=3
# 3. Applies transformations
dataset = datasets.ImageFolder(
    root='../data/',  # Your data folder
    transform=transform
)

# Check what PyTorch found
print(f"Number of images: {len(dataset)}")
print(f"Classes: {dataset.classes}")
print(f"Class to index mapping: {dataset.class_to_idx}")

# Output example:
# Number of images: 4800
# Classes: ['cloudy', 'desert', 'green_area', 'water']
# Class to index mapping: {'cloudy': 0, 'desert': 1, 'green_area': 2, 'water': 3}

# ============================================
# SPLIT INTO TRAIN/VALIDATION
# ============================================

# Split 80% train, 20% validation
train_size = int(0.8 * len(dataset))
val_size = len(dataset) - train_size
train_dataset, val_dataset = torch.utils.data.random_split(
    dataset, 
    [train_size, val_size]
)

print(f"Training images: {len(train_dataset)}")
print(f"Validation images: {len(val_dataset)}")


Number of images: 5631
Classes: ['cloudy', 'desert', 'green_area', 'water']
Class to index mapping: {'cloudy': 0, 'desert': 1, 'green_area': 2, 'water': 3}
Training images: 4504
Validation images: 1127


In [6]:
# ============================================
# CREATE DATA LOADERS (batching)
# ============================================

# DataLoader groups images into batches
# Instead of processing 1 image at a time, process 32 together (faster)
train_loader = DataLoader(
    train_dataset,
    batch_size=32,        # Process 32 images at once
    shuffle=True,         # Randomize order each epoch
    num_workers=4         # Parallel data loading
)

val_loader = DataLoader(
    val_dataset,
    batch_size=32,
    shuffle=False
)

# ==========

In [7]:
# ============================================
# VISUALIZE WHAT WE'RE WORKING WITH
# ============================================

# Get one batch to inspect
images, labels = next(iter(train_loader))

print(f"Batch shape: {images.shape}")
# Output: torch.Size([32, 3, 224, 224])
# Meaning: 32 images, 3 color channels, 224x224 pixels

print(f"Labels in this batch: {labels}")
# Output: tensor([2, 0, 3, 1, ...])
# These are class indices: 0=cloudy, 1=desert, 2=green, 3=water

Batch shape: torch.Size([32, 3, 224, 224])
Labels in this batch: tensor([1, 3, 2, 1, 3, 0, 0, 0, 3, 0, 2, 0, 2, 2, 2, 1, 2, 2, 2, 3, 2, 2, 0, 1,
        2, 3, 2, 3, 3, 3, 2, 0])


## STEP 4: BUILD THE MODEL (ResNet50)

In [8]:
import torchvision.models as models

In [9]:

# ============================================
# LOAD PRE-TRAINED RESNET50
# ============================================

# Load ResNet50 with weights trained on ImageNet
model = models.resnet50(pretrained=True)

print(model)  # See the architecture

# Output shows:
# ResNet(
#   (conv1): Conv2d(3, 64, kernel_size=(7, 7), ...)
#   (bn1): BatchNorm2d(64, ...)
#   (relu): ReLU(inplace=True)
#   ...
#   (layer1): Sequential(...)  ← 3 blocks
#   (layer2): Sequential(...)  ← 4 blocks
#   (layer3): Sequential(...)  ← 6 blocks
#   (layer4): Sequential(...)  ← 3 blocks
#   (avgpool): AdaptiveAvgPool2d(...)
#   (fc): Linear(2048, 1000)   ← Original: 1000 ImageNet classes
# )



Downloading: "https://download.pytorch.org/models/resnet50-0676ba61.pth" to C:\Users\cars7/.cache\torch\hub\checkpoints\resnet50-0676ba61.pth


100.0%


ResNet(
  (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
  (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu): ReLU(inplace=True)
  (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
  (layer1): Sequential(
    (0): Bottleneck(
      (conv1): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv3): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn3): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
      (downsample): Sequential(
        (0): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 

In [10]:
# ============================================
# MODIFY FOR OUR 4 CLASSES
# ============================================

# Freeze all layers except the last one (transfer learning)
for param in model.parameters():
    param.requires_grad = False  # Don't update these weights

# Replace final layer for 4 classes instead of 1000
num_classes = 4  # cloudy, desert, green, water
model.fc = nn.Linear(model.fc.in_features, num_classes)
# Now only this layer will be trained


In [11]:
# ============================================
# HOW MODEL PROCESSES ONE IMAGE
# ============================================

# Let's trace one image through the network
sample_image = images[0:1]  # Take first image from batch
print(f"Input shape: {sample_image.shape}")  # [1, 3, 224, 224]

# Forward pass (step by step):
# 1. conv1: Initial feature extraction
#    Input: [1, 3, 224, 224] → Output: [1, 64, 112, 112]
#    Detects basic edges, colors, textures

# 2. layer1-4: Deep feature extraction
#    [1, 64, 112, 112] → ... → [1, 2048, 7, 7]
#    Detects complex patterns: cloud textures, water ripples, sand patterns

# 3. avgpool: Average pooling
#    [1, 2048, 7, 7] → [1, 2048]
#    Summarize spatial information

# 4. fc: Final classification layer
#    [1, 2048] → [1, 4]
#    Output: [0.2, 0.1, 0.05, 0.65]
#    Meaning: [cloudy: 20%, desert: 10%, green: 5%, water: 65%]

# Run the actual forward pass
with torch.no_grad():
    output = model(sample_image)
    print(f"Raw output: {output}")
    # Output: tensor([[-0.234, 0.567, -1.234, 2.345]])
    
    # Apply softmax to get probabilities
    probabilities = torch.nn.functional.softmax(output, dim=1)
    print(f"Probabilities: {probabilities}")
    # Output: tensor([[0.15, 0.25, 0.05, 0.55]])
    
    # Get prediction
    predicted_class = torch.argmax(probabilities, dim=1)
    print(f"Predicted class: {predicted_class.item()}")
    # Output: 3 (which means 'water')

Input shape: torch.Size([1, 3, 224, 224])
Raw output: tensor([[-0.5205,  0.0110, -0.0518, -0.1370]])
Probabilities: tensor([[0.1734, 0.2950, 0.2771, 0.2545]])
Predicted class: 1


### STEP 5: TRAINING THE MODEL

In [12]:
# ============================================
# TRAINING SETUP
# ============================================

# Move model to GPU if available
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model = model.to(device)
print(f"Training on: {device}")

# Loss function: How wrong is the prediction?
criterion = nn.CrossEntropyLoss()

# Optimizer: How to update weights to improve
optimizer = optim.Adam(model.fc.parameters(), lr=0.001)
# Only training the final layer (fc)

Training on: cpu


In [None]:
# ============================================
# TRAINING LOOP WITH GPU SUPPORT
# ============================================

num_epochs = 5  # Number of times to go through the entire dataset

for epoch in range(num_epochs):
    model.train()  # Set model to training mode
    running_loss = 0.0
    correct = 0
    total = 0
    
    for batch_idx, (images, labels) in enumerate(train_loader):
        # Move data to GPU
        images = images.to(device)
        labels = labels.to(device)
        
        # Forward pass
        optimizer.zero_grad()
        outputs = model(images)
        loss = criterion(outputs, labels)
        
        # Backward pass
        loss.backward()
        optimizer.step()
        
        # Statistics
        running_loss += loss.item()
        _, predicted = torch.max(outputs, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()
        
        # Print progress every 20 batches
        if (batch_idx + 1) % 20 == 0:
            print(f'Epoch [{epoch+1}/{num_epochs}], Step [{batch_idx+1}/{len(train_loader)}], '
                  f'Loss: {loss.item():.4f}, Acc: {100 * correct / total:.2f}%')
    
    # Epoch summary
    epoch_loss = running_loss / len(train_loader)
    epoch_acc = 100 * correct / total
    print(f'\nEpoch [{epoch+1}/{num_epochs}] Summary:')
    print(f'Training Loss: {epoch_loss:.4f}, Training Accuracy: {epoch_acc:.2f}%')
    
    # Validation
    model.eval()
    val_correct = 0
    val_total = 0
    val_loss = 0.0
    
    with torch.no_grad():
        for images, labels in val_loader:
            # Move data to GPU
            images = images.to(device)
            labels = labels.to(device)
            
            outputs = model(images)
            loss = criterion(outputs, labels)
            val_loss += loss.item()
            
            _, predicted = torch.max(outputs, 1)
            val_total += labels.size(0)
            val_correct += (predicted == labels).sum().item()
    
    val_accuracy = 100 * val_correct / val_total
    val_loss_avg = val_loss / len(val_loader)
    print(f'Validation Loss: {val_loss_avg:.4f}, Validation Accuracy: {val_accuracy:.2f}%')
    print('-' * 60)

print('\nTraining completed!')

In [None]:
import torch

# Check CUDA availability
print(f"CUDA available: {torch.cuda.is_available()}")
print(f"CUDA version: {torch.version.cuda}")
print(f"GPU device: {torch.cuda.get_device_name(0) if torch.cuda.is_available() else 'None'}")
print(f"Number of GPUs: {torch.cuda.device_count()}")

CUDA available: False
CUDA version: None
GPU device: None
Number of GPUs: 0
