In [1]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [2]:
import zipfile
import os

zip_path = '/content/sample_data/data.zip'
extract_path = 'data'

# Create the extraction directory if it doesn't exist
os.makedirs(extract_path, exist_ok=True)

with zipfile.ZipFile(zip_path, 'r') as zip_ref:
    zip_ref.extractall(extract_path)

print(f"Extracted {zip_path} to {extract_path}")

Extracted /content/sample_data/data.zip to data


In [None]:
print('hi')
import os
import torch
import torch.nn as nn
import torch.optim as optim
from torchvision import datasets, transforms, models
from torch.utils.data import DataLoader, random_split
import matplotlib.pyplot as plt
import numpy as np

if torch.cuda.is_available():
    device = torch.device("cuda")
    print("Using CUDA acceleration.")
else:
    print("CUDA is not available. You will suffer from very slow training without a GPU.")
    device = torch.device("cpu")


BATCH_SIZE = 64
LEARNING_RATE = 0.0005
NUM_EPOCHS = 20
DATA_DIR = "/content/data/data"
TRAIN_VAL_SPLIT = 0.9

hi
Using CUDA acceleration.


In [6]:
train_transforms = transforms.Compose([
    transforms.Grayscale(num_output_channels=1),
    transforms.ToTensor(),
    # transforms.ColorJitter(brightness=0.3, contrast=0.3),  # maybe dont use?
    transforms.RandomHorizontalFlip(p=0.5),
    transforms.RandomVerticalFlip(p=0.5),
    transforms.Normalize(mean=[0.5], std=[0.5])
])

val_transforms = transforms.Compose([
    transforms.Grayscale(num_output_channels=1),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.5], std=[0.5])
])

full_dataset = datasets.ImageFolder(root=DATA_DIR, transform=train_transforms)

train_size = int(TRAIN_VAL_SPLIT * len(full_dataset))
val_size = len(full_dataset) - train_size
train_dataset, val_dataset = random_split(full_dataset, [train_size, val_size])

val_dataset.dataset.transform = val_transforms

train_loader = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True, num_workers=2)
val_loader = DataLoader(val_dataset, batch_size=BATCH_SIZE, shuffle=False, num_workers=2)

print(f"Classes: {full_dataset.classes}")
print(f"Training samples: {len(train_dataset)}")
print(f"Validation samples: {len(val_dataset)}")

Classes: ['with_signal', 'without_signal']
Training samples: 71236
Validation samples: 7916


In [None]:
def get_model():
    model = models.resnet34(weights=models.ResNet34_Weights.DEFAULT)
    original_conv1 = model.conv1
    model.conv1 = nn.Conv2d(1, 64, kernel_size=7, stride=2, padding=3, bias=False)

    with torch.no_grad():
        model.conv1.weight[:] = torch.mean(original_conv1.weight, dim=1, keepdim=True)
    num_ftrs = model.fc.in_features
    model.fc = nn.Linear(num_ftrs, 2)
    return model

model = get_model().to(device)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=LEARNING_RATE)

Downloading: "https://download.pytorch.org/models/resnet18-f37072fd.pth" to /root/.cache/torch/hub/checkpoints/resnet18-f37072fd.pth


100%|██████████| 44.7M/44.7M [00:00<00:00, 204MB/s]


In [None]:
import math

warmup_epochs = 2

total_steps = NUM_EPOCHS * len(train_loader)
warmup_steps = warmup_epochs * len(train_loader)

def lr_lambda(step):
    if step < warmup_steps:
        return float(step) / float(max(1, warmup_steps))
    else:
        progress = float(step - warmup_steps) / float(max(1, total_steps - warmup_steps))
        # cosine from 1 -> 0
        return 0.5 * (1.0 + math.cos(math.pi * progress))

scheduler = torch.optim.lr_scheduler.LambdaLR(optimizer, lr_lambda)

In [None]:
from tqdm import tqdm

def train_model(model, train_loader, val_loader, epochs=20, scheduler=None):
    history = {'train_loss': [], 'train_acc': [], 'val_loss': [], 'val_acc': []}
    global_step = 0

    for epoch in range(epochs):
        print(f"Epoch {epoch+1}/{epochs}")
        model.train()
        running_loss = 0.0
        correct = 0
        total = 0
        print('Training...')
        for images, labels in tqdm(train_loader, desc=f"Epoch {epoch+1} Training"):
            images, labels = images.to(device), labels.to(device)

            optimizer.zero_grad()
            outputs = model(images)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()
            if scheduler is not None:
                scheduler.step()
            global_step += 1

            running_loss += loss.item()
            _, predicted = torch.max(outputs.data, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()

        epoch_loss = running_loss / len(train_loader)
        epoch_acc = correct / total
        history['train_loss'].append(epoch_loss)
        history['train_acc'].append(epoch_acc)

        print('Validating...')
        model.eval()
        val_loss = 0.0
        correct = 0
        total = 0

        with torch.no_grad():
            for images, labels in tqdm(val_loader, desc=f"Epoch {epoch+1} Validation"):
                images, labels = images.to(device), labels.to(device)
                outputs = model(images)
                loss = criterion(outputs, labels)

                val_loss += loss.item()
                _, predicted = torch.max(outputs.data, 1)
                total += labels.size(0)
                correct += (predicted == labels).sum().item()

        val_epoch_loss = val_loss / len(val_loader)
        val_epoch_acc = correct / total
        history['val_loss'].append(val_epoch_loss)
        history['val_acc'].append(val_epoch_acc)

        torch.save(model.state_dict(), f'model_epoch_{epoch+1}.pth')

        print(f"Train Loss: {epoch_loss:.4f} | Train Acc: {epoch_acc:.4f}")
        print(f"Val Loss: {val_epoch_loss:.4f} | Val Acc: {val_epoch_acc:.4f}")

    return history

In [None]:
history = train_model(model, train_loader, val_loader, NUM_EPOCHS, scheduler=scheduler)
torch.save(model.state_dict(), "waterfall_resnet18_fullres.pth")
print("Model saved to waterfall_resnet18_fullres.pth")

Epoch 1/10
Training...


Epoch 1 Training: 100%|██████████| 2227/2227 [10:42<00:00,  3.46it/s]


Validating...


Epoch 1 Validation: 100%|██████████| 248/248 [00:35<00:00,  6.93it/s]


Train Loss: 0.2857 | Train Acc: 0.8884
Val Loss: 0.2652 | Val Acc: 0.8941
Epoch 2/10
Training...


Epoch 2 Training: 100%|██████████| 2227/2227 [10:41<00:00,  3.47it/s]


Validating...


Epoch 2 Validation: 100%|██████████| 248/248 [00:35<00:00,  6.98it/s]


Train Loss: 0.2551 | Train Acc: 0.9020
Val Loss: 0.2500 | Val Acc: 0.9021
Epoch 3/10
Training...


Epoch 3 Training: 100%|██████████| 2227/2227 [10:41<00:00,  3.47it/s]


Validating...


Epoch 3 Validation: 100%|██████████| 248/248 [00:37<00:00,  6.62it/s]


Train Loss: 0.2426 | Train Acc: 0.9073
Val Loss: 0.2590 | Val Acc: 0.9007
Epoch 4/10
Training...


Epoch 4 Training: 100%|██████████| 2227/2227 [10:39<00:00,  3.48it/s]


Validating...


Epoch 4 Validation: 100%|██████████| 248/248 [00:30<00:00,  8.24it/s]


Train Loss: 0.2337 | Train Acc: 0.9094
Val Loss: 0.2603 | Val Acc: 0.9012
Epoch 5/10
Training...


Epoch 5 Training: 100%|██████████| 2227/2227 [10:38<00:00,  3.49it/s]


Validating...


Epoch 5 Validation: 100%|██████████| 248/248 [00:31<00:00,  7.97it/s]


Train Loss: 0.2250 | Train Acc: 0.9144
Val Loss: 0.2599 | Val Acc: 0.8987
Epoch 6/10
Training...


Epoch 6 Training: 100%|██████████| 2227/2227 [10:38<00:00,  3.49it/s]


Validating...


Epoch 6 Validation: 100%|██████████| 248/248 [00:30<00:00,  8.22it/s]


Train Loss: 0.2167 | Train Acc: 0.9179
Val Loss: 0.2367 | Val Acc: 0.9102
Epoch 7/10
Training...


Epoch 7 Training: 100%|██████████| 2227/2227 [10:37<00:00,  3.49it/s]


Validating...


Epoch 7 Validation: 100%|██████████| 248/248 [00:30<00:00,  8.22it/s]


Train Loss: 0.2077 | Train Acc: 0.9214
Val Loss: 0.2341 | Val Acc: 0.9099
Epoch 8/10
Training...


Epoch 8 Training: 100%|██████████| 2227/2227 [10:37<00:00,  3.49it/s]


Validating...


Epoch 8 Validation: 100%|██████████| 248/248 [00:30<00:00,  8.09it/s]


Train Loss: 0.1955 | Train Acc: 0.9255
Val Loss: 0.2362 | Val Acc: 0.9069
Epoch 9/10
Training...


Epoch 9 Training: 100%|██████████| 2227/2227 [10:37<00:00,  3.49it/s]


Validating...


Epoch 9 Validation: 100%|██████████| 248/248 [00:29<00:00,  8.29it/s]


Train Loss: 0.1829 | Train Acc: 0.9316
Val Loss: 0.2446 | Val Acc: 0.9027
Epoch 10/10
Training...


Epoch 10 Training: 100%|██████████| 2227/2227 [10:37<00:00,  3.49it/s]


Validating...


Epoch 10 Validation: 100%|██████████| 248/248 [00:30<00:00,  8.05it/s]


Train Loss: 0.1623 | Train Acc: 0.9391
Val Loss: 0.2514 | Val Acc: 0.9079
Model saved to waterfall_resnet18_fullres.pth


In [None]:
plt.figure(figsize=(12, 5))

plt.subplot(1, 2, 1)
plt.plot(history['train_loss'], label='Train Loss')
plt.plot(history['val_loss'], label='Validation Loss')
plt.title('Loss over Epochs')
plt.legend()

plt.subplot(1, 2, 2)
plt.plot(history['train_acc'], label='Train Accuracy')
plt.plot(history['val_acc'], label='Validation Accuracy')
plt.title('Accuracy over Epochs')
plt.legend()

plt.show()