In [None]:
import os
import shutil
import random
from pathlib import Path

# Split a class-per-folder image dataset into train/ and test/ copies.
#
# Layout expected under `source_dir`: one sub-directory per class, each holding
# that class's image files. The same class sub-directories are recreated under
# `train_dir` and `test_dir`, and files are *copied* (the source is left intact).
#
# Fill in the three directories before running this cell.
source_dir = ""   # root folder containing one sub-folder per class
train_dir = ""    # destination root of the training split
test_dir = ""     # destination root of the test split
test_ratio = 0.2  # fraction of each class that goes to the test split
split_seed = 42   # fixed seed: re-running the cell reproduces the same split

# A dedicated generator makes the split reproducible without touching the
# global `random` state. This matters because copies accumulate in the
# destination folders: with an unseeded shuffle, a second run would pick a
# different test subset and the same image would end up in both splits.
# Copies left behind by earlier runs are not removed by this cell.
split_rng = random.Random(split_seed)

for split_dir in [train_dir, test_dir]:
    os.makedirs(split_dir, exist_ok=True)

# os.listdir returns entries in arbitrary order; sort so the shared generator
# is consumed in the same class order on every run.
for class_dir in sorted(os.listdir(source_dir)):
    class_path = os.path.join(source_dir, class_dir)
    if not os.path.isdir(class_path):
        continue

    # Keep regular files only ("*.*" also matches directories whose name
    # contains a dot, which shutil.copy cannot copy) and sort them so the
    # shuffle starts from a deterministic order.
    images = sorted(p for p in Path(class_path).glob("*.*") if p.is_file())
    split_rng.shuffle(images)

    # int() truncates, so very small classes may contribute no test images.
    test_size = int(len(images) * test_ratio)
    test_images = images[:test_size]
    train_images = images[test_size:]

    train_class_dir = os.path.join(train_dir, class_dir)
    os.makedirs(train_class_dir, exist_ok=True)
    for img in train_images:
        shutil.copy(img, os.path.join(train_class_dir, img.name))

    test_class_dir = os.path.join(test_dir, class_dir)
    os.makedirs(test_class_dir, exist_ok=True)
    for img in test_images:
        shutil.copy(img, os.path.join(test_class_dir, img.name))

print("Dataset split complete.")


Dataset split complete.


In [None]:
from PIL import Image, UnidentifiedImageError
import os
from pathlib import Path

# Scan a dataset tree for image files Pillow cannot open/verify and delete them.
#
# NOTE: Path('') is the current working directory, so this must be pointed at
# the dataset root before running; the scan is recursive and deletes files.
dataset_dir = Path('')

# Only files with these extensions are inspected. They mirror the extensions
# torchvision's ImageFolder loads by default, i.e. the only files that can
# reach the training pipeline. Without this filter every non-image file under
# `dataset_dir` (notebooks, CSVs, scripts, ...) fails Image.open and would be
# deleted as "corrupted".
IMAGE_EXTENSIONS = {
    ".jpg", ".jpeg", ".png", ".ppm", ".bmp", ".pgm", ".tif", ".tiff", ".webp",
}

bad_files = []

for img_path in dataset_dir.rglob("*.*"):
    if not img_path.is_file():
        continue
    if img_path.suffix.lower() not in IMAGE_EXTENSIONS:
        continue  # not an image candidate: never touch it
    try:
        # verify() checks file integrity without decoding the pixel data.
        with Image.open(img_path) as img:
            img.verify()
    except (UnidentifiedImageError, OSError):
        bad_files.append(img_path)

# Deletion happens in a second pass so the directory tree is not modified
# while it is being walked.
for bad_file in bad_files:
    print(f"Removing corrupted image: {bad_file}")
    os.remove(bad_file)

print(f"Removed {len(bad_files)} bad files.")


In [None]:
import torch
import torchvision
import torchvision.transforms as transforms
from torchvision.datasets import ImageFolder
from torch.utils.data import DataLoader
from torchvision.models import resnet18
import torch.nn as nn
import torch.optim as optim

# Preprocessing applied to every image: convert to grayscale, resize to
# 64x64, then convert to a tensor.
preprocessing_steps = [
    transforms.Grayscale(),
    transforms.Resize((64, 64)),
    transforms.ToTensor(),
]
transform = transforms.Compose(preprocessing_steps)

# ImageFolder derives the class list from the sub-directories of `root`.
dataset = ImageFolder(root='', transform=transform)
dataloader = DataLoader(dataset, batch_size=64, shuffle=True)

# ResNet-18 with one output per class; its first convolution is replaced by
# a 1-input-channel layer to pair with the Grayscale() transform above.
model = resnet18(num_classes=len(dataset.classes))
model.conv1 = nn.Conv2d(
    in_channels=1,
    out_channels=64,
    kernel_size=7,
    stride=2,
    padding=3,
    bias=False,
)

criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

# Plain training loop: two passes over the data, no device placement and no
# progress reporting.
for epoch in range(2):
    for inputs, labels in dataloader:
        # Clear gradients left over from the previous step before the new
        # forward/backward pass.
        optimizer.zero_grad()
        outputs = model(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()


In [None]:
# GPU version
import torch
import torchvision
import torchvision.transforms as transforms
from torchvision.datasets import ImageFolder
from torch.utils.data import DataLoader
from torchvision.models import resnet18
import torch.nn as nn
import torch.optim as optim

# Train a grayscale ResNet-18 classifier on an ImageFolder dataset, using CUDA
# when it is available and falling back to the CPU otherwise.

# Preprocessing: grayscale, 64x64, tensor.
transform = transforms.Compose([
    transforms.Grayscale(),
    transforms.Resize((64, 64)),
    transforms.ToTensor(),
])

# NOTE(review): machine-specific absolute path — this cell only runs on the
# original author's machine until it is replaced by a configurable location.
dataset = ImageFolder(root='/Users/ravanryj/Desktop/team-00-project/Hezi_Jiang_44/archive/', transform=transform)

# Resolve the device first so the loader settings can depend on it.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Using device: {device}")

# Pinned host memory only helps host-to-GPU copies; requesting it on the CPU
# fallback just triggers a DataLoader warning, so enable it for CUDA only.
use_cuda = device.type == "cuda"

dataloader = DataLoader(
    dataset,
    batch_size=64,
    shuffle=True,
    num_workers=4,
    pin_memory=use_cuda
)

model = resnet18(num_classes=len(dataset.classes))
model.conv1 = nn.Conv2d(1, 64, kernel_size=7, stride=2, padding=3, bias=False)  # Grayscale fix
model = model.to(device)

criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

for epoch in range(10):
    running_loss = 0.0  # sum of per-sample losses seen this epoch
    model.train()
    for inputs, labels in dataloader:
        # non_blocking lets the copy overlap with compute when the source
        # tensors are pinned; it is a no-op request on the CPU path.
        inputs = inputs.to(device, non_blocking=use_cuda)
        labels = labels.to(device, non_blocking=use_cuda)

        optimizer.zero_grad()
        outputs = model(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        # criterion returns the batch *mean*; weight it by the batch size so
        # the smaller final batch is not over-represented in the epoch average.
        running_loss += loss.item() * inputs.size(0)

    avg_loss = running_loss / len(dataset)
    print(f"Epoch {epoch + 1} complete — Avg Loss: {avg_loss:.4f}")

print("Training finished!")
