In [1]:
# 02_train_patch_classifier.ipynb

import os
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader, random_split
import matplotlib.pyplot as plt

# ------------------------------
# Dataset Class
# ------------------------------
class PatchDataset(Dataset):
    def __init__(self, patches, labels):
        self.patches = patches
        self.labels = labels

    def __len__(self):
        return len(self.patches)

    def __getitem__(self, idx):
        x = self.patches[idx][np.newaxis, :, :]  # [1, 128, 128]
        y = self.labels[idx]
        return torch.tensor(x, dtype=torch.float32), torch.tensor(y, dtype=torch.long)

# ------------------------------
# Simple CNN Model
# ------------------------------
class SimpleCNN(nn.Module):
    def __init__(self):
        super().__init__()
        self.model = nn.Sequential(
            nn.Conv2d(1, 16, 3, padding=1),
            nn.ReLU(),
            nn.MaxPool2d(2),
            nn.Conv2d(16, 32, 3, padding=1),
            nn.ReLU(),
            nn.MaxPool2d(2),
            nn.Flatten(),
            nn.Linear(32 * 32 * 32, 64),
            nn.ReLU(),
            nn.Linear(64, 2)
        )

    def forward(self, x):
        return self.model(x)

# ------------------------------
# Load Data
# ------------------------------
patches_all = []
labels_all = []

for day in [196, 197, 198, 200, 205]:
    patch_path = f"../data/processed/patches_day{day}.npy"
    label_path = f"../data/processed/labels_day{day}.npy"
    
    patches = np.load(patch_path)
    labels = np.load(label_path)
    
    patches_all.append(patches)
    labels_all.append(labels)

X = np.concatenate(patches_all, axis=0)
y = np.concatenate(labels_all, axis=0)
print(f"✅ Total Patches: {len(X)}, Heat Islands: {sum(y)}")

# ------------------------------
# Train/Val Split
# ------------------------------
dataset = PatchDataset(X, y)
train_size = int(0.8 * len(dataset))
val_size = len(dataset) - train_size
train_set, val_set = random_split(dataset, [train_size, val_size])

train_loader = DataLoader(train_set, batch_size=32, shuffle=True)
val_loader = DataLoader(val_set, batch_size=32)

# ------------------------------
# Training
# ------------------------------
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = SimpleCNN().to(device)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

train_loss_list = []
val_loss_list = []

for epoch in range(10):
    model.train()
    running_loss = 0
    for X_batch, y_batch in train_loader:
        X_batch, y_batch = X_batch.to(device), y_batch.to(device)
        optimizer.zero_grad()
        outputs = model(X_batch)
        loss = criterion(outputs, y_batch)
        loss.backward()
        optimizer.step()
        running_loss += loss.item()

    train_loss_list.append(running_loss / len(train_loader))

    # Validation
    model.eval()
    val_loss = 0
    correct = 0
    total = 0
    with torch.no_grad():
        for X_batch, y_batch in val_loader:
            X_batch, y_batch = X_batch.to(device), y_batch.to(device)
            outputs = model(X_batch)
            loss = criterion(outputs, y_batch)
            val_loss += loss.item()
            preds = torch.argmax(outputs, dim=1)
            correct += (preds == y_batch).sum().item()
            total += y_batch.size(0)

    val_loss_list.append(val_loss / len(val_loader))
    acc = correct / total
    print(f"Epoch {epoch+1}: Train Loss={train_loss_list[-1]:.4f}, Val Loss={val_loss_list[-1]:.4f}, Val Acc={acc:.2f}")

# ------------------------------
# Plot Loss
# ------------------------------
plt.plot(train_loss_list, label="Train Loss")
plt.plot(val_loss_list, label="Val Loss")
plt.legend()
plt.title("Loss over Epochs")
plt.show()


✅ Total Patches: 0, Heat Islands: 0


ValueError: num_samples should be a positive integer value, but got num_samples=0

In [2]:
import glob
import numpy as np

patch_files = sorted(glob.glob("../data/processed/patches_day*.npy"))
label_files = sorted(glob.glob("../data/processed/labels_day*.npy"))

print("✅ Patch 文件数量:", len(patch_files))
print("✅ Label 文件数量:", len(label_files))

all_patches = np.concatenate([np.load(f) for f in patch_files])
all_labels = np.concatenate([np.load(f) for f in label_files])

print("✅ 总样本数:", len(all_patches))
print("✅ 样本形状:", all_patches[0].shape)
print("✅ 正样本（热岛）数:", sum(all_labels))
print("✅ 负样本（非热岛）数:", len(all_labels) - sum(all_labels))


✅ Patch 文件数量: 5
✅ Label 文件数量: 5
✅ 总样本数: 0


IndexError: index 0 is out of bounds for axis 0 with size 0