In [1]:
import torch
print(torch.cuda.is_available())
print(torch.cuda.get_device_name(0))


True
NVIDIA GeForce RTX 4060 Laptop GPU


In [2]:
x = torch.randn(1024, 1024, device="cuda")
y = x @ x
torch.cuda.synchronize()
print("CUDA compute ok")


CUDA compute ok


In [3]:
import os
import torch
import numpy as np
import pandas as pd
from tqdm import tqdm
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms, models
import torch.nn as nn
import torch.optim as optim
import rasterio
from sklearn.model_selection import train_test_split
from sklearn.metrics import f1_score
from torchvision.models import convnext_tiny, ConvNeXt_Tiny_Weights

IMG_SIZE = 384
IMAGENET_MEAN = [0.485, 0.456, 0.406]
IMAGENET_STD  = [0.229, 0.224, 0.225]
PAD_WIDTH = 6
NUM_CLASSES = 7

DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print("CUDA available:", torch.cuda.is_available())
print("GPU:", torch.cuda.get_device_name(0) if torch.cuda.is_available() else "None")

CUDA available: True
GPU: NVIDIA GeForce RTX 4060 Laptop GPU


In [4]:
DATA = r"C:\SP2026DS\dsc-competition-2026-sdl-satellite-detection"

train_dir = os.path.join(DATA, "train_images/train_images")
test_dir  = os.path.join(DATA, "test_images/test_images")
label_csv = os.path.join(DATA, "train_labels.csv")

labels_df = pd.read_csv(label_csv)
labels_df["image_id"] = labels_df["image_id"].astype(str)
labels_df["category_id"] = labels_df["category_id"].astype(int)
labels_df.head()

Unnamed: 0,image_id,category_id
0,1,0
1,2,-1
2,3,0
3,4,0
4,5,0


In [5]:
unique_sat_ids = sorted(labels_df["category_id"].unique())
id_to_idx = {sid: i for i, sid in enumerate(unique_sat_ids)}
idx_to_id = {i: sid for sid, i in id_to_idx.items()}

labels_df["label_id"] = labels_df["category_id"].map(id_to_idx).astype(int)
mapping_df = labels_df[["category_id", "label_id"]].drop_duplicates().sort_values("label_id")
print(mapping_df.to_string(index=False))

 category_id  label_id
          -1         0
           0         1
           1         2
           2         3
           3         4
           4         5
           5         6


In [6]:
def read_tif_as_hwc_float01(path: str) -> np.ndarray:
    with rasterio.open(path) as src:
        arr = src.read()

    if arr.ndim == 2:
        arr = arr[:, :, None]
    elif arr.ndim == 3:
        arr = np.transpose(arr, (1, 2, 0))
    else:
        raise ValueError(f"Unexpected tif array shape: {arr.shape}")

    arr = arr.astype(np.float32)

    if arr.shape[2] >= 3:
        arr = arr[:, :, :3]
    else:
        arr = np.repeat(arr, 3, axis=2)

    mn = float(np.nanmin(arr))
    mx = float(np.nanmax(arr))
    arr = (arr - mn) / (mx - mn + 1e-6)
    arr = np.nan_to_num(arr, nan=0.0, posinf=1.0, neginf=0.0)
    return arr


In [7]:
class SatelliteDataset(Dataset):
    def __init__(self, df, img_dir, transform=None, pad_width=PAD_WIDTH):
        self.df = df.reset_index(drop=True)
        self.img_dir = img_dir
        self.transform = transform
        self.pad_width = pad_width

    def __len__(self):
        return len(self.df)

    def __getitem__(self, idx):
        row = self.df.iloc[idx]
        img_id = str(row["image_id"]).zfill(self.pad_width)
        img_path = os.path.join(self.img_dir, f"{img_id}.tif")

        img = read_tif_as_hwc_float01(img_path)

        if self.transform:
            img = self.transform(img)

        label = int(row["label_id"])
        return img, label


In [8]:
train_transform = transforms.Compose([
    transforms.ToPILImage(),
    transforms.Resize((IMG_SIZE, IMG_SIZE)),
    transforms.RandomHorizontalFlip(),
    transforms.RandomRotation(10),
    transforms.ToTensor(),
    transforms.Normalize(IMAGENET_MEAN, IMAGENET_STD),
])

val_transform = transforms.Compose([
    transforms.ToPILImage(),
    transforms.Resize((IMG_SIZE, IMG_SIZE)), 
    transforms.ToTensor(),
    transforms.Normalize(IMAGENET_MEAN, IMAGENET_STD),
])

In [9]:
train_df, val_df = train_test_split(
    labels_df,
    test_size=0.2,
    random_state=123,
    stratify=labels_df["label_id"],
)
train_df = train_df.reset_index(drop=True)
val_df   = val_df.reset_index(drop=True)

train_ds = SatelliteDataset(train_df, train_dir, transform=train_transform, pad_width=PAD_WIDTH)
val_ds   = SatelliteDataset(val_df, train_dir, transform=val_transform, pad_width=PAD_WIDTH)

train_loader = DataLoader(train_ds, batch_size=8, shuffle=True, num_workers=0, pin_memory=True)
val_loader   = DataLoader(val_ds, batch_size=8, shuffle=False, num_workers=0, pin_memory=True)

In [10]:
weights = models.ConvNeXt_Tiny_Weights.DEFAULT

try:
    model = models.convnext_tiny(weights=weights)
except Exception as e:
    print("Pretrained weights failed to load, weights=None.")
    print("Error:", e)
    model = models.convnext_tiny(weights=None)

model.classifier[2] = nn.Linear(model.classifier[2].in_features, NUM_CLASSES)

model = model.to(DEVICE)

In [11]:
counts = train_df["label_id"].value_counts()

weights = torch.zeros(NUM_CLASSES, dtype=torch.float32)
for i in range(NUM_CLASSES):
    weights[i] = 1.0 / counts.get(i, 1)
weights = weights / weights.sum()

criterion = nn.CrossEntropyLoss(weight=weights.to(DEVICE))
optimizer = optim.AdamW(model.parameters(), lr=1e-4)

In [12]:
print(len(train_ds), len(train_loader))
import time
t0=time.time(); _=next(iter(train_loader)); print("first batch sec:", time.time()-t0)

31454 3932


  dataset = DatasetReader(path, driver=driver, sharing=sharing, **kwargs)


first batch sec: 0.3289318084716797


In [13]:
EPOCHS = 30

use_cuda = (DEVICE.type == "cuda")
scaler = torch.cuda.amp.GradScaler(enabled=use_cuda)
print("AMP enabled:", scaler.is_enabled())

scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=EPOCHS)
best_f1 = -1.0
best_state = None

for epoch in range(EPOCHS):
    model.train()
    running_loss = 0.0
    running_correct = 0
    running_total = 0

    for imgs, labels in tqdm(train_loader, desc=f"Epoch {epoch+1}/{EPOCHS}"):
        imgs = imgs.to(DEVICE, non_blocking=True)
        labels = labels.to(DEVICE, non_blocking=True)

        optimizer.zero_grad(set_to_none=True)

        with torch.cuda.amp.autocast(enabled=use_cuda):
            outputs = model(imgs)
            loss = criterion(outputs, labels)

        scaler.scale(loss).backward()
        scaler.step(optimizer)
        scaler.update()
        
        if i == 0:
            print("first batch done, loss =", float(loss))
            
        running_loss += loss.item() * labels.size(0)
        preds = outputs.argmax(dim=1)
        running_correct += (preds == labels).sum().item()
        running_total += labels.size(0)

    train_loss = running_loss / running_total
    train_acc = running_correct / running_total

    model.eval()
    val_correct = 0
    val_total = 0
    val_y_true = []
    val_y_pred = []

    with torch.no_grad():
        for imgs, labels in val_loader:
            imgs = imgs.to(DEVICE, non_blocking=True)
            labels = labels.to(DEVICE, non_blocking=True)

            outputs = model(imgs)
            preds = outputs.argmax(dim=1)

            val_correct += (preds == labels).sum().item()
            val_total += labels.size(0)

            # collect for F1
            val_y_true.extend(labels.detach().cpu().numpy().tolist())
            val_y_pred.extend(preds.detach().cpu().numpy().tolist())

    val_acc = val_correct / max(val_total, 1)

    val_f1_weighted = f1_score(val_y_true, val_y_pred, average="weighted")
    val_f1_macro = f1_score(val_y_true, val_y_pred, average="macro")

    if val_f1_weighted > best_f1:
        best_f1 = val_f1_weighted
        best_state = {k: v.detach().cpu().clone() for k, v in model.state_dict().items()}

    current_lr = optimizer.param_groups[0]["lr"]
    print(
        f"Epoch {epoch+1}/{EPOCHS} | lr: {current_lr:.6f} | "
        f"Loss: {train_loss:.4f} | Train Acc: {train_acc:.4f} | "
        f"Val Acc: {val_acc:.4f} | Val F1(w): {val_f1_weighted:.4f} | Val F1(m): {val_f1_macro:.4f}"
    )

    scheduler.step()

# restore best weights at end
if best_state is not None:
    model.load_state_dict(best_state)
    model.to(DEVICE)
    print(f"Best model by weighted F1 = {best_f1:.4f}")

  scaler = torch.cuda.amp.GradScaler(enabled=use_cuda)


AMP enabled: True


  with torch.cuda.amp.autocast(enabled=use_cuda):
Epoch 1/30: 100%|██████████| 3932/3932 [23:39<00:00,  2.77it/s]


Epoch 1/30 | lr: 0.000100 | Loss: 1.0377 | Train Acc: 0.5800 | Val Acc: 0.6989 | Val F1(w): 0.6966 | Val F1(m): 0.7084


  dataset = DatasetReader(path, driver=driver, sharing=sharing, **kwargs)
  with torch.cuda.amp.autocast(enabled=use_cuda):
Epoch 2/30: 100%|██████████| 3932/3932 [23:33<00:00,  2.78it/s]


Epoch 2/30 | lr: 0.000100 | Loss: 0.7743 | Train Acc: 0.6915 | Val Acc: 0.7379 | Val F1(w): 0.7368 | Val F1(m): 0.7558


  dataset = DatasetReader(path, driver=driver, sharing=sharing, **kwargs)
  with torch.cuda.amp.autocast(enabled=use_cuda):
Epoch 3/30: 100%|██████████| 3932/3932 [22:24<00:00,  2.92it/s]


Epoch 3/30 | lr: 0.000099 | Loss: 0.7168 | Train Acc: 0.7174 | Val Acc: 0.7546 | Val F1(w): 0.7508 | Val F1(m): 0.7619


  dataset = DatasetReader(path, driver=driver, sharing=sharing, **kwargs)
  with torch.cuda.amp.autocast(enabled=use_cuda):
Epoch 4/30: 100%|██████████| 3932/3932 [22:15<00:00,  2.94it/s]


Epoch 4/30 | lr: 0.000098 | Loss: 0.6700 | Train Acc: 0.7358 | Val Acc: 0.7842 | Val F1(w): 0.7849 | Val F1(m): 0.7992


  dataset = DatasetReader(path, driver=driver, sharing=sharing, **kwargs)
  with torch.cuda.amp.autocast(enabled=use_cuda):
Epoch 5/30: 100%|██████████| 3932/3932 [22:09<00:00,  2.96it/s]


Epoch 5/30 | lr: 0.000096 | Loss: 0.6395 | Train Acc: 0.7476 | Val Acc: 0.7916 | Val F1(w): 0.7904 | Val F1(m): 0.8031


  dataset = DatasetReader(path, driver=driver, sharing=sharing, **kwargs)
  with torch.cuda.amp.autocast(enabled=use_cuda):
Epoch 6/30: 100%|██████████| 3932/3932 [22:17<00:00,  2.94it/s]


Epoch 6/30 | lr: 0.000093 | Loss: 0.6147 | Train Acc: 0.7543 | Val Acc: 0.7887 | Val F1(w): 0.7876 | Val F1(m): 0.7999


  dataset = DatasetReader(path, driver=driver, sharing=sharing, **kwargs)
  with torch.cuda.amp.autocast(enabled=use_cuda):
Epoch 7/30: 100%|██████████| 3932/3932 [22:31<00:00,  2.91it/s]


Epoch 7/30 | lr: 0.000090 | Loss: 0.5944 | Train Acc: 0.7635 | Val Acc: 0.8128 | Val F1(w): 0.8127 | Val F1(m): 0.8188


  dataset = DatasetReader(path, driver=driver, sharing=sharing, **kwargs)
  with torch.cuda.amp.autocast(enabled=use_cuda):
Epoch 8/30: 100%|██████████| 3932/3932 [21:45<00:00,  3.01it/s]


Epoch 8/30 | lr: 0.000087 | Loss: 0.5704 | Train Acc: 0.7750 | Val Acc: 0.8206 | Val F1(w): 0.8195 | Val F1(m): 0.8293


  dataset = DatasetReader(path, driver=driver, sharing=sharing, **kwargs)
  with torch.cuda.amp.autocast(enabled=use_cuda):
Epoch 9/30: 100%|██████████| 3932/3932 [21:44<00:00,  3.01it/s]


Epoch 9/30 | lr: 0.000083 | Loss: 0.5534 | Train Acc: 0.7795 | Val Acc: 0.8049 | Val F1(w): 0.8057 | Val F1(m): 0.8132


  dataset = DatasetReader(path, driver=driver, sharing=sharing, **kwargs)
  with torch.cuda.amp.autocast(enabled=use_cuda):
Epoch 10/30: 100%|██████████| 3932/3932 [21:35<00:00,  3.03it/s]


Epoch 10/30 | lr: 0.000079 | Loss: 0.5383 | Train Acc: 0.7869 | Val Acc: 0.8188 | Val F1(w): 0.8195 | Val F1(m): 0.8307


  dataset = DatasetReader(path, driver=driver, sharing=sharing, **kwargs)
  with torch.cuda.amp.autocast(enabled=use_cuda):
Epoch 11/30: 100%|██████████| 3932/3932 [21:34<00:00,  3.04it/s]


Epoch 11/30 | lr: 0.000075 | Loss: 0.5192 | Train Acc: 0.7927 | Val Acc: 0.8369 | Val F1(w): 0.8369 | Val F1(m): 0.8418


  dataset = DatasetReader(path, driver=driver, sharing=sharing, **kwargs)
  with torch.cuda.amp.autocast(enabled=use_cuda):
Epoch 12/30: 100%|██████████| 3932/3932 [21:21<00:00,  3.07it/s]


Epoch 12/30 | lr: 0.000070 | Loss: 0.5018 | Train Acc: 0.7999 | Val Acc: 0.8426 | Val F1(w): 0.8428 | Val F1(m): 0.8499


  dataset = DatasetReader(path, driver=driver, sharing=sharing, **kwargs)
  with torch.cuda.amp.autocast(enabled=use_cuda):
Epoch 13/30: 100%|██████████| 3932/3932 [21:19<00:00,  3.07it/s]


Epoch 13/30 | lr: 0.000065 | Loss: 0.4832 | Train Acc: 0.8050 | Val Acc: 0.8455 | Val F1(w): 0.8467 | Val F1(m): 0.8542


  dataset = DatasetReader(path, driver=driver, sharing=sharing, **kwargs)
  with torch.cuda.amp.autocast(enabled=use_cuda):
Epoch 14/30: 100%|██████████| 3932/3932 [22:28<00:00,  2.92it/s]


Epoch 14/30 | lr: 0.000060 | Loss: 0.4671 | Train Acc: 0.8141 | Val Acc: 0.8505 | Val F1(w): 0.8508 | Val F1(m): 0.8577


  dataset = DatasetReader(path, driver=driver, sharing=sharing, **kwargs)
  with torch.cuda.amp.autocast(enabled=use_cuda):
Epoch 15/30: 100%|██████████| 3932/3932 [22:07<00:00,  2.96it/s]


Epoch 15/30 | lr: 0.000055 | Loss: 0.4525 | Train Acc: 0.8181 | Val Acc: 0.8554 | Val F1(w): 0.8561 | Val F1(m): 0.8634


  dataset = DatasetReader(path, driver=driver, sharing=sharing, **kwargs)
  with torch.cuda.amp.autocast(enabled=use_cuda):
Epoch 16/30: 100%|██████████| 3932/3932 [21:59<00:00,  2.98it/s]


Epoch 16/30 | lr: 0.000050 | Loss: 0.4375 | Train Acc: 0.8270 | Val Acc: 0.8569 | Val F1(w): 0.8560 | Val F1(m): 0.8632


  dataset = DatasetReader(path, driver=driver, sharing=sharing, **kwargs)
  with torch.cuda.amp.autocast(enabled=use_cuda):
Epoch 17/30: 100%|██████████| 3932/3932 [21:53<00:00,  2.99it/s]


Epoch 17/30 | lr: 0.000045 | Loss: 0.4226 | Train Acc: 0.8323 | Val Acc: 0.8581 | Val F1(w): 0.8576 | Val F1(m): 0.8637


  dataset = DatasetReader(path, driver=driver, sharing=sharing, **kwargs)
  with torch.cuda.amp.autocast(enabled=use_cuda):
Epoch 18/30:   2%|▏         | 90/3932 [00:27<19:37,  3.26it/s]


KeyboardInterrupt: 

In [14]:
torch.save(best_state, "convnext_tiny_best.pth")
print("Saved convnext_tiny_best.pth | best_f1 =", best_f1)

Saved convnext_tiny_best.pth | best_f1 = 0.8576002263508908


In [15]:
state = torch.load("convnext_tiny_best.pth", map_location=DEVICE)
model.load_state_dict(state)
model.to(DEVICE)
model.eval()

  state = torch.load("convnext_tiny_best.pth", map_location=DEVICE)


ConvNeXt(
  (features): Sequential(
    (0): Conv2dNormActivation(
      (0): Conv2d(3, 96, kernel_size=(4, 4), stride=(4, 4))
      (1): LayerNorm2d((96,), eps=1e-06, elementwise_affine=True)
    )
    (1): Sequential(
      (0): CNBlock(
        (block): Sequential(
          (0): Conv2d(96, 96, kernel_size=(7, 7), stride=(1, 1), padding=(3, 3), groups=96)
          (1): Permute()
          (2): LayerNorm((96,), eps=1e-06, elementwise_affine=True)
          (3): Linear(in_features=96, out_features=384, bias=True)
          (4): GELU(approximate='none')
          (5): Linear(in_features=384, out_features=96, bias=True)
          (6): Permute()
        )
        (stochastic_depth): StochasticDepth(p=0.0, mode=row)
      )
      (1): CNBlock(
        (block): Sequential(
          (0): Conv2d(96, 96, kernel_size=(7, 7), stride=(1, 1), padding=(3, 3), groups=96)
          (1): Permute()
          (2): LayerNorm((96,), eps=1e-06, elementwise_affine=True)
          (3): Linear(in_features=

In [16]:
import torch.nn.functional as F

test_files = sorted([f for f in os.listdir(test_dir) if f.lower().endswith(".tif")])

BATCH = 32 
results = []

with torch.no_grad():
    for start in tqdm(range(0, len(test_files), BATCH), desc="Predicting (batched + hflip TTA)"):
        batch_files = test_files[start:start+BATCH]

        xs = []
        img_ids = []
        for fname in batch_files:
            img_ids.append(os.path.splitext(fname)[0])
            img_path = os.path.join(test_dir, fname)

            img = read_tif_as_hwc_float01(img_path)   
            xs.append(val_transform(img))             

        x = torch.stack(xs, dim=0).to(DEVICE, non_blocking=True)


        probs1 = F.softmax(model(x), dim=1)
        probs2 = F.softmax(model(torch.flip(x, dims=[3])), dim=1)
        probs = 0.5 * (probs1 + probs2)

        pred_idxs = probs.argmax(dim=1).detach().cpu().numpy().tolist()

        for img_id, pred_idx in zip(img_ids, pred_idxs):
            results.append((img_id, idx_to_id[pred_idx]))

submission = pd.DataFrame(results, columns=["image_id", "satellite_id"])
submission.to_csv("submissionFinal.csv", index=False)

print("submissionFinal.csv with", len(submission), "rows")
submission.head()

Predicting (batched + hflip TTA): 100%|██████████| 308/308 [07:34<00:00,  1.48s/it]

submissionFinal.csv with 9834 rows





Unnamed: 0,image_id,satellite_id
0,1,0
1,2,0
2,3,4
3,4,4
4,5,5


In [18]:

ALPHA = 0.80        
USE_TTA = False    
BATCH = 32          

def build_convnext_tiny(num_classes):
    m = models.convnext_tiny(weights=None)
    m.classifier[2] = nn.Linear(m.classifier[2].in_features, num_classes)
    return m

def build_resnet18(num_classes):
    m = models.resnet18(weights=None)
    m.fc = nn.Linear(m.fc.in_features, num_classes)
    return m

def load_weights_flexible(model, path, device):
    obj = torch.load(path, map_location=device)
    if isinstance(obj, dict) and "model_state" in obj:
        model.load_state_dict(obj["model_state"])
        return obj
    else:
        model.load_state_dict(obj)
        return None

convnext = build_convnext_tiny(NUM_CLASSES).to(DEVICE).eval()
resnet   = build_resnet18(NUM_CLASSES).to(DEVICE).eval()

_ = load_weights_flexible(convnext, "convnext_tiny_best.pth", DEVICE)
_ = load_weights_flexible(resnet,   "resnet18_best.pth", DEVICE)

@torch.no_grad()
def probs_model(model, x, use_tta=False):
    p = F.softmax(model(x), dim=1)
    if use_tta:
        p2 = F.softmax(model(torch.flip(x, dims=[3])), dim=1)
        p = 0.5 * (p + p2)
    return p

# Predict
test_files = sorted([f for f in os.listdir(test_dir) if f.lower().endswith(".tif")])
results = []

with torch.no_grad():
    for start in tqdm(range(0, len(test_files), BATCH), desc="Predicting ensemble"):
        batch_files = test_files[start:start+BATCH]

        xs, img_ids = [], []
        for fname in batch_files:
            img_ids.append(os.path.splitext(fname)[0])
            img = read_tif_as_hwc_float01(os.path.join(test_dir, fname))
            xs.append(val_transform(img))

        x = torch.stack(xs, dim=0).to(DEVICE, non_blocking=True)

        p_c = probs_model(convnext, x, use_tta=USE_TTA)
        p_r = probs_model(resnet,   x, use_tta=USE_TTA)

        p = ALPHA * p_c + (1 - ALPHA) * p_r
        pred_idxs = p.argmax(dim=1).detach().cpu().tolist()

        for img_id, pred_idx in zip(img_ids, pred_idxs):
            results.append((img_id, idx_to_id[pred_idx]))

submission = pd.DataFrame(results, columns=["image_id", "satellite_id"])
submission.to_csv("submissionFinalEnsemble.csv", index=False)

print("Wrote submissionFinalEnsemble.csv | rows:", len(submission), "| alpha:", ALPHA, "| TTA:", USE_TTA)
submission.head()


  obj = torch.load(path, map_location=device)
  dataset = DatasetReader(path, driver=driver, sharing=sharing, **kwargs)
Predicting ensemble: 100%|██████████| 308/308 [04:45<00:00,  1.08it/s]

Wrote submissionFinalEnsemble.csv | rows: 9834 | alpha: 0.8 | TTA: False





Unnamed: 0,image_id,satellite_id
0,1,0
1,2,0
2,3,4
3,4,4
4,5,5
