In [None]:
from google.colab import drive

# Mount Google Drive so the data and checkpoint folders below are reachable.
drive.mount('/content/drive')

# Data layout: patches are read from PATCH_DIR, features are written to FEATURE_DIR.
BASE_DIR = '/content/drive/MyDrive/sichuan_data'   # same base folder as before
PATCH_DIR = f'{BASE_DIR}/patches'
FEATURE_DIR = f'{BASE_DIR}/features'

# Location of the cloned replication repo and of the checkpoint inside it.
REPLICATION_DIR = '/content/drive/MyDrive/poverty_replication'  # path to your cloned repo
MODEL_CKPT = f'{REPLICATION_DIR}/replication/gold_standard/best_model.pth'  # adjust to your real checkpoint

# Echo the resolved paths, one per line, as a quick sanity check.
print(PATCH_DIR, FEATURE_DIR, MODEL_CKPT, sep='\n')


In [None]:
import os
import numpy as np
import pandas as pd
import torch
import torch.nn as nn


In [None]:
import sys

# Make the cloned replication repo importable. The membership check keeps the
# cell idempotent: re-running it no longer appends duplicate sys.path entries.
if REPLICATION_DIR not in sys.path:
    sys.path.append(REPLICATION_DIR)


class NightLightCNN(nn.Module):
    """Small CNN classifier that doubles as a fixed-length feature extractor.

    Two stride-2 3x3 convolutions (3 -> 32 -> 64 channels), each followed by
    ReLU, are reduced by adaptive average pooling to a 1 x 1 map, i.e. one
    64-dimensional vector per image. A single linear layer maps that vector
    to `num_classes` logits.
    """

    def __init__(self, num_classes=3):
        """Build the convolutional trunk and the linear classification head."""
        super().__init__()
        # The position of each layer inside its Sequential determines the
        # parameter names (features.0, features.2, classifier.1), so the
        # order must stay fixed for saved state dicts to keep loading.
        trunk_layers = [
            nn.Conv2d(3, 32, kernel_size=3, stride=2, padding=1),
            nn.ReLU(),
            nn.Conv2d(32, 64, kernel_size=3, stride=2, padding=1),
            nn.ReLU(),
            nn.AdaptiveAvgPool2d((1, 1)),
        ]
        self.features = nn.Sequential(*trunk_layers)

        head_layers = [nn.Flatten(), nn.Linear(64, num_classes)]
        self.classifier = nn.Sequential(*head_layers)

    def forward(self, x):
        """Return class logits of shape (B, num_classes) for a (B, 3, H, W) batch."""
        return self.classifier(self.features(x))

    def extract_features(self, x):
        """Return the pooled trunk activations as a (B, 64) tensor."""
        pooled = self.features(x)          # (B, C, 1, 1)
        return pooled.flatten(start_dim=1) # (B, C)

# Load the model checkpoint.
# Prefer the GPU when one is available, otherwise fall back to the CPU.
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')

# Build the network and move its parameters to the chosen device
# (Module.to works in place, so no reassignment is needed).
model = NightLightCNN(num_classes=3)
model.to(device)

# map_location remaps the stored tensors onto `device` while loading, so a
# checkpoint saved on GPU can still be read on a CPU-only runtime.
state_dict = torch.load(MODEL_CKPT, map_location=device)
model.load_state_dict(state_dict)
model.eval()  # put the module in evaluation mode before feature extraction

print('Model loaded on', device)


In [None]:
from torchvision import transforms


# Preprocessing pipeline applied to every patch: convert the array to a
# tensor first, then resize the tensor to a fixed 64 x 64 grid.
transform = transforms.Compose(
    [
        transforms.ToTensor(),
        transforms.Resize(size=(64, 64)),
    ]
)

def preprocess_patch(arr):
    """
    Convert one raw image patch into a normalized, batched model input.

    arr: numpy array H x W x 3, raw Sentinel-2 reflectance.
    returns: float32 torch tensor of shape 1 x 3 x 64 x 64. The module-level
        `transform` resizes every patch to 64 x 64 before the batch
        dimension is added, so the input H and W are not preserved.

    Non-finite pixels (NaN, +inf, -inf) are replaced with 0 before scaling.
    Without this, a single NaN makes the patch maximum NaN and turns the
    entire output tensor into NaN.
    """
    img = np.nan_to_num(arr.astype('float32'), nan=0.0, posinf=0.0, neginf=0.0)

    # Per-patch max scaling; the epsilon keeps an all-zero patch from
    # dividing by zero.
    img = img / (img.max() + 1e-6)
    tensor = transform(img)
    # Add the leading batch dimension expected by the model.
    return tensor.unsqueeze(0)


In [None]:
# Load the patch index CSV; its 'file' column holds the patch filenames.
meta_path = os.path.join(PATCH_DIR, 'patch_metadata.csv')
meta_df = pd.read_csv(meta_path)

# Only the last expression of a cell is rendered automatically, so the
# preview needs an explicit display() (the builtin that IPython/Colab
# provides) to actually show up.
display(meta_df.head())

files = meta_df['file'].tolist()
len(files)


In [None]:
os.makedirs(FEATURE_DIR, exist_ok=True)

all_features = []  # one flattened feature vector per patch that was found
kept_meta = []     # metadata rows kept in the same order as all_features

# Inference only: no_grad avoids building autograd graphs for each patch.
with torch.no_grad():
    for i, row in meta_df.iterrows():
        fname = row['file']
        fpath = os.path.join(PATCH_DIR, fname)

        # Patches listed in the metadata but absent on disk are skipped, and
        # their rows are left out of kept_meta so both lists stay aligned.
        if not os.path.exists(fpath):
            print(f'{fname} missing, skip.')
            continue

        patch = np.load(fpath)
        x = preprocess_patch(patch).to(device)

        feats = model.extract_features(x)
        # Move to host memory and flatten the single-item batch to 1-D.
        feats = feats.cpu().numpy().reshape(-1)

        all_features.append(feats)
        kept_meta.append(row)

# np.vstack raises an opaque "need at least one array" ValueError on an empty
# list; fail with the same exception type but an actionable message instead.
if not all_features:
    raise ValueError(
        f'No patch files from the metadata were found under {PATCH_DIR}; '
        'nothing to extract.'
    )

features = np.vstack(all_features)
features.shape


In [None]:
# Output locations for the feature matrix and its row-aligned metadata.
features_path = os.path.join(FEATURE_DIR, 'sichuan_features.npy')
meta_out_path = os.path.join(FEATURE_DIR, 'sichuan_features_metadata.csv')

# Write the stacked feature matrix.
np.save(features_path, features)

# Write the metadata rows that were kept, without the DataFrame index column.
kept_meta_df = pd.DataFrame(kept_meta)
kept_meta_df.to_csv(meta_out_path, index=False)

# Show where both artifacts were written.
(features_path, meta_out_path)
