<font size="7">FEATURE EXTRACTION FROM SATELLITE IMAGES</font>

In [None]:
import os
from pathlib import Path
import numpy as np
import pandas as pd
from PIL import Image

import torch
import torch.nn as nn
import torchvision.transforms as T
import torchvision.models as models
from torchvision import transforms
from tqdm import tqdm

# Folder holding the image tiles and folder for generated artifacts.
IMG_DIR = Path("images_mapbox")
ARTIFACTS = Path("artifacts")

# Pick the CUDA device when PyTorch reports one is available; otherwise use the CPU.
DEVICE = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
print("Device:", DEVICE)

Device: cpu


<font size="7">TRAINING DATA</font>

In [None]:
# Inputs: the training CSV and the folder containing one "<id>.png" per row.
CSV_PATH = "train(1)(train(1)).csv"
IMAGE_DIR = Path("images_mapbox")

# Outputs: the artifacts folder is created up front so the feature CSV can be
# written into it later.
ARTIFACTS_DIR = Path("artifacts")
ARTIFACTS_DIR.mkdir(exist_ok=True)
IMG_FEATURES_FILE = ARTIFACTS_DIR.joinpath("image_features_resnet.csv")

# Pick the CUDA device when PyTorch reports one is available; otherwise use the CPU.
DEVICE = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
print(f"Running on {DEVICE}")

def extract_image_features(csv_path=None, image_dir=None, features_file=None):
    """
    Embed every image listed in a CSV with a pre-trained ResNet18.

    The ResNet18 classification head is removed, so each image yields the
    flattened output of the remaining layers. Results are cached: if
    ``features_file`` already exists it is loaded and returned without
    touching the model or the images.

    Parameters
    ----------
    csv_path : str or Path, optional
        CSV listing the image ids. The ``id`` column is used when present,
        otherwise the first column. Defaults to the global ``CSV_PATH``.
    image_dir : str or Path, optional
        Folder containing ``<id>.png`` files. Defaults to the global
        ``IMAGE_DIR``.
    features_file : str or Path, optional
        CSV used as the cache / output. Defaults to the global
        ``IMG_FEATURES_FILE``.

    Returns
    -------
    pandas.DataFrame
        One row per successfully processed image with columns
        ``img_0 .. img_{d-1}`` plus ``id``; an empty DataFrame when no image
        could be processed (nothing is written in that case).
    """
    # Resolve the cache location first so a cache hit needs nothing else.
    features_file = Path(IMG_FEATURES_FILE if features_file is None else features_file)
    if features_file.exists():
        print("Features already extracted. Loading from file...")
        return pd.read_csv(features_file)

    csv_path = CSV_PATH if csv_path is None else csv_path
    image_dir = Path(IMAGE_DIR if image_dir is None else image_dir)

    print("Initializing ResNet18 for feature extraction...")

    # `pretrained=True` is deprecated in newer torchvision releases; use the
    # equivalent explicit weights enum when it exists, else the legacy flag.
    weights_enum = getattr(models, "ResNet18_Weights", None)
    if weights_enum is not None:
        model = models.resnet18(weights=weights_enum.IMAGENET1K_V1)
    else:
        model = models.resnet18(pretrained=True)

    # Drop the final fully-connected layer; what remains ends in global pooling.
    feature_extractor = nn.Sequential(*list(model.children())[:-1])
    feature_extractor.to(DEVICE)
    feature_extractor.eval()

    preprocess = transforms.Compose([
        transforms.Resize(256),
        transforms.CenterCrop(224),
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
    ])

    # Inspect the header first: `usecols=['id']` raises when the column is
    # absent, which made the original "fall back to first column" unreachable.
    header = pd.read_csv(csv_path, nrows=0).columns
    id_col = 'id' if 'id' in header else header[0]
    ids = pd.read_csv(csv_path, usecols=[id_col])[id_col].tolist()

    features = []
    valid_ids = []
    missing = 0   # ids with no matching PNG on disk
    failed = []   # (id, exception) for images that could not be processed

    print(f"   -> Processing {len(ids)} images...")

    with torch.no_grad():
        for img_id in tqdm(ids, total=len(ids)):
            img_path = image_dir / f"{img_id}.png"
            if not img_path.exists():
                missing += 1
                continue

            try:
                # Context manager closes the file handle once pixels are copied.
                with Image.open(img_path) as raw:
                    img = raw.convert('RGB')
                img_t = preprocess(img).unsqueeze(0).to(DEVICE)
                embedding = feature_extractor(img_t)
            except Exception as exc:
                # Best-effort: keep going, but remember what went wrong.
                # (Unlike a bare `except:`, Ctrl-C still interrupts the loop.)
                failed.append((img_id, exc))
                continue

            features.append(embedding.flatten().cpu().numpy())
            valid_ids.append(img_id)

    if missing or failed:
        print(f"   -> Skipped {missing} missing and {len(failed)} unreadable images.")
        for bad_id, exc in failed[:5]:
            print(f"      {bad_id}: {exc!r}")

    if not features:
        print(" CRITICAL ERROR: No valid images found or processed!")
        return pd.DataFrame()

    # Derive the column count from the data instead of hard-coding 512.
    feat_cols = [f"img_{i}" for i in range(len(features[0]))]
    df_feat = pd.DataFrame(features, columns=feat_cols)
    df_feat['id'] = valid_ids

    features_file.parent.mkdir(parents=True, exist_ok=True)
    df_feat.to_csv(features_file, index=False)
    print(f"Saved features to {features_file}")
    return df_feat


# Build the ResNet18 features for the training CSV, or load them from the
# cached features file if it already exists.
df_img_features = extract_image_features()

Running on cpu
Initializing ResNet18 for feature extraction...




   -> Processing 16209 images...


100%|██████████| 16209/16209 [22:08<00:00, 12.20it/s]


Saved features to artifacts\image_features_resnet.csv


<font size="7">TEST DATA</font>

In [None]:
# Inputs: the test CSV and the folder containing one "<id>.png" per row.
CSV_PATH = "test2(test(1)).csv"
IMAGE_DIR = Path("images_mapbox")

# Outputs: the artifacts folder is created up front so the feature CSV can be
# written into it later.
ARTIFACTS_DIR = Path("artifacts")
ARTIFACTS_DIR.mkdir(exist_ok=True)
IMG_FEATURES_FILE = ARTIFACTS_DIR.joinpath("test_image_features_resnet.csv")

# Pick the CUDA device when PyTorch reports one is available; otherwise use the CPU.
DEVICE = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
print(f"Running on {DEVICE}")

def extract_image_features(csv_path=None, image_dir=None, features_file=None):
    """
    Embed every image listed in a CSV with a pre-trained ResNet18.

    The ResNet18 classification head is removed, so each image yields the
    flattened output of the remaining layers. Results are cached: if
    ``features_file`` already exists it is loaded and returned without
    touching the model or the images.

    Parameters
    ----------
    csv_path : str or Path, optional
        CSV listing the image ids. The ``id`` column is used when present,
        otherwise the first column. Defaults to the global ``CSV_PATH``.
    image_dir : str or Path, optional
        Folder containing ``<id>.png`` files. Defaults to the global
        ``IMAGE_DIR``.
    features_file : str or Path, optional
        CSV used as the cache / output. Defaults to the global
        ``IMG_FEATURES_FILE``.

    Returns
    -------
    pandas.DataFrame
        One row per successfully processed image with columns
        ``img_0 .. img_{d-1}`` plus ``id``; an empty DataFrame when no image
        could be processed (nothing is written in that case).
    """
    # Resolve the cache location first so a cache hit needs nothing else.
    features_file = Path(IMG_FEATURES_FILE if features_file is None else features_file)
    if features_file.exists():
        print("Features already extracted. Loading from file...")
        return pd.read_csv(features_file)

    csv_path = CSV_PATH if csv_path is None else csv_path
    image_dir = Path(IMAGE_DIR if image_dir is None else image_dir)

    print("Initializing ResNet18 for feature extraction...")

    # `pretrained=True` is deprecated in newer torchvision releases; use the
    # equivalent explicit weights enum when it exists, else the legacy flag.
    weights_enum = getattr(models, "ResNet18_Weights", None)
    if weights_enum is not None:
        model = models.resnet18(weights=weights_enum.IMAGENET1K_V1)
    else:
        model = models.resnet18(pretrained=True)

    # Drop the final fully-connected layer; what remains ends in global pooling.
    feature_extractor = nn.Sequential(*list(model.children())[:-1])
    feature_extractor.to(DEVICE)
    feature_extractor.eval()

    preprocess = transforms.Compose([
        transforms.Resize(256),
        transforms.CenterCrop(224),
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
    ])

    # Inspect the header first: `usecols=['id']` raises when the column is
    # absent, which made the original "fall back to first column" unreachable.
    header = pd.read_csv(csv_path, nrows=0).columns
    id_col = 'id' if 'id' in header else header[0]
    ids = pd.read_csv(csv_path, usecols=[id_col])[id_col].tolist()

    features = []
    valid_ids = []
    missing = 0   # ids with no matching PNG on disk
    failed = []   # (id, exception) for images that could not be processed

    print(f"   -> Processing {len(ids)} images...")

    with torch.no_grad():
        for img_id in tqdm(ids, total=len(ids)):
            img_path = image_dir / f"{img_id}.png"
            if not img_path.exists():
                missing += 1
                continue

            try:
                # Context manager closes the file handle once pixels are copied.
                with Image.open(img_path) as raw:
                    img = raw.convert('RGB')
                img_t = preprocess(img).unsqueeze(0).to(DEVICE)
                embedding = feature_extractor(img_t)
            except Exception as exc:
                # Best-effort: keep going, but remember what went wrong.
                # (Unlike a bare `except:`, Ctrl-C still interrupts the loop.)
                failed.append((img_id, exc))
                continue

            features.append(embedding.flatten().cpu().numpy())
            valid_ids.append(img_id)

    if missing or failed:
        print(f"   -> Skipped {missing} missing and {len(failed)} unreadable images.")
        for bad_id, exc in failed[:5]:
            print(f"      {bad_id}: {exc!r}")

    if not features:
        print(" CRITICAL ERROR: No valid images found or processed!")
        return pd.DataFrame()

    # Derive the column count from the data instead of hard-coding 512.
    feat_cols = [f"img_{i}" for i in range(len(features[0]))]
    df_feat = pd.DataFrame(features, columns=feat_cols)
    df_feat['id'] = valid_ids

    features_file.parent.mkdir(parents=True, exist_ok=True)
    df_feat.to_csv(features_file, index=False)
    print(f"Saved features to {features_file}")
    return df_feat

# Build the ResNet18 features for the test CSV, or load them from the
# cached features file if it already exists.
df_img_features1 = extract_image_features()



Running on cpu
Initializing ResNet18 for feature extraction...
   -> Processing 5404 images...


100%|██████████| 5404/5404 [09:42<00:00,  9.28it/s]


Saved features to artifacts\test_image_features_resnet.csv
