# Clandestine Runway Detection
This notebook presents a deep learning approach to detect clandestine or unauthorized runways in satellite imagery. Clandestine runways pose security risks, as they can be used for illegal activities such as unauthorized landings, smuggling, and other covert operations. The goal of this project is to identify and localize active clandestine runways in remote regions using high-resolution satellite data, making it easier for authorities and organizations to monitor and address these hidden threats.

## Setup

In [2]:
%load_ext autoreload

In [3]:
%autoreload 2

In [5]:
import os
import re
import shutil
import toml
import json
import random
import datetime
from functools import partial

import numpy as np
import pandas as pd
import geopandas as gpd
from skimage.transform import resize
from rasterio.enums import Resampling

from scipy.interpolate import make_interp_spline

from sklearn.model_selection import train_test_split
import tensorflow as tf
import torch
from transformers import TFSegformerModel, TFSegformerDecodeHead, SegformerConfig

import matplotlib.pyplot as plt
import seaborn as sns

from src.utils import *
from src.model import DualSegformerClassifierModel, ClearMemoryCallback

In [9]:
# make sure TensorFlow can see at least one GPU before going any further
gpu_devices = tf.config.experimental.list_physical_devices('GPU')
assert gpu_devices, 'This notebook requires GPU to run!'

In [None]:
# set notebook memory limit
# Building a LogicalDeviceConfiguration on its own has no effect: it must be
# attached to a physical GPU with set_logical_device_configuration.
TF_GPU_MEMORY_LIMIT_MB = 7500
for gpu in tf.config.list_physical_devices('GPU'):
    try:
        tf.config.set_logical_device_configuration(
            gpu,
            [tf.config.LogicalDeviceConfiguration(memory_limit = TF_GPU_MEMORY_LIMIT_MB)]
        )
    except RuntimeError as err:
        # Logical devices cannot be changed once TensorFlow has initialized the
        # GPU (e.g. this cell is re-run without restarting the kernel).
        print(f'Could not set TensorFlow memory limit on {gpu.name}: {err}')

# cap PyTorch at 90% of the memory of its default CUDA device
torch.cuda.set_per_process_memory_fraction(0.9)

In [12]:
# Fix the random seed used throughout the notebook.
# NOTE(review): `set_seed` is not defined in this notebook; presumably it comes
# from `from src.utils import *` - confirm which libraries it seeds.
SEED = 1234
set_seed(SEED)

Seed set to: 1234


## Constants

In [13]:
# Load the project configuration from the TOML file next to the notebook;
# the bare `config.keys()` displays its top-level sections as the cell output.
config = toml.load('./config.toml')
config.keys()

dict_keys(['data', 'params', 'output', 'patches'])

In [14]:
# directories for the train / test imagery and for the download logs
image_data_cfg = config['data']['image_data']
train_dir = image_data_cfg['train_image_dir']
test_dir = image_data_cfg['test_image_dir']
logs_dir = config['output']['logs_dir']

In [15]:
# batch size and image dimensions taken from the `params` section of the config
BATCH_SIZE = config['params']['model']['batch_size']
data_params = config['params']['data']
HEIGHT = data_params['height']
WIDTH = data_params['width']

## Load Data

### Helper Functions

In [16]:
def load_images(path):
    '''Loads one data pair: the tif image and its corresponding mask
    Parameters
    ----------
    path : str
        Path to folder containing a raster-mask pair. The image is the first
        file whose name contains 'sdata', the mask the first file whose name
        contains 'target'.
    Returns
    -------
    tuple of ndarrays
        (arr_feature, arr_target), each with the band axis moved to the end.
        The image is read at 512 x 512 and the mask at 256 x 256.
    Raises
    ------
    FileNotFoundError
        If the folder contains no 'sdata' file or no 'target' file.
    '''
    # NOTE(review): `rio` is not imported in this notebook; presumably rasterio
    # arrives through `from src.utils import *` - confirm.

    # Fetch files. Match on the file name only, so that 'sdata' / 'target'
    # appearing in a parent directory name cannot select the wrong file.
    filenames = os.listdir(path)
    feature_files = [name for name in filenames if 'sdata' in name]
    target_files = [name for name in filenames if 'target' in name]
    if not feature_files or not target_files:
        raise FileNotFoundError(
            f"Expected one 'sdata' and one 'target' file in {path!r}, found: {filenames}"
        )
    image_feature = os.path.join(path, feature_files[0])
    image_target = os.path.join(path, target_files[0])

    # Read target mask (the with-block closes the dataset on exit)
    # NOTE(review): the mask is resampled with bilinear and the image with
    # nearest; masks are usually resampled with nearest - confirm this is intended.
    with rio.open(image_target) as src:
        arr_target = src.read(
            out_shape = (256, 256), # output size hardcoded to 256 x 256
            resampling = Resampling.bilinear
        )
    arr_target = np.moveaxis(arr_target, 0, -1)

    # some non examples got corrupted during saving
    # pending new download fix..
    # the condition below manually fixes the problem:
    # folders whose path ends in '0' get an all-zero mask
    if path[-1] == '0' and arr_target.sum() > 0:
        arr_target *= 0

    # Read image features
    with rio.open(image_feature) as src2:
        arr_feature = src2.read(
            out_shape = (src2.count, 512, 512), # size hardcoded to segformer 512_512
            resampling = Resampling.nearest
        )
    arr_feature = np.moveaxis(arr_feature, 0, -1)

    return (arr_feature, arr_target)

In [17]:
def plot_sample_images(path, num_samples, cmap = 'Spectral_r', random_state = 1):
    '''
    Helper function to visualize randomly drawn samples: images on the top
    row, their masks on the bottom row.
    Parameters
    ----------
    path : list of str
        Paths to the folders containing image mask pairs; the samples are
        drawn from this list without replacement.
    num_samples : int
        Number of samples to plot. Clipped to a maximum of six (6) and to the
        number of folders available.
    cmap : str
        Matplotlib cmap for visualization. Matplotlib ignores it for RGB
        input, which is what the image row receives.
    random_state : int
        Random seed
    Returns
    -------
    None
    Raises
    ------
    ValueError
        If there is nothing to plot (no folders, or num_samples < 1).
    '''
    # clip number of samples
    num_samples = min(num_samples, 6, len(path))
    if num_samples < 1:
        raise ValueError('plot_sample_images needs at least one sample to plot')

    # Use a local generator: it draws the same samples as seeding the global
    # numpy RNG would, without overwriting the notebook-wide seed.
    rng = np.random.RandomState(random_state)

    # select samples
    samples = rng.choice(path, num_samples, replace = False)

    # set figure; squeeze = False keeps `ax` two-dimensional even for one sample
    fig, ax = plt.subplots(2, num_samples, figsize = (24, 6), squeeze = False)

    # plot images
    for i in range(num_samples):
        img, targ = load_images(samples[i])
        # bands 2, 1, 0 are shown as the three colour channels, scaled by 1000
        # and clipped to [0, 1]
        # NOTE(review): presumably these are the R, G, B bands - confirm band order
        img = np.clip(img[:, :, [2, 1, 0]] / 1000, 0, 1)
        ax[0][i].imshow(img, cmap = cmap)
        ax[1][i].imshow(targ, cmap = 'gray')
        ax[0][i].axis('off')
        ax[1][i].axis('off')

## Load and view data

In [18]:
# load data from train download log
train_download_log = os.path.join(logs_dir, [i for i in os.listdir(logs_dir) if 'train_params.csv' in i][0])
train_shp = [os.path.join(train_dir, i) for i in os.listdir(train_dir) if 'geojson' in i][0]
train_shp

FileNotFoundError: [Errno 2] No such file or directory: './logs'

In [None]:
# read the download log and the competition train data,
# then join them on the file name (`filename` in the geojson, `name` in the log)
train_download_df = pd.read_csv(train_download_log)
train_gdf = (
    gpd.read_file(train_shp)
    .set_index('filename')
    .join(train_download_df.set_index('name'))
)

# manually correct locations with Activo = 2 that were not caught during download
# (too late to change at this point): where a runway is present, set Activo to 1
activo_mislabelled = train_gdf['Activo'].eq(2) & train_gdf['runway'].eq(1)
train_gdf.loc[activo_mislabelled, 'Activo'] = 1

# reformat active: |2 - Activo| swaps the codes 0 and 2 and leaves 1 unchanged
# 0 = inactive, 1 = active, 2 = no runway
# NOTE(review): unclear whether the legend above describes the codes before or
# after the swap - confirm.
train_gdf['Activo'] = (2 - train_gdf['Activo']).abs()

In [None]:
# check if the error is fixed:
# among the rows with Activo == 0, show those whose id occurs more than once
m = train_gdf.loc[train_gdf['Activo'] == 0]
m.loc[m['id'].duplicated(keep = False)]

The download script could not retrieve suitable images from 2015, so these points are filtered out of the dataset.

In [None]:
# select valid train data: keep only the rows with status == 1
valid_train_df = train_gdf.loc[train_gdf['status'] == 1]

In [None]:
# select train file directories from the dataframe
# (direct column access instead of a row-wise apply: same values, no per-row
# Python call, and it still yields a list when the frame is empty)
train_files = valid_train_df['output_dir'].to_list()

# runway label as float32
train_target = valid_train_df['runway'].astype('float32')

In [None]:
# visualize a random draw of training samples
# (plot_sample_images shows at most six samples, so the 12 requested here is clipped to 6)
plot_sample_images(train_files, num_samples = 12, random_state = 6)