In [7]:
import sys
import os
from pathlib import Path

path = Path(os.getcwd())
parent_directory = Path.joinpath(path.parent,'src').as_posix()
sys.path.append(str(parent_directory))

import matplotlib.pyplot as plt
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import torch.utils.data as data
import torchvision
from torch.utils.data import DataLoader, Dataset
from torchvision import transforms
from sklearn.metrics import roc_auc_score
import copy
from tqdm import tqdm


from rasterio.features import rasterize
from shapely.geometry import mapping
from skimage.transform import resize
import re
from rasterio.mask import mask

import time
import numpy as np
import pandas as pd
from PIL import Image
import cv2
from datetime import datetime
import geopandas as gpd
import seaborn as sns
from sklearn.metrics import r2_score
from typing import List, Tuple, Dict, Any, Union
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
from utils import *
from notebook_functions import *
np.random.seed(42)
torch.manual_seed(42)
import random
import rasterio

from models import get_model

import itertools
import timeit

#### RankerDataset, Reusable Tools

In [8]:
from pathlib import Path

def get_date(image_date):
    """Parse a compact ``YYYYMMDD`` string into a ``datetime.date``.

    The first four characters are read as the year, the next two as the
    month, and everything after that as the day.
    """
    dashed = f"{image_date[:4]}-{image_date[4:6]}-{image_date[6:]}"
    return datetime.strptime(dashed, "%Y-%m-%d").date()

def get_day_of_week(date):
    """Return the lower-cased weekday name for a ``YYYY-MM-DD`` date string."""
    parsed = datetime.strptime(date, "%Y-%m-%d")
    return parsed.strftime("%A").lower()

def get_pairs_saturday(anchor_image, anchor_pair):
    """Build a labelled table of ordered image pairs.

    Every ``(anchor_image item, anchor_pair item)`` combination is emitted
    with label 1, followed by every reversed ``(anchor_pair item,
    anchor_image item)`` combination with label 0.

    Returns:
        pd.DataFrame with columns ``anchor_image``, ``anchor_image_pair``
        and ``label``.
    """
    label_one_pairs = [(first, second) for first in anchor_image for second in anchor_pair]
    label_zero_pairs = [(second, first) for second in anchor_pair for first in anchor_image]
    ordered_pairs = label_one_pairs + label_zero_pairs

    return pd.DataFrame({
        "anchor_image": [pair[0] for pair in ordered_pairs],
        "anchor_image_pair": [pair[1] for pair in ordered_pairs],
        "label": [1] * len(label_one_pairs) + [0] * len(label_zero_pairs),
    })

In [None]:
class PlanetRankerInfDataset(Dataset):
    """Inference dataset yielding ``(anchor, pair)`` image tensors for the pairwise ranker.

    Each row of the backing table must provide two raster paths in the
    ``anchor_image`` and ``anchor_image_pair`` columns.

    Args:
        im_data: Path to a CSV file, or an already loaded ``pd.DataFrame``
            (its index is reset so positional lookup matches ``__len__``).
        transform: Optional callable applied to each preprocessed image.
        preprocess_type: ``'derivatives'`` or ``'rgb'``. Any other value,
            including the default ``'none'``, makes ``__getitem__`` raise
            ``ValueError``.
        building_polygon_path: Optional directory holding
            ``<parking_lot>_buildings.geojson`` files. When set and
            ``preprocess_type == 'rgb'``, building pixels are zeroed out.

    Raises:
        TypeError: If ``im_data`` is neither a ``str`` nor a ``pd.DataFrame``.
    """

    def __init__(self, im_data, transform=None, preprocess_type='none', building_polygon_path=None):
        if isinstance(im_data, str):
            image_dataframe = pd.read_csv(im_data)
        elif isinstance(im_data, pd.DataFrame):
            image_dataframe = im_data.reset_index(drop=True)
        else:
            # Previously this fell through and failed later with an
            # UnboundLocalError on `image_dataframe`.
            raise TypeError(
                f"im_data should be a CSV path or a pandas DataFrame, got {type(im_data).__name__}")

        self.image_dataframe = image_dataframe
        self.transform = transform
        self.preprocess_type = preprocess_type
        # Not read anywhere in this class; kept so the attribute remains available.
        self.image_cache = {}
        self.building_polygon_path = building_polygon_path

    def __len__(self):
        """Number of image pairs (rows) in the backing table."""
        return len(self.image_dataframe)

    def stacking_rgb_images(self, image):
        """Stack three bands of a band-first array into an ``(H, W, 3)`` float32 image.

        For 8-band input, bands 5/3/1 are used as red/green/blue; for 4-band
        input, bands 2/1/0 are used.
        (presumably PlanetScope band ordering — TODO confirm)

        Raises:
            ValueError: If ``image`` has neither 8 nor 4 bands on axis 0.
        """
        if image.shape[0] == 8:
            red = image[5, :, :]
            green = image[3, :, :]
            blue = image[1, :, :]
        elif image.shape[0] == 4:
            red = image[2, :, :]
            green = image[1, :, :]
            blue = image[0, :, :]
        else:
            raise ValueError(
                f"Expected an image with 4 or 8 bands, got {image.shape[0]}")

        return np.dstack((red, green, blue)).astype('float32')

    def remove_building_polygons(self, parking_lot_img_path, building_polygon_path = "../../input/pheonix_car_lots/building_polygons/"):
        """Load an image as RGB and set every pixel covered by a building polygon to 0.

        The polygon file is looked up as
        ``<building_polygon_path>/<parent directory name of the image>_buildings.geojson``.

        Args:
            parking_lot_img_path: Path to the raster to load.
            building_polygon_path: Directory containing the building GeoJSON files.

        Returns:
            The RGB image with building pixels zeroed.
        """
        parking_lot_name = Path(parking_lot_img_path).parent.stem
        geojson_path = f"{building_polygon_path}/{parking_lot_name}_buildings.geojson"

        with rasterio.open(parking_lot_img_path) as src:
            parking_lot_img = convert_to_rgb(src.read())
            profile = src.profile

        # Load the building polygons and bring them into the raster's CRS.
        buildings = gpd.read_file(geojson_path)
        buildings = buildings.to_crs(profile["crs"])

        # Rasterize the polygons (1 inside a building, 0 elsewhere).
        # assumes convert_to_rgb returns an (H, W, C) array so shape[:2] is the
        # raster height/width — TODO confirm
        building_mask = rasterize(
            [(geom, 1) for geom in buildings.geometry],
            out_shape=parking_lot_img.shape[:2],
            transform=profile["transform"],
            fill=0,
            dtype=np.uint8
        )

        # Replace building pixels with zero in the parking lot image.
        parking_lot_img[building_mask == 1] = 0

        return parking_lot_img

    @staticmethod
    def _read_raster(image_path):
        """Read every band of the raster at ``image_path`` into an array."""
        with rasterio.open(image_path, 'r') as src:
            return src.read()

    def __getitem__(self, index):
        """Return the preprocessed ``(anchor, pair)`` images for row ``index``.

        Raises:
            ValueError: If ``preprocess_type`` is not ``'derivatives'`` or ``'rgb'``.
        """
        row = self.image_dataframe.iloc[index]
        anchor_image_image_path = row['anchor_image']
        anchor_image_pair_image_path = row['anchor_image_pair']

        if self.preprocess_type == 'derivatives':
            anchor_img_data = convert_to_image_derivatives(
                self._read_raster(anchor_image_image_path))
            anchor_pair_img_data = convert_to_image_derivatives(
                self._read_raster(anchor_image_pair_image_path))
        elif self.preprocess_type == 'rgb':
            if self.building_polygon_path:
                # remove_building_polygons opens the raster itself, so the
                # files are not read here a second time.
                print("Removing building polygons ....................")
                anchor_img_data = self.remove_building_polygons(anchor_image_image_path, self.building_polygon_path)
                anchor_pair_img_data = self.remove_building_polygons(anchor_image_pair_image_path, self.building_polygon_path)
            else:
                anchor_img_data = convert_to_rgb(self._read_raster(anchor_image_image_path))
                anchor_pair_img_data = convert_to_rgb(self._read_raster(anchor_image_pair_image_path))
        else:
            raise ValueError(
                "Invalid preprocess type, choose from ['derivatives', 'rgb']")

        if self.transform:
            anchor_img_data_tensor = self.transform(anchor_img_data)
            anchor_pair_img_data_tensor = self.transform(anchor_pair_img_data)
        else:
            anchor_img_data_tensor = torch.as_tensor(anchor_img_data)
            anchor_pair_img_data_tensor = torch.as_tensor(anchor_pair_img_data)

        return anchor_img_data_tensor, anchor_pair_img_data_tensor

from typing import Dict
def get_inference_dataloader(test_data_path: str, test_augmentations: Dict, preprocess_type: str, batch_size: int, building_polygon_path: str = None):
    """Build a non-shuffled ``DataLoader`` over ``PlanetRankerInfDataset``.

    Args:
        test_data_path: CSV path, or a ``pd.DataFrame`` of image pairs (a
            DataFrame is copied before use).
        test_augmentations: Configuration passed to ``get_transforms``.
        preprocess_type: Forwarded to ``PlanetRankerInfDataset``.
        batch_size: Number of pairs per batch.
        building_polygon_path: Forwarded to ``PlanetRankerInfDataset``.

    Raises:
        ValueError: If ``test_data_path`` is neither a string nor a DataFrame.
    """
    test_transform = get_transforms(test_augmentations)

    if not isinstance(test_data_path, (str, pd.DataFrame)):
        raise ValueError("test_data_path should be either a string or a dataframe")
    test_df = pd.read_csv(test_data_path) if isinstance(test_data_path, str) else test_data_path.copy()

    return DataLoader(
        PlanetRankerInfDataset(
            test_df,
            transform=test_transform,
            preprocess_type=preprocess_type,
            building_polygon_path=building_polygon_path,
        ),
        batch_size=batch_size,
        shuffle=False,
        num_workers=0,
    )


def load_model(model_config, model_weight):
    """Instantiate a model from its config and load trained weights into it.

    Args:
        model_config: Configuration passed to ``get_model``.
        model_weight: Path to the saved ``state_dict``.

    Returns:
        The model with the loaded weights. It is not moved to another device
        or switched to eval mode here.
    """
    model = get_model(model_config)
    logger.info(f"Loading model from {model_weight}")
    # Deserialize onto the CPU so a checkpoint saved from a GPU run also
    # loads on a CPU-only machine; load_state_dict then copies the values
    # into the model's own parameters.
    state_dict = torch.load(model_weight, map_location="cpu")
    model.load_state_dict(state_dict)
    return model


In [2]:
def has_2020_images(df):
    """Return True when the ``year`` column of ``df`` contains the value 2020."""
    return bool(2020 in df['year'].unique())

def get_2020_images(df):
    """Return the ``image_path`` values of rows whose ``year`` is 2020 to 2022 inclusive.

    The values are returned as-is when they are a NumPy array; anything else
    is wrapped in a one-element list.
    """
    years = df['year']
    in_window = (years >= 2020) & (years <= 2022)
    covid_images = df.loc[in_window, 'image_path'].values
    if not isinstance(covid_images, np.ndarray):
        return [covid_images]
    return covid_images

def get_pair_df(iterr_ls):
    """Turn an iterable of pairs into a two-column DataFrame.

    Element 0 of each item goes to ``anchor_image`` and element 1 to
    ``anchor_image_pair``.
    """
    pairs = list(iterr_ls)
    return pd.DataFrame({
        "anchor_image": [pair[0] for pair in pairs],
        "anchor_image_pair": [pair[1] for pair in pairs],
    })

def _score_anchor_dates(pairs_df, date_column):
    """Summarise pairwise predictions for each unique value of ``date_column``.

    For every date the result holds the mean of ``probs``, the number of rows
    with ``pred == 1`` (``raw_rank``), and that count divided by the number of
    rows for the date (``rank_scaled``).
    """
    date_pred_scores = {'date': [], 'probs': [], 'raw_rank': [], 'rank_scaled': []}
    for date in pairs_df[date_column].unique():
        date_df = pairs_df[pairs_df[date_column] == date]
        count_1 = date_df[date_df['pred'] == 1].shape[0]
        date_pred_scores['date'].append(date)
        date_pred_scores['probs'].append(date_df['probs'].mean())
        date_pred_scores['raw_rank'].append(count_1)
        date_pred_scores['rank_scaled'].append(count_1 / date_df.shape[0])
    return pd.DataFrame(date_pred_scores)


def ranker_inference_hard(pairs_df, anchor_date, multiple_lots=False):
    """Aggregate pairwise predictions into a per-date ranking.

    Args:
        pairs_df: Table with ``pred`` and ``probs`` columns plus the column
            named by ``anchor_date``; ``parking_lot_name`` is also required
            when ``multiple_lots`` is True.
        anchor_date: Name of the column holding the anchor image date.
        multiple_lots: When True, dates are scored separately per
            ``parking_lot_name`` and the per-lot tables are concatenated in
            order of first appearance (not sorted, index not reset). When
            False, the single table is sorted by ``date``.

    Returns:
        pd.DataFrame with ``date``, ``probs``, ``raw_rank``, ``rank_scaled``
        (plus ``parking_lot_name`` in multi-lot mode) and ``rank``, the dense
        ascending rank of ``raw_rank`` over the whole returned table.
    """
    date_column = f'{anchor_date}'

    if multiple_lots:
        per_lot_scores = []
        for parking_lot in pairs_df['parking_lot_name'].unique():
            sel_pairs_df = pairs_df[pairs_df['parking_lot_name'] == parking_lot]
            print(f"Processing {parking_lot}, with size {sel_pairs_df.shape}")
            lot_scores_df = _score_anchor_dates(sel_pairs_df, date_column)
            lot_scores_df['parking_lot_name'] = parking_lot
            per_lot_scores.append(lot_scores_df)
        overall_date_pred_scores_df = pd.concat(per_lot_scores)
    else:
        overall_date_pred_scores_df = _score_anchor_dates(pairs_df, date_column).sort_values('date')

    overall_date_pred_scores_df['rank'] = overall_date_pred_scores_df['raw_rank'].rank(ascending=True, method="dense").astype(int)
    return overall_date_pred_scores_df


def remove_img_with_no_clear_udm(udm_img, masked_area):
    '''
    Decide whether an image should be dropped.

    Returns False (keep the image) only when the value 1 occurs both in the
    first channel of ``udm_img`` and in ``masked_area``; returns True (remove
    the image) otherwise.
    '''
    clear_channel_has_one = bool(np.any(np.asarray(udm_img[0]) == 1))
    masked_area_has_one = bool(np.any(np.asarray(masked_area) == 1))
    return not (clear_channel_has_one and masked_area_has_one)

# NOTE(review): get_date is defined more than once in this notebook with the
# same body; whichever definition is executed last is the one in effect.
def get_date(image_date):
    """Convert a ``YYYYMMDD`` string to a ``datetime.date``."""
    year_part, month_part, day_part = image_date[:4], image_date[4:6], image_date[6:]
    parsed = datetime.strptime(f"{year_part}-{month_part}-{day_part}", "%Y-%m-%d")
    return parsed.date()

def clean_data_by_size(img_path: List[str]):
    '''Clean the data by removing the images which are not of the most common size.

    Args:
        img_path: List of raster paths. It is updated in place and also returned.

    Returns:
        The same list object, holding only the paths whose raster shape
        equals the shape reported by ``check_size_most``.
    '''
    logger.info(f"Initial size of dataset: {len(img_path)}")
    size = check_size_most(img_path)
    logger.info(f"Most common size of the dataset: {size}")

    # Collect the survivors first. Calling list.remove() on the list being
    # iterated shifts the remaining items, so the element right after every
    # removed one was never checked.
    kept_paths = []
    for path in img_path:
        img = rioxarray.open_rasterio(path)
        if img.shape != size:
            continue
        kept_paths.append(path)

    # Slice assignment keeps the in-place update that callers may rely on.
    img_path[:] = kept_paths
    logger.info(f"Final size of dataset(after removing faulty size images): {len(img_path)}")
    return img_path


def clean_data_by_black_pixels(img_path_list):
    """Keep the images whose zero-valued count is at most the median count.

    For each path the first three bands are read and the number of entries
    equal to 0 is counted; images with a count above the median of all
    counts are dropped.

    Returns:
        A new list with the retained paths, in their original order.
    """
    black_pixels = []
    for path in img_path_list:
        img = rioxarray.open_rasterio(path)
        black_pixels.append(np.count_nonzero(img.data[0:3].transpose(1, 2, 0) == 0))

    img_path_median = []
    if black_pixels:
        # The median is the same for every image, so compute it once instead
        # of once per loop iteration.
        median_black = np.median(black_pixels)
        img_path_median = [
            path for path, count in zip(img_path_list, black_pixels) if count <= median_black
        ]
    logger.info(f"Final size of dataset(after removing faulty black pixels in images): {len(img_path_median)}")
    return img_path_median


def clean_data_by_white_pixels(img_path_list, threshold=200):
    """Keep the images whose bright-value count is at most the mean count.

    Each image is converted with ``convert_to_rgb`` and the number of values
    ``>= threshold`` is counted; images with a count above the mean of all
    counts are dropped.

    Args:
        img_path_list: Raster paths to filter.
        threshold: Value at or above which an entry counts as "white".

    Returns:
        A new list with the retained paths, in their original order.
    """
    white_pixels = []
    for path in img_path_list:
        img = rioxarray.open_rasterio(path)
        img = convert_to_rgb(img.data)
        white_pixels.append(np.count_nonzero(img >= threshold))

    img_path_mean_white = []
    if white_pixels:
        # The mean is the same for every image, so compute it once instead
        # of once per loop iteration.
        mean_white = np.mean(white_pixels)
        img_path_mean_white = [
            path for path, count in zip(img_path_list, white_pixels) if count <= mean_white
        ]
    logger.info(f"Final size of dataset(after removing faulty white pixels in images) {len(img_path_mean_white)}")
    return img_path_mean_white


def get_reference_image_single_parkinglot(images_path: str, shapefile_path: str):
    '''
    Compute the reference image of one parking lot as the mean of its
    grayscale images.

    Each image in ``images_path`` is converted to RGB and then to grayscale,
    multiplied by a 0/1 mask derived from the first band of the
    polygon-masked raster, and resized to 32x32. The element-wise mean over
    all resized images is returned.
    '''
    lot_polygons = gpd.read_file(shapefile_path)
    resized_grays = []

    for tif_path in images_path:
        with rasterio.open(tif_path) as src:
            all_bands = src.read()
            lot_polygons = lot_polygons.to_crs(src.crs)
            clipped, _ = mask(src, lot_polygons.geometry, crop=True)

        gray = cv2.cvtColor(convert_to_rgb(all_bands), cv2.COLOR_BGR2GRAY)
        # 1 where the first masked band is non-zero, 0 elsewhere.
        inside_lot = np.where(clipped[0] == 0, 0, 1)
        resized_grays.append(resize(gray * inside_lot, (32, 32)))

    return np.array(resized_grays).mean(axis=0)


def calculate_histogram(image, bins=256):
    '''
    Compute a normalized, flattened histogram of channel 0 of ``image``,
    using ``bins`` bins over the value range [0, 256).
    '''
    raw_hist = cv2.calcHist([image], [0], None, [bins], [0, 256])
    return cv2.normalize(raw_hist, raw_hist).flatten()

def compare_histograms(hist1: np.ndarray, hist2: np.ndarray):
    '''
    Return the sum of absolute element-wise differences between two
    histograms (0 means identical).
    '''
    difference = hist1 - hist2
    return np.abs(difference).sum()

def cleaning_by_histogram_single_parkinglot(filtered_df_by_udm , shapefile_path: str, cluster_name: str, parking_lot_name: str, threshold: float = 0.45):
    """Keep the images whose grayscale histogram is close to the lot's reference image.

    Args:
        filtered_df_by_udm: Table with an ``image_path`` column.
        shapefile_path: Path to the parking-lot polygon file.
        cluster_name: Value written to the ``cluster`` column of the result.
        parking_lot_name: Value written to the ``parking_lot_name`` column.
        threshold: Largest histogram distance (see ``compare_histograms``)
            at which an image is still kept. Defaults to 0.45, the value that
            was previously hard-coded.

    Returns:
        pd.DataFrame with columns ``cluster``, ``parking_lot_name`` and
        ``image_path``; the columns are present even when no image passes.
    """
    result_columns = ['cluster', 'parking_lot_name', 'image_path']

    filter_images_by_size = clean_data_by_size(filtered_df_by_udm['image_path'].tolist())
    reference_image = get_reference_image_single_parkinglot(filter_images_by_size, shapefile_path)
    images_path = filter_images_by_size
    if reference_image.ndim == 2:
        reference_image = reference_image[np.newaxis, ...]

    # NOTE(review): the reference is a mean of resized images; confirm its
    # value range is what the uint8 cast below expects.
    reference_image = reference_image.astype(np.uint8)
    ref_hist = calculate_histogram(reference_image)
    filtered_images = []
    parking_lot_gdf = gpd.read_file(shapefile_path)

    for image_path in images_path:
        with rasterio.open(image_path) as src:
            image_8b = src.read()
            parking_lot_gdf = parking_lot_gdf.to_crs(src.crs)
            masked_data, _ = mask(src, parking_lot_gdf.geometry, crop=True)

        image = convert_to_rgb(image_8b)
        # NOTE(review): BGR2GRAY is applied to the output of convert_to_rgb;
        # get_reference_image_single_parkinglot does the same, so both sides
        # are treated alike. Confirm the channel order before changing either.
        image = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)

        # 0/1 mask: 1 where the first masked band is non-zero.
        masked_data = masked_data[0]
        masked_data = np.where(masked_data == 0, 0, 1)
        image = image * masked_data

        image = image.astype(np.uint8)
        matched_image = image[np.newaxis, ...]
        hist = calculate_histogram(matched_image)
        similarity = compare_histograms(ref_hist, hist)
        if similarity <= threshold:
            filtered_images.append(
                {'cluster': cluster_name, 'parking_lot_name': parking_lot_name, 'image_path': image_path})

    # Passing the columns explicitly keeps the schema stable when no image
    # passes the threshold (an empty list would otherwise give no columns).
    filtered_images_df = pd.DataFrame(filtered_images, columns=result_columns)
    return filtered_images_df

NameError: name 'List' is not defined

#### Sudan Use Case

In [None]:
import glob
# NOTE(review): get_date is defined more than once in this notebook with the
# same body; whichever definition is executed last is the one in effect.
def get_date(image_date):
    """Turn a ``YYYYMMDD`` string into the corresponding ``datetime.date``."""
    iso_text = f"{image_date[:4]}-{image_date[4:6]}-{image_date[6:]}"
    return datetime.strptime(iso_text, "%Y-%m-%d").date()

# Index every clipped Sudan GeoTIFF, skipping any path that contains 'udm2',
# and save the index as a CSV next to the images.
all_sudan = [
    tif_path
    for tif_path in glob.glob("../../input/sudan/polygon_images/sudan_clipped/**/*.tif", recursive=True)
    if 'udm2' not in tif_path
]

all_sudan_dict = {'image_path': [], 'date': []}
for image in all_sudan:
    # The part of the file stem before the first underscore is the YYYYMMDD date.
    date = Path(image).stem.split("_")[0]
    all_sudan_dict['image_path'].append(image)
    all_sudan_dict['date'].append(get_date(date))

all_sudan_df = pd.DataFrame(all_sudan_dict)
all_sudan_df['cluster'] = 'sudan'
all_sudan_df.to_csv("../../input/sudan/polygon_images/sudan_clipped/all_images.csv", index=False)

In [None]:
# Show the (rows, columns) shape of all_sudan_df as a quick sanity check.
print(all_sudan_df.shape)

#### View all images from Sudan Jackdon Parking lot before ranking

In [None]:
all_sudan_df = pd.read_csv("../../input/sudan/polygon_images/sudan_clipped/all_images.csv")
rows, cols = 10, 16  # 10x16 grid; images beyond rows * cols are not shown
fig, axes = plt.subplots(rows, cols, figsize=(20, 12))  # Adjust figure size
flat_axes = axes.flatten()

# Draw one image per grid cell, in table order.
for ax, (_, row) in zip(flat_axes, all_sudan_df.iterrows()):
    image_path = row['image_path']

    # Read the raster and convert it to an RGB image for display.
    with rasterio.open(image_path, 'r') as src:
        img_data = src.read()
        img_data = convert_to_rgb(img_data)

    ax.imshow(img_data)

    # The title is the date stored in the table for this image.
    date = row['date']
    ax.set_title(date, fontsize=8)
    ax.axis('off')

# Hide the frames of grid cells that received no image.
for ax in flat_axes[len(all_sudan_df):]:
    ax.axis('off')

# Adjust layout and show the plot
plt.tight_layout()
plt.show()

#### Assign pre and post war labels based on days observed by 
Guo, Zhe, et al. "Monitoring indicators of economic activities in Sudan amidst ongoing conflict using satellite data." Defence and Peace Economics 35.8 (2024): 992-1008.

In [None]:
all_sudan_df = pd.read_csv("../../input/sudan/polygon_images/sudan_clipped/all_images.csv")
all_sudan_df['date'] = pd.to_datetime(all_sudan_df['date']).dt.date

# Inclusive date windows used to label the two periods.
pre_war_mask = (
    (all_sudan_df['date'] >= datetime.strptime('2023-04-01', "%Y-%m-%d").date())
    & (all_sudan_df['date'] <= datetime.strptime('2023-04-07', "%Y-%m-%d").date())
)
post_war_mask = (
    (all_sudan_df['date'] >= datetime.strptime('2023-04-14', "%Y-%m-%d").date())
    & (all_sudan_df['date'] <= datetime.strptime('2023-04-21', "%Y-%m-%d").date())
)

# .copy() makes each subset an independent frame, so adding the 'period'
# column does not write into a slice of all_sudan_df (SettingWithCopyWarning).
pre_war_all_sudan_df = all_sudan_df[pre_war_mask].copy()
post_war_all_sudan_df = all_sudan_df[post_war_mask].copy()
pre_war_all_sudan_df['period'] = 'pre_war'
post_war_all_sudan_df['period'] = 'post_war'
# A third 'current' period (dates from 2025-01-01 on) was sketched here
# earlier as commented-out code; it is not part of the analysis.

all_sudan_df = pd.concat([pre_war_all_sudan_df, post_war_all_sudan_df]).reset_index(drop=True)
print(all_sudan_df.shape)

# Weekday names come straight from the datetime accessor (capitalized, e.g.
# 'Saturday'); the earlier string round-trip through get_day_of_week was
# overwritten by this assignment anyway.
all_sudan_df["date"] = pd.to_datetime(all_sudan_df["date"])
all_sudan_df["day_of_week"] = all_sudan_df["date"].dt.day_name()
day_ranked_df_counts = all_sudan_df.groupby(["period", "day_of_week"])["date"].count().unstack(fill_value=0)
print(day_ranked_df_counts)
all_sudan_df

In [None]:
#Select only days that are presnt in both pre and post war data
# Select only the days of the week that are present in both the pre- and post-war data
all_sudan_df = all_sudan_df[all_sudan_df['day_of_week'].isin(['Saturday', 'Sunday', 'Monday','Tuesday', 'Wednesday'])]

In [None]:
config = load_config("../../experiments/20250325-013240_patch_pairwiserankerwithparameter/inference_config.yaml")
model = load_model(config['model_config'], f"../../{config['model_config']['model_weight']}")

# Keep one image per (date, cluster).
all_sudan_df = all_sudan_df.groupby(['date','cluster']).agg({'image_path':'first'}).reset_index()
print(all_sudan_df.shape)
all_sudan_df['date'] = pd.to_datetime(all_sudan_df['date']).dt.date.astype(str)
all_sudan_df['day_of_week'] = all_sudan_df['date'].apply(get_day_of_week)

# Every ordered pair of distinct image paths.
pairs = list(itertools.product(all_sudan_df['image_path'], repeat=2))
covid_anchor_images = [pair for pair in pairs if pair[0] != pair[1]]

pairs_sudan_df = pd.DataFrame(covid_anchor_images, columns=['anchor_image', 'anchor_image_pair'])

# The part of each file stem before the first underscore is the YYYYMMDD date.
pairs_sudan_df['anchor_date'] = pairs_sudan_df.anchor_image.apply(lambda x: get_date(Path(x).stem.split("_")[0]))
pairs_sudan_df['anchor_day'] = pairs_sudan_df['anchor_date'].apply(lambda x: x.strftime("%A")).astype(str)
pairs_sudan_df['anchor_date'] = pairs_sudan_df['anchor_date'].astype(str)
pairs_sudan_df['anchor_pair_date'] = pairs_sudan_df.anchor_image_pair.apply(lambda x: get_date(Path(x).stem.split("_")[0]))
pairs_sudan_df['anchor_pair_date'] = pairs_sudan_df['anchor_pair_date'].astype(str)

test_loader = get_inference_dataloader(pairs_sudan_df, config['data_config']['test_data']['augmentations'], 'rgb', 1)
pair_wise_test_dict = {'probs': []}

# Switch to eval mode and disable gradient tracking once, not per batch.
model.eval()
with torch.no_grad():
    for anchor_image, anchor_image_pair in test_loader:
        output = model(anchor_image, anchor_image_pair)
        # Only the first row of the batch output is kept, which matches the
        # batch size of 1 requested above.
        # NOTE(review): revisit this indexing if the batch size is raised.
        pair_wise_test_dict['probs'].extend(output.cpu().numpy()[0])

pairs_sudan_df['probs'] = pair_wise_test_dict['probs']
# A probability above 0.5 is recorded as pred = 1, otherwise 0.
pairs_sudan_df['pred'] = np.where(pairs_sudan_df['probs'] <= 0.5, 0, 1)

In [None]:
# Rank the anchor dates from the pairwise predictions and renumber the rows.
ranked_sudan_df = ranker_inference_hard(pairs_sudan_df, 'anchor_date').reset_index(drop=True)
ranked_sudan_df

In [None]:
# Mean raw_rank per date, re-ranked ascending (default 'average' tie method,
# truncated to int).
ranked_sudan_df_agg = ranked_sudan_df.groupby(['date']).agg({'raw_rank': 'mean'}).reset_index()
ranked_sudan_df_agg['rank'] = ranked_sudan_df_agg['raw_rank'].rank(ascending=True).astype(int)
ranked_sudan_df_agg = ranked_sudan_df_agg.sort_values('date')
ranked_sudan_df_agg['day_of_week'] = ranked_sudan_df_agg['date'].apply(get_day_of_week)
ranked_sudan_df_agg['date'] = pd.to_datetime(ranked_sudan_df_agg['date']).dt.date

# NOTE(review): dates up to and including 2023-04-14 count as pre_war here,
# while the labelling cell earlier in this notebook starts its post_war window
# on 2023-04-14 — confirm which side that date belongs to.
period_cutoff = datetime.strptime('2023-04-14', "%Y-%m-%d").date()
ranked_sudan_df_agg['period'] = ranked_sudan_df_agg['date'].apply(
    lambda x: 'pre_war' if x <= period_cutoff else 'post_war')
ranked_sudan_df_agg['ID'] = ranked_sudan_df_agg['period'] + '_' + ranked_sudan_df_agg['day_of_week'] + "_" + ranked_sudan_df_agg['date'].astype(str)

plt.figure(figsize=(15, 10))
plt.bar(ranked_sudan_df_agg['ID'], ranked_sudan_df_agg['rank']-1)
plt.xticks(rotation=75)
plt.grid(True)
plt.title('Image ranking based of all parking lots on dates: Sudan Bus Terminal')
plt.ylabel('Number of images beaten')
plt.xlabel('Date')
# Lay out after the labels are set so they are included in the fit.
plt.tight_layout()
plt.show()

In [None]:
# Mean raw_rank per date, re-ranked ascending (default 'average' tie method,
# truncated to int).
ranked_sudan_df_agg = ranked_sudan_df.groupby(['date']).agg({'raw_rank': 'mean'}).reset_index()
ranked_sudan_df_agg['rank'] = ranked_sudan_df_agg['raw_rank'].rank(ascending=True).astype(int)
ranked_sudan_df_agg = ranked_sudan_df_agg.sort_values('date')
ranked_sudan_df_agg['day_of_week'] = ranked_sudan_df_agg['date'].apply(get_day_of_week)

# get_day_of_week returns lower-case names; capitalize them and fix the
# display order of the weekdays on the x axis.
day_order = ['Monday', 'Tuesday', 'Wednesday', 'Saturday', 'Sunday']
ranked_sudan_df_agg['day_of_week'] = pd.Categorical(
    ranked_sudan_df_agg['day_of_week'].str.capitalize(),
    categories=day_order,
    ordered=True
)

ranked_sudan_df_agg['date'] = pd.to_datetime(ranked_sudan_df_agg['date']).dt.date
# NOTE(review): dates up to and including 2023-04-14 count as pre_war here,
# while the labelling cell earlier in this notebook starts its post_war window
# on 2023-04-14 — confirm which side that date belongs to.
period_cutoff = datetime.strptime('2023-04-14', "%Y-%m-%d").date()
ranked_sudan_df_agg['period'] = ranked_sudan_df_agg['date'].apply(
    lambda x: 'pre_war' if x <= period_cutoff else 'post_war')

plt.figure(figsize=(15, 10))
sns.barplot(data=ranked_sudan_df_agg, x='day_of_week', y='rank', hue='period', order=day_order)
plt.legend(title='Period', loc='upper left', fontsize=20)
plt.tick_params(axis='both', which='major', labelsize=20)
plt.xticks(rotation=75)
plt.ylabel('Image ranking', fontsize=20)
plt.xlabel('Day of week', fontsize=20)
# Lay out after the labels are set so they are included in the fit.
plt.tight_layout()
plt.show()

In [None]:
rows, cols = 2, 5  # 2x5 grid; images beyond rows * cols are not shown
fig, axes = plt.subplots(rows, cols, figsize=(20, 12))  # Adjust figure size
flat_axes = axes.flatten()

# Draw one image per grid cell, in table order.
for ax, (_, row) in zip(flat_axes, all_sudan_df.iterrows()):
    image_path = row['image_path']

    # Read the raster and convert it to an RGB image for display.
    with rasterio.open(image_path, 'r') as src:
        img_data = src.read()
        img_data = convert_to_rgb(img_data)

    ax.imshow(img_data)

    # The title is the date stored in the table for this image.
    date = row['date']
    ax.set_title(date, fontsize=8)
    ax.axis('off')

# Hide the frames of grid cells that received no image.
for ax in flat_axes[len(all_sudan_df):]:
    ax.axis('off')

# Adjust layout and show the plot
plt.tight_layout()
plt.show()