In [1]:
import gdown
import os
import zipfile

import rasterio
import numpy as np
from tqdm import tqdm
import xarray as xr
import matplotlib.pyplot as plt
import cartopy.crs as ccrs
from datetime import datetime
import pandas as pd
import geopandas as gpd
from rasterstats import zonal_stats
from shapely.geometry import box, Point

In [2]:
def download_file_from_drive(
    file_url, file_name, output_dir="../../drive_downloaded_files"
):
    """
    Downloads a file from Google Drive.

    Parameters:
        file_url (str): The Google Drive URL to download the file from.
        file_name (str): The name to save the downloaded file as.
        output_dir (str): The directory where the downloaded file will be saved. Defaults to "../../drive_downloaded_files".
    """
    # Create the full output path
    output_path = os.path.join(output_dir, file_name)

    # Create the folder if it doesn't exist
    if not os.path.exists(output_dir):
        os.makedirs(output_dir)

    # Download the file if it doesn't exist
    if not os.path.exists(output_path):
        print(f"Downloading the file from {file_url}...")
        gdown.download(file_url, output_path, quiet=False)
    else:
        print(f"The file {output_path} already exists.")

    return output_path  # Return the path of the downloaded file

In [3]:
# Google Drive file URL and name
drive_url_population = (
    "https://drive.google.com/uc?export=download&id=1AXvlLrvxq7Orze-DCNhZiFoehAFTglL2"
)
file_name_population = "GHS_POP_E2020_GLOBE_R2023A_4326_30ss_V1_0.tif"

# Download the file
raster_path = download_file_from_drive(drive_url_population, file_name_population)

# Read the raster projection using rasterio
with rasterio.open(raster_path) as src:
    raster_crs = src.crs  # Get the raster CRS
    print(f"Raster CRS: {raster_crs}")

The file ../../drive_downloaded_files/GHS_POP_E2020_GLOBE_R2023A_4326_30ss_V1_0.tif already exists.
Raster CRS: GEOGCS["WGS 84",DATUM["World Geodetic System 1984",SPHEROID["WGS 84",6378137,298.257223563]],PRIMEM["Greenwich",0],UNIT["degree",0.0174532925199433,AUTHORITY["EPSG","9122"]],AXIS["Latitude",NORTH],AXIS["Longitude",EAST]]


# Country Data

In [4]:
countries_zip_url = "https://drive.google.com/uc?id=1UQzdO7suT0BnwKBeNybMG97vM9GIDogA"
countries_zip_file_path = "../../allCountries.zip"

# Download the ZIP file if it doesn't exist; otherwise, proceed to read the TXT file.
if not os.path.exists(countries_zip_file_path):
    gdown.download(countries_zip_url, countries_zip_file_path, quiet=False)

with zipfile.ZipFile(countries_zip_file_path) as z:
    countries_txt_filename = "allCountries.txt"

    with z.open(countries_txt_filename) as txt_file:
        countries_df = pd.read_csv(txt_file, sep="\t", header=None)


# https://download.geonames.org/export/dump/
countries_df.columns = [
    "geonameid",
    "name",
    "asciiname",
    "alternatenames",
    "latitude",
    "longitude",
    "feature class",
    "feature code",
    "iso alpha 2",
    "cc2",
    "admin1 code",
    "admin2 code",
    "admin3 code",
    "admin4 code",
    "population",
    "elevation",
    "dem",
    "timezone",
    "modification date",
]

print(f"\nshape: {countries_df.shape}")
countries_df.head()

  countries_df = pd.read_csv(txt_file, sep="\t", header=None)



shape: (12950185, 19)


Unnamed: 0,geonameid,name,asciiname,alternatenames,latitude,longitude,feature class,feature code,iso alpha 2,cc2,admin1 code,admin2 code,admin3 code,admin4 code,population,elevation,dem,timezone,modification date
0,2994701,Roc Meler,Roc Meler,"Roc Mele,Roc Meler,Roc Mélé",42.58765,1.7418,T,PK,AD,"AD,FR",02,,,,0,2811.0,2348,Europe/Andorra,2023-10-03
1,3017832,Pic de les Abelletes,Pic de les Abelletes,"Pic de la Font-Negre,Pic de la Font-Nègre,Pic ...",42.52535,1.73343,T,PK,AD,FR,A9,66.0,663.0,66146.0,0,,2411,Europe/Andorra,2014-11-05
2,3017833,Estany de les Abelletes,Estany de les Abelletes,"Estany de les Abelletes,Etang de Font-Negre,Ét...",42.52915,1.73362,H,LK,AD,FR,A9,,,,0,,2260,Europe/Andorra,2014-11-05
3,3023203,Port Vieux de la Coume d’Ose,Port Vieux de la Coume d'Ose,"Port Vieux de Coume d'Ose,Port Vieux de Coume ...",42.62568,1.61823,T,PASS,AD,,00,,,,0,,2687,Europe/Andorra,2014-11-05
4,3029315,Port de la Cabanette,Port de la Cabanette,"Port de la Cabanette,Porteille de la Cabanette",42.6,1.73333,T,PASS,AD,"AD,FR",B3,9.0,91.0,9139.0,0,,2379,Europe/Andorra,2014-11-05


## EUI

In [5]:
eui_url = "https://drive.google.com/uc?id=12qGq_DLefI1RihIF_RKQUyJtm480-xRC"
eui_df = pd.read_csv(eui_url)

print(f"shape: {eui_df.shape}")
eui_df.head()

shape: (482, 5)


Unnamed: 0,City,Geonames ID,Country,Residential EUI (kWh/m2/year),Non-residential EUI (kWh/m2/year)
0,Nha Trang,1572151,Vietnam,59.096065,112.778867
1,Aberdeen,2657832,United Kingdom,231.302877,259.832393
2,Abidjan,2293538,Cote d'Ivoire,73.830819,105.622137
3,Abu Dhabi,292968,United Arab Emirates,128.447899,226.725457
4,Abuja,2352778,Nigeria,63.955819,103.009079


In [6]:
df = pd.merge(
    countries_df, eui_df, left_on="geonameid", right_on="Geonames ID", how="inner"
)

In [7]:
df = df[["geonameid", "latitude", "longitude"]]

## New Population 

In [8]:
gdf = gpd.GeoDataFrame(
    df, geometry=gpd.points_from_xy(df.longitude, df.latitude), crs="EPSG:4326"
)

# projection into meter
gdf_mercator = gdf.to_crs("EPSG:3857")

gdf_equal_area = gpd.GeoDataFrame(
    gdf_mercator,  # keep all the col
    geometry=gdf_mercator.geometry.to_crs("EPSG:3035"),
)


# equal-area projection
def create_true_area_buffer(point, distance_km=2):
    """generate 2km² square under the equal area projection"""
    side_m = distance_km * 1000  # convert to meter
    return box(
        point.x - side_m / 2,
        point.y - side_m / 2,
        point.x + side_m / 2,
        point.y + side_m / 2,
    )


# ensure to apply under the equal area projection
gdf_equal_area["buffer"] = gdf_equal_area.geometry.apply(create_true_area_buffer)


# transform buffer geometry to WGS84 while preserving area calculations in the equal-area coordinate system
gdf_buffers = gpd.GeoDataFrame(
    gdf_equal_area[["buffer"]],
    geometry="buffer",
    crs="EPSG:3035",  # Ensure the original projection is correct
).to_crs(
    "EPSG:4326"
)  # Convert to WGS84 as the final projection


# Perform zonal statistics (automatically handles coordinate system alignment)
stats = zonal_stats(
    vectors=gdf_buffers["buffer"],
    raster=raster_path,
    stats=["sum"],
    nodata=0,  # Adjust according to the actual NoData value in the raster
)


# compute population density (strictly per 4 km²)
gdf["population_density"] = [s["sum"] / 4 if s["sum"] else 0 for s in stats]

# generate the final result (preserving the original coordinate system)
pop_df = gdf[["geonameid", "latitude", "longitude", "population_density"]]

In [12]:
pop_df

Unnamed: 0,geonameid,latitude,longitude,population_density
0,292968,24.45118,54.39696,6027.494801
1,1138958,34.52813,69.17233,40085.123056
2,3183875,41.32750,19.81889,26918.855498
3,616052,40.18111,44.51361,13532.447017
4,2240449,-8.83682,13.23432,37139.850778
...,...,...,...,...
477,1018725,-29.12107,26.21400,2799.432817
478,3369157,-33.92584,18.42322,2845.734900
479,909137,-15.40669,28.28713,7031.355416
480,890299,-17.82772,31.05337,12115.097301


In [11]:
# # get new pop csv
pop_df.to_csv("../data/02_interim/population_density.csv", index=False)