In [1]:
import requests
import geopandas as gpd
import pandas as pd
import os
# import requests_cache
# from requests.adapters import HTTPAdapter
# from requests.packages.urllib3.util.retry import Retry
# import openmeteo_requests
# OPENMETEO_AVAILABLE = True
from geopy.geocoders import Nominatim
from geopy.exc import GeocoderTimedOut, GeocoderUnavailable
import openmeteo_requests

import requests_cache
import pandas as pd
from retry_requests import retry

import matplotlib as plt
import geoviews.feature as gf
import xarray as xr
from cartopy import crs
from geoviews import opts
import geoviews as gv
gv.extension('bokeh', 'matplotlib')
import geopandas as gpd
import seaborn as sns

# for basemaps
import contextily as ctx

# For spatial statistics
import esda
from esda.moran import Moran, Moran_Local

import splot
from splot.esda import moran_scatterplot, plot_moran, lisa_cluster,plot_moran_simulation

import libpysal as lps

# Graphics
import matplotlib.pyplot as plt
import plotly.express as px


In [2]:
def temp_humi_rain_data(latitude, longitude):
    """Fetch daily temperature and precipitation history from the Open-Meteo archive API.

    The request covers 2000-01-01 through 2024-01-01 for the given location.
    Response metadata (coordinates, elevation, timezone) is printed.

    Args:
        latitude: Latitude forwarded to the API and copied into the result.
        longitude: Longitude forwarded to the API and copied into the result.

    Returns:
        pandas.DataFrame with a "date" column, one column per requested daily
        variable, and constant "latitude" / "longitude" columns holding the
        arguments that were passed in.
    """
    # Cached session (expire_after=-1, i.e. never expires in requests-cache)
    # wrapped so failed requests are retried.
    openmeteo = openmeteo_requests.Client(
        session = retry(
            requests_cache.CachedSession('.cache', expire_after = -1),
            retries = 5,
            backoff_factor = 0.2,
        )
    )

    # The API returns variables in the order they are requested, so this one
    # list drives both the request and the column assignment below.
    variable_names = [
        "temperature_2m_mean",
        "apparent_temperature_mean",
        "precipitation_sum",
        "rain_sum",
    ]
    query = {
        "latitude": latitude,
        "longitude": longitude,
        "start_date": "2000-01-01",
        "end_date": "2024-01-01",
        "daily": variable_names,
    }

    # Only the first location/model of the response is processed.
    response = openmeteo.weather_api("https://archive-api.open-meteo.com/v1/archive", params=query)[0]
    print(f"Coordinates {response.Latitude()}°N {response.Longitude()}°E")
    print(f"Elevation {response.Elevation()} m asl")
    print(f"Timezone {response.Timezone()} {response.TimezoneAbbreviation()}")
    print(f"Timezone difference to GMT+0 {response.UtcOffsetSeconds()} s")

    daily = response.Daily()
    series = [daily.Variables(position).ValuesAsNumpy() for position in range(len(variable_names))]

    # Timestamps are rebuilt from the block's start, end and interval (seconds, UTC);
    # the end bound is excluded.
    timestamps = pd.date_range(
        start = pd.to_datetime(daily.Time(), unit = "s", utc = True),
        end = pd.to_datetime(daily.TimeEnd(), unit = "s", utc = True),
        freq = pd.Timedelta(seconds = daily.Interval()),
        inclusive = "left"
    )

    columns = {"date": timestamps}
    columns.update(zip(variable_names, series))
    columns["latitude"] = latitude
    columns["longitude"] = longitude
    return pd.DataFrame(data = columns)


In [3]:
def flood_data(latitude, longitude):
    """Download daily river-discharge statistics from the Open-Meteo flood API.

    The request covers 2000-01-01 through 2024-01-01. Response metadata
    (coordinates, elevation, timezone) is printed.

    Args:
        latitude: Latitude of the location to query.
        longitude: Longitude of the location to query.

    Returns:
        pandas.DataFrame with a "date" column and one column per requested
        discharge variable (river_discharge, _mean, _median, _max, _min).
    """
    # Cached session (entries expire after 3600, seconds per requests-cache)
    # wrapped so failed requests are retried.
    cache_session = requests_cache.CachedSession('.cache', expire_after = 3600)
    retry_session = retry(cache_session, retries = 5, backoff_factor = 0.2)
    openmeteo = openmeteo_requests.Client(session = retry_session)

    # The API returns variables in the order they are requested, so this one
    # list drives both the request and the column assignment below.
    variable_names = [
        "river_discharge",
        "river_discharge_mean",
        "river_discharge_median",
        "river_discharge_max",
        "river_discharge_min",
    ]
    url = "https://flood-api.open-meteo.com/v1/flood"
    params = {
        # Use the caller's location; previously a fixed point (59.91, 10.75)
        # was sent regardless of the arguments.
        "latitude": latitude,
        "longitude": longitude,
        "daily": variable_names,
        "start_date": "2000-01-01",
        "end_date": "2024-01-01"
    }
    responses = openmeteo.weather_api(url, params=params)

    # Only the first location/model of the response is processed.
    response = responses[0]
    print(f"Coordinates {response.Latitude()}°N {response.Longitude()}°E")
    print(f"Elevation {response.Elevation()} m asl")
    print(f"Timezone {response.Timezone()} {response.TimezoneAbbreviation()}")
    print(f"Timezone difference to GMT+0 {response.UtcOffsetSeconds()} s")

    daily = response.Daily()
    values = [daily.Variables(position).ValuesAsNumpy() for position in range(len(variable_names))]

    # Timestamps are rebuilt from the block's start, end and interval (seconds, UTC);
    # the end bound is excluded.
    daily_data = {"date": pd.date_range(
        start = pd.to_datetime(daily.Time(), unit = "s", utc = True),
        end = pd.to_datetime(daily.TimeEnd(), unit = "s", utc = True),
        freq = pd.Timedelta(seconds = daily.Interval()),
        inclusive = "left"
    )}
    for name, column in zip(variable_names, values):
        daily_data[name] = column

    return pd.DataFrame(data = daily_data)

In [7]:
# # https://geocoding-api.open-meteo.com/v1/search?name=Niamey&count=50&language=en&format=json

# url = "https://climate-api.open-meteo.com/v1/climate"
# params = {
#     "latitude": 13.5137,
#     "longitude": 2.1098,
#     "start_date": "2000-01-01",
#     "end_date": "2024-01-01",
#     "daily": ["shortwave_radiation_sum", "pressure_msl_mean", "soil_moisture_0_to_10cm_mean", "et0_fao_evapotranspiration_sum"]
# }
# responses = openmeteo.weather_api(url, params=params)

In [None]:
# Open-Meteo geocoding search for the name "Niamey" (count=10, language=en, JSON).
url = (
    "https://geocoding-api.open-meteo.com/v1/search"
    "?name=Niamey&count=10&language=en&format=json"
)

# Coordinates kept next to the query; nothing in this cell sends either value.
params = dict(latitude=13.5137, longitude=2.1098)


In [4]:
def climate_data(latitude, longitude):
    """Download daily climate variables from the Open-Meteo climate API.

    The request covers 2000-01-01 through 2024-01-01. Response metadata
    (coordinates, elevation, timezone) is printed.

    Args:
        latitude: Latitude of the location to query.
        longitude: Longitude of the location to query.

    Returns:
        pandas.DataFrame with a "date" column and one column per requested
        variable (shortwave_radiation_sum, pressure_msl_mean,
        soil_moisture_0_to_10cm_mean, et0_fao_evapotranspiration_sum).
    """
    # Cached session (entries expire after 3600, seconds per requests-cache)
    # wrapped so failed requests are retried.
    cache_session = requests_cache.CachedSession('.cache', expire_after = 3600)
    retry_session = retry(cache_session, retries = 5, backoff_factor = 0.2)
    openmeteo = openmeteo_requests.Client(session = retry_session)

    # The API returns variables in the order they are requested, so this one
    # list drives both the request and the column assignment below.
    variable_names = [
        "shortwave_radiation_sum",
        "pressure_msl_mean",
        "soil_moisture_0_to_10cm_mean",
        "et0_fao_evapotranspiration_sum",
    ]
    url = "https://climate-api.open-meteo.com/v1/climate"
    params = {
        # Use the caller's location; previously a fixed point (13.5137, 2.1098)
        # was sent regardless of the arguments.
        "latitude": latitude,
        "longitude": longitude,
        "start_date": "2000-01-01",
        "end_date": "2024-01-01",
        "daily": variable_names
    }
    responses = openmeteo.weather_api(url, params=params)

    # Only the first location/model of the response is processed.
    response = responses[0]
    print(f"Coordinates {response.Latitude()}°N {response.Longitude()}°E")
    print(f"Elevation {response.Elevation()} m asl")
    print(f"Timezone {response.Timezone()} {response.TimezoneAbbreviation()}")
    print(f"Timezone difference to GMT+0 {response.UtcOffsetSeconds()} s")

    daily = response.Daily()
    values = [daily.Variables(position).ValuesAsNumpy() for position in range(len(variable_names))]

    # Timestamps are rebuilt from the block's start, end and interval (seconds, UTC);
    # the end bound is excluded.
    daily_data = {"date": pd.date_range(
        start = pd.to_datetime(daily.Time(), unit = "s", utc = True),
        end = pd.to_datetime(daily.TimeEnd(), unit = "s", utc = True),
        freq = pd.Timedelta(seconds = daily.Interval()),
        inclusive = "left"
    )}
    for name, column in zip(variable_names, values):
        daily_data[name] = column

    return pd.DataFrame(data = daily_data)

In [5]:

def get_coordinates(location):
    """Geocode a place name with Nominatim.

    Args:
        location: Free-text query passed to the geocoder.

    Returns:
        (latitude, longitude) of the match, or (None, None) when nothing is
        found or the service times out / is unavailable (a message is printed
        in the latter case).
    """
    geocoder = Nominatim(user_agent="my_agent")
    try:
        match = geocoder.geocode(location, timeout=10)
        # A falsy result means the geocoder found nothing for the query.
        if not match:
            return None, None
        return match.latitude, match.longitude
    except (GeocoderTimedOut, GeocoderUnavailable):
        print("Error: Geocoding service timed out or unavailable. Please try again later.")
        return None, None

# Example usage
location = "Niamey, Niger"
lat, lon = get_coordinates(location)

# get_coordinates signals failure with None; compare against None explicitly
# so that a valid coordinate of 0.0 (equator / prime meridian) is not
# reported as "not found".
if lat is not None and lon is not None:
    print(f"Coordinates for {location}:")
    print(f"Latitude: {lat}")
    print(f"Longitude: {lon}")
else:
    print(f"Could not find coordinates for {location}")



Coordinates for Niamey, Niger:
Latitude: 13.524834
Longitude: 2.109823


In [6]:
# Request the climate series for the `lat` / `lon` values set by an earlier cell.
test = climate_data(lat, lon)
# Bare last expression: the notebook renders the returned DataFrame.
test

Coordinates 13.5°N 2.100006103515625°E
Elevation 202.0 m asl
Timezone None None
Timezone difference to GMT+0 0 s


Unnamed: 0,date,shortwave_radiation_sum,pressure_msl_mean,soil_moisture_0_to_10cm_mean,et0_fao_evapotranspiration_sum
0,2000-01-01 00:00:00+00:00,21.150856,1014.973083,0.030735,6.558338
1,2000-01-02 00:00:00+00:00,21.252209,1014.673035,0.030681,6.688530
2,2000-01-03 00:00:00+00:00,21.362318,1013.884033,0.030518,6.704773
3,2000-01-04 00:00:00+00:00,21.570112,1013.295044,0.030354,6.678779
4,2000-01-05 00:00:00+00:00,21.875753,1013.705994,0.030191,6.582730
...,...,...,...,...,...
8762,2023-12-28 00:00:00+00:00,20.194490,1011.717957,0.030513,6.609223
8763,2023-12-29 00:00:00+00:00,20.482523,1011.606750,0.030569,6.756763
8764,2023-12-30 00:00:00+00:00,20.867933,1011.095581,0.030624,6.933623
8765,2023-12-31 00:00:00+00:00,20.765699,1011.084351,0.030680,6.865988


In [14]:
def get_life_expectancy_data(timeout=30):
    """Download the WHO GHO life-expectancy-at-birth indicator as a DataFrame.

    Args:
        timeout: Seconds to wait for the HTTP request before giving up.

    Returns:
        pandas.DataFrame built by flattening the records under the "value"
        key of the JSON response.

    Raises:
        requests.HTTPError: If the server answers with an error status.
    """
    # WHO GHO OData API endpoint for Life Expectancy at Birth (Indicator Code: WHOSIS_000001)
    url = "https://ghoapi.azureedge.net/api/WHOSIS_000001"

    # A timeout keeps the notebook from hanging indefinitely on a stalled
    # connection; raise_for_status surfaces HTTP errors instead of failing
    # later while parsing an error page.
    response = requests.get(url, timeout=timeout)
    response.raise_for_status()
    data = response.json()

    # One row per record in the "value" array.
    return pd.json_normalize(data['value'])
# life_expectancy_df = get_life_expectancy_data()
# life_expectancy_df

In [16]:
def get_world_bank_gdp_data(countries, indicator='NY.GDP.MKTP.CD', start=2000, end=2020):
    """Download a World Bank indicator for the given countries.

    Args:
        countries: Country codes passed to ``wb.download`` as ``country``.
        indicator: World Bank indicator code; defaults to the GDP series
            'NY.GDP.MKTP.CD'.
        start: First year requested (default 2000).
        end: Last year requested (default 2020).

    Returns:
        pandas.DataFrame with the downloaded index levels turned into regular
        columns, followed by the indicator column.
    """
    # Imported here because pandas_datareader is not part of the notebook's
    # top import cell.
    from pandas_datareader import wb

    data = wb.download(indicator=indicator, country=countries, start=start, end=end)

    # Return a new frame with the index flattened instead of mutating in place.
    return data.reset_index()
# Example countries: Niger (NER), Nigeria (NGA), Ghana (GHA)
gdp_data = get_world_bank_gdp_data(['NER', 'NGA', 'GHA'])
# Bare last expression: the notebook renders only the first rows of the frame.
gdp_data.head()

  data = wb.download(indicator=indicator, country=countries, start=2000, end=2020)


Unnamed: 0,country,year,NY.GDP.MKTP.CD
0,Ghana,2020,70043100000.0
1,Ghana,2019,68337970000.0
2,Ghana,2018,67298910000.0
3,Ghana,2017,60405920000.0
4,Ghana,2016,56164930000.0


In [9]:
lat, lon = get_coordinates("Niamey, Niger")
# get_coordinates returns (None, None) when geocoding fails; stop here with a
# clear message rather than sending an empty location to the weather API.
if lat is None or lon is None:
    raise ValueError("Could not geocode 'Niamey, Niger'")
temp_humi_rain_data(lat, lon)

Coordinates 13.532513618469238°N 2.142857313156128°E
Elevation 212.0 m asl
Timezone None None
Timezone difference to GMT+0 0 s


Unnamed: 0,date,temperature_2m_mean,apparent_temperature_mean,precipitation_sum,rain_sum,latitude,longitude
0,2000-01-01 00:00:00+00:00,23.413996,20.230715,0.0,0.0,13.524834,2.109823
1,2000-01-02 00:00:00+00:00,24.505667,19.991339,0.0,0.0,13.524834,2.109823
2,2000-01-03 00:00:00+00:00,23.834831,18.513672,0.0,0.0,13.524834,2.109823
3,2000-01-04 00:00:00+00:00,23.905664,19.644653,0.0,0.0,13.524834,2.109823
4,2000-01-05 00:00:00+00:00,23.507750,19.843269,0.0,0.0,13.524834,2.109823
...,...,...,...,...,...,...,...
8762,2023-12-28 00:00:00+00:00,22.600000,18.357813,0.0,0.0,13.524834,2.109823
8763,2023-12-29 00:00:00+00:00,22.835417,18.744970,0.0,0.0,13.524834,2.109823
8764,2023-12-30 00:00:00+00:00,22.856247,19.008600,0.0,0.0,13.524834,2.109823
8765,2023-12-31 00:00:00+00:00,24.052084,20.331644,0.0,0.0,13.524834,2.109823


In [11]:
def retrieve_data(df):
    """Keep only the known indicator and identifier columns that exist in ``df``.

    Columns are organised in groups. A group is selected only when every one
    of its columns is present in ``df``; partially available groups are
    skipped entirely. The selection (or a notice that nothing matched) is
    printed.

    Args:
        df: DataFrame to filter.

    Returns:
        A DataFrame restricted to the selected columns, in group order, or
        None when no group is fully available.
    """
    nightlights = (
        [f'viirs_ntl_annual_v21_avg_masked.{year}.mean' for year in range(2012, 2022)]
        + [f'viirs_ntl_annual_v22_avg_masked.{year}.mean' for year in (2022, 2023)]
    )
    world_bank = ['asdf_id', 'worldbank_geocodedresearchrelease_level1_v1_4_2.f446f18.sum']
    malaria = [f'map_pf_incidence_rate.{year}.mean' for year in range(2000, 2021)]
    density = [f'gpw_v4_rev11_density.{year}.mean' for year in range(2000, 2021, 5)]
    # The conflict columns are deliberately listed in this non-chronological
    # order because it determines the column order of the result.
    conflict_years = (2006, 2005, 2004, 2003, 2001, 2002, 2007, 2008,
                      2009, 2012, 2011, 2010, 2014, 2013, 2015, 2016)
    conflict = [f'ucdp_deaths_171.{year}.sum' for year in conflict_years]
    # Identifier columns are each their own single-column group.
    identifiers = ['Level', 'gqid', 'id', 'shapeGroup', 'shapeID', 'shapeISO', 'shapeName', 'shapeType']

    column_groups = [nightlights, world_bank, malaria, density, conflict]
    column_groups += [[name] for name in identifiers]

    present = set(df.columns)
    selected = [name for group in column_groups if set(group) <= present for name in group]

    if not selected:
        print("None of the specified columns are available in the DataFrame.")
        return None

    df_modified = df[selected]
    print("DataFrame with available columns:")
    print(df_modified)
    return df_modified

In [12]:

import os
from pathlib import Path

# Root folder of the project data. Set the GIS_PROJECT_DATA_DIR environment
# variable to run the notebook on another machine; the default is the
# original author's location.
DATA_DIR = Path(os.environ.get(
    "GIS_PROJECT_DATA_DIR",
    "C:/Users/bachi/Documents/GIT_PROJECTS/GIS_Project/data",
))


def _read_country_csv(folder, filename=None):
    """Read ``DATA_DIR/<folder>/<filename>``; the file defaults to ``<folder>.csv``."""
    return pd.read_csv(DATA_DIR / folder / (filename or f"{folder}.csv"))


gdf_world = gpd.read_file(DATA_DIR / "geoBoundariesCGAZ_ADM1.geojson") #1-2min time processing



#####################CSV IMPORT ####################
#Water Data
WaterData = pd.read_csv(DATA_DIR / "jfkt-jmqa.csv")
#Food Security Data
FoodSecurity_data = pd.read_csv(DATA_DIR / "FEWS June 2024 Update TrueBoundaries_08-30-24.csv")
#Malaria Data
malaria_data = pd.read_csv(DATA_DIR / "Subnational_Unit-data.csv")
#AIDDATA Data (one CSV per country, stored in a folder of the same name)
GNB = _read_country_csv("Guinea-Bissau")
AFG = _read_country_csv("Afghanistan")
BFA = _read_country_csv("Burkina Faso")
BEN = _read_country_csv("Benin")
BGD = _read_country_csv("Bangladesh")
# The folder name carries the accent but the file name does not.
CIV = _read_country_csv("Côte d'Ivoire", "Cote d'Ivoire.csv")
CPV = _read_country_csv("Cabo Verde")
GMB = _read_country_csv("Gambia")
GHA = _read_country_csv("Ghana")

GIN = _read_country_csv("Guinea")
IND = _read_country_csv("India")
LAO = _read_country_csv("Laos")
LBR = _read_country_csv("Liberia")
MYS = _read_country_csv("Malaysia")
MLI = _read_country_csv("Mali")
NPL = _read_country_csv("Nepal")
NER = _read_country_csv("Niger")
NGA = _read_country_csv("Nigeria")

PAK = _read_country_csv("Pakistan")
PHL = _read_country_csv("Philippines")
SEN = _read_country_csv("Sénegal")
SLE = _read_country_csv("Sierra Leone")
LKA = _read_country_csv("Sri Lanka")
THA = _read_country_csv("Thailand")
TGO = _read_country_csv("Togo")
VNM = _read_country_csv("Vietnam")
YEM = _read_country_csv("Yemen")


In [13]:
# Per-country frames loaded by the CSV import cell, in the order they are stacked.
dataframes = [GNB, AFG, BFA, BEN, BGD, CIV, CPV, GMB, GHA, GIN, IND, LAO, LBR, MYS, MLI, NPL, NER, NGA, PAK, PHL, SEN, SLE, LKA, THA, TGO, VNM, YEM]
# Stack the column selection of every country into one frame with a fresh index.
# retrieve_data prints each selection and returns None when no column group matches.
# NOTE(review): pandas.concat silently drops None entries, so such a country
# would vanish without warning — confirm this is intended.
concatenated_countries = pd.concat([retrieve_data(df) for df in dataframes], ignore_index=True)

DataFrame with available columns:
   viirs_ntl_annual_v21_avg_masked.2012.mean  \
0                                   0.757519   
1                                   0.000336   
2                                   0.016781   
3                                   0.000142   
4                                   0.000229   
5                                   0.000166   
6                                   0.000154   
7                                   0.000000   
8                                   0.000430   

   viirs_ntl_annual_v21_avg_masked.2013.mean  \
0                                   0.677764   
1                                   0.000402   
2                                   0.016446   
3                                   0.000227   
4                                   0.000326   
5                                   0.000236   
6                                   0.000265   
7                                   0.000000   
8                                   0.000836   

   v

In [14]:
# `df` is an alias of malaria_data, so the new column is added to that frame too.
df = malaria_data
metric_label = df['Metric'] + ' (' + df['Units'] + ')'
df['Metric_Unit'] = metric_label

# Reshape to one row per administrative unit and one column per
# (metric label, year) pair.
pivoted = df.pivot_table(
    values='Value',
    index=['Name', 'ISO3', 'National Unit', 'Admin Level'],
    columns=['Metric_Unit', 'Year'],
    aggfunc='first',  # on duplicate entries keep the first value
)
malaria_data_transformed = pivoted.reset_index()

# Collapse the two-level column labels into "<level0>_<level1>" strings.
flat_names = []
for col in malaria_data_transformed.columns:
    flat_names.append(f"{col[0]}_{col[1]}" if isinstance(col, tuple) else col)
malaria_data_transformed.columns = flat_names

In [15]:
# Give every dataset the same join-key column name, 'region_name'.
# Note that `Concatenated_countries` (capital C) is a new variable; the
# lower-case `concatenated_countries` keeps its original 'shapeName' column.
Concatenated_countries = concatenated_countries.rename(columns={'shapeName': 'region_name'})
# 'Name_' is the label the pivot cell's f"{col[0]}_{col[1]}" flattening writes
# for the 'Name' index column — presumably because its second level is empty;
# verify against the pivot output.
malaria_data_transformed = malaria_data_transformed.rename(columns={'Name_': 'region_name'})
gdf_world = gdf_world.rename(columns={'shapeName': 'region_name'})
food_security = FoodSecurity_data.rename(columns={'country': 'region_name'})  # Assumes the key column is named 'country' — TODO confirm against the CSV

# Merge the dataframes
# combined_df = gdf_world.merge(Concatenated_countries, on='region_name', how='left')
# combined_df = combined_df.merge(malaria_data, on='region_name', how='left')
# combined_df = combined_df.merge(food_security, on='region_name', how='left')



In [23]:
# For each dataset print its row count next to the number of distinct
# 'region_name' values, to see how far the join key is from being unique.
for frame in (malaria_data_transformed, gdf_world, food_security):
    print(len(frame), len(frame["region_name"].unique()))

1840 1782
3224 3143
297232 22


In [128]:
# set_index moves 'region_name' out of the columns, so running this cell a
# second time used to raise KeyError; only convert while the column exists.
if 'region_name' in malaria_data_transformed.columns:
    malaria_data_transformed = malaria_data_transformed.set_index('region_name')

In [131]:
# gdf_world = gdf_world.set_index('region_name')
# Concatenated_countries = Concatenated_countries.set_index('region_name')
# malaria_data_transformed = malaria_data_transformed.set_index('region_name')
# food_security = food_security.set_index('region_name')

# # Now concatenate the dataframes
# combined_df = pd.concat([gdf_world, Concatenated_countries, malaria_data_transformed, food_security], 
#                         axis=1, 
#                         join='outer')

# # Reset the index to make 'region_name' a column again
# combined_df = combined_df.reset_index()

In [None]:
# Check for any countries that didn't match: list the region names whose
# value from each source dataset is missing after the merge.
# NOTE(review): in the visible notebook `combined_df` is only assigned inside
# commented-out code, so this cell raises NameError on a fresh kernel —
# confirm where the merged frame should come from.
# NOTE(review): the 'column_from_*' names look like placeholders rather than
# real columns; replace each with an actual column of that dataset.
print(combined_df[combined_df['column_from_Concatenated_countries'].isnull()]['region_name'].unique())
print(combined_df[combined_df['column_from_malaria_data'].isnull()]['region_name'].unique())
print(combined_df[combined_df['column_from_food_security'].isnull()]['region_name'].unique())

# Check for duplicate columns: pandas merge appends '_x' / '_y' to clashing names.
print([col for col in combined_df.columns if col.endswith('_x') or col.endswith('_y')])

In [42]:
# gdf_test = gdf_world.to_crs(epsg=3857) 
# # plot it!
# fig, ax = plt.subplots(figsize=(60,15))

# gdf_test.plot(ax=ax,
#          color='black', 
#          edgecolor='white',
#          lw=0.7,
#          alpha=0.45)

# # no axis
# ax.axis('off')
# # add a basemap
# ctx.add_basemap(ax,source=ctx.providers.CartoDB.Positron)
# # Save the figure instead of showing it
# output_path = 'plots/world_map.png'
# plt.savefig(output_path, dpi=300, bbox_inches='tight')
# plt.close(fig)  # Close the figure to free up memory

# print(f"Plot saved as '{output_path}'")

Plot saved as 'world_map.png'


In [83]:
# from plotnine import ggplot, aes, geom_polygon, theme_void, ggtitle, theme, element_text, coord_fixed, scale_x_continuous, scale_y_continuous
# from shapely.geometry import Polygon, MultiPolygon

In [71]:
# # Reproject the data to Web Mercator (EPSG:3857)
# gdf_test = gdf_world.to_crs(epsg=3857)

# # Function to extract x and y coordinates from Polygons and MultiPolygons
# def get_coords(geom):
#     if isinstance(geom, Polygon):  # If it's a simple Polygon
#         exterior = geom.exterior
#         return exterior.coords.xy  # No need to convert to list; it returns arrays
#     elif isinstance(geom, MultiPolygon):  # If it's a MultiPolygon
#         coords = []
#         for polygon in geom.geoms:  # Iterate over the individual polygons
#             exterior = polygon.exterior
#             coords.append(exterior.coords.xy)
#         return coords
#     return None

# # Apply the function to extract coordinates
# gdf_test['coords'] = gdf_test.geometry.apply(get_coords)

# # Flatten the coordinates into x and y for Plotnine
# rows = []
# for idx, row in gdf_test.iterrows():
#     coords = row['coords']
#     if coords:  # If there are valid coordinates
#         if isinstance(coords[0][0], list):  # For MultiPolygon, unpack the parts
#             for part in coords:
#                 for x_array, y_array in zip(part[0], part[1]):  # Flatten the arrays
#                     rows.append([row['shapeName'], float(x_array), float(y_array)])  # Convert to floats
#         else:  # For simple Polygon
#             for x_val, y_val in zip(coords[0], coords[1]):  # Directly iterate over the x and y arrays
#                 for x, y in zip(x_val, y_val):  # Flatten the arrays
#                     rows.append([row['shapeName'], float(x), float(y)])  # Convert to floats

# # Create a DataFrame with the extracted coordinates
# df = pd.DataFrame(rows, columns=['shapeName', 'x', 'y'])

# # Create the plot using Plotnine
# plot = (ggplot(df, aes(x='x', y='y', group='shapeName', fill='shapeName'))
#         + geom_polygon(color='white', size=0.7, alpha=0.45)
#         + theme_void()  # Remove axes and background
#         + ggtitle("World Map (Web Mercator Projection)")
#        )

In [84]:
import os

# These names were previously only imported in a commented-out cell, so this
# cell failed with NameError on a fresh kernel.
from plotnine import (
    aes,
    coord_fixed,
    element_text,
    geom_polygon,
    ggplot,
    ggtitle,
    scale_x_continuous,
    scale_y_continuous,
    theme,
    theme_void,
)
from shapely.geometry import MultiPolygon, Polygon

gdf_test = gdf_world.to_crs('+proj=robin')

def get_coords(geom):
    """Return the exterior-ring coordinates of a geometry.

    A Polygon yields a list of coordinate tuples, a MultiPolygon yields a
    list of such lists (one per part), and any other geometry yields None.
    """
    if isinstance(geom, Polygon):
        return list(geom.exterior.coords)
    elif isinstance(geom, MultiPolygon):
        return [list(poly.exterior.coords) for poly in geom.geoms]
    return None

# Apply the function to extract coordinates
gdf_test['coords'] = gdf_test.geometry.apply(get_coords)

# Flatten the coordinates into x and y for Plotnine. Every ring gets its own
# id: without a grouping column geom_polygon joins all points into one single
# polygon, drawing lines between unrelated regions.
rows = []
ring_id = 0
for coords in gdf_test['coords']:
    if not coords:
        continue
    # MultiPolygon -> list of rings; Polygon -> a single ring.
    rings = coords if isinstance(coords[0], list) else [coords]
    for ring in rings:
        # `*_` tolerates coordinates that carry an extra z value.
        rows.extend([x, y, ring_id] for x, y, *_ in ring)
        ring_id += 1

# Create a DataFrame with the extracted coordinates
df = pd.DataFrame(rows, columns=['x', 'y', 'ring'])

# Create the plot using Plotnine
plot = (ggplot(df, aes(x='x', y='y', group='ring'))
        + geom_polygon(fill='lightblue', color='white', size=0.1)
        + coord_fixed(ratio=1.7)  # This replaces coord_map
        + scale_x_continuous(limits=(-1.8e7, 1.8e7))
        + scale_y_continuous(limits=(-0.9e7, 0.9e7))
        + theme_void()
        + theme(figure_size=(15, 7))
        + ggtitle("World Map (Robinson Projection)")
        + theme(plot_title=element_text(size=20, face="bold", hjust=0.5))
       )

output_path = 'plots/plotnine_world_map_robinson.png'
# Saving fails if the target folder is missing, so create it first.
os.makedirs(os.path.dirname(output_path), exist_ok=True)
plot.save(output_path, dpi=300, width=20, height=10)
# Display the plot. plotnine manages its own figure, so the former
# plt.figure(...) call only produced an extra empty figure and was dropped.
plot.show()

In [86]:
import os

import matplotlib.pyplot as plt
import geopandas as gpd

# Reproject the world boundaries (gdf_world, loaded in an earlier cell) to the
# Robinson projection.
gdf_robinson = gdf_world.to_crs('+proj=robin')

# Create the plot on an explicit figure/axes pair
fig, ax = plt.subplots(figsize=(20, 10))
gdf_robinson.plot(ax=ax, color='lightblue', edgecolor='white', linewidth=0.5)

# Remove axes
ax.axis('off')

# Add title
ax.set_title('World Map (Robinson Projection)', fontsize=20, fontweight='bold')

# Adjust layout and save; create the output folder first because savefig
# raises FileNotFoundError when it does not exist.
fig.tight_layout()
os.makedirs('plots', exist_ok=True)
fig.savefig('plots/world_map_robinson.png', dpi=300, bbox_inches='tight')

# Display the plot
plt.show()

