In [None]:
from os import getenv
from pathlib import Path

# Resolve the batch folders from the environment, falling back to the
# standard batch layout when the variables are not set.
input_folder = Path(
    getenv('CROSSCOMPUTE_INPUT_FOLDER', 'batches/standard/input')
)
output_folder = Path(
    getenv('CROSSCOMPUTE_OUTPUT_FOLDER', 'batches/standard/output')
)

# Create the output folder (and any missing parents) up front so later
# cells can write into it; an already existing folder is left untouched.
output_folder.mkdir(exist_ok=True, parents=True)

In [None]:
import csv

def extract_data(csv_path, series_code, value_column):
    """
    Extract one series from a CSV file, keyed by country code.

    The file must have 'Country Code' and 'Series Code' columns plus the
    requested value column. Only rows whose series code equals
    `series_code` contribute to the result; if a country appears more than
    once for that series, the last matching row wins.

    Args:
        csv_path (str or os.PathLike): Path to the CSV file.
        series_code (str): The series code to extract.
        value_column (str): Name of the column holding the values.

    Returns:
        dict: Maps each country code to its value as a float, or to None
        when the cell is missing or cannot be parsed as a number.

    Raises:
        KeyError: If the header lacks a required column (raised when the
            first data row is read).
    """
    data = {}

    # newline='' lets the csv module handle line endings itself, as its
    # documentation requires for correct parsing of quoted fields.
    with open(csv_path, 'r', newline='') as csv_file:
        for row in csv.DictReader(csv_file):
            country_code = row['Country Code']
            current_series_code = row['Series Code']
            raw_value = row[value_column]

            # Filter first so unrelated series are never parsed as numbers.
            if current_series_code != series_code:
                continue

            try:
                data[country_code] = float(raw_value)
            except (TypeError, ValueError):
                # TypeError: short row, so the cell is None.
                # ValueError: text that is not a number.
                data[country_code] = None

    return data

In [None]:
import json

# Load the run settings from the JSON file in the input folder.
var_path = input_folder / 'variables.dictionary'

# Decode explicitly as UTF-8 (the standard JSON encoding) instead of relying
# on the platform's default encoding, which can misread non-ASCII text such
# as titles on some systems.
with var_path.open('rt', encoding='utf-8') as f:
    data = json.load(f)

csv_path = input_folder / 'data.csv'

# Required settings: a missing key raises KeyError here.
series_code = data['series_code']
value_column = data['value_column']

# Optional settings: the texts default to None, the colors to a
# white-to-red gradient.
fig_title = data.get('fig_title')
legend_text = data.get('legend_text')
min_color = data.get('min_color', '#ffffff')
max_color = data.get('max_color', '#ff0000')

In [None]:
import geopandas as gpd
import matplotlib.pyplot as plt

# Country code -> value for the configured series and column. Despite its
# name, `mock_data` holds the values read from the input CSV by extract_data.
mock_data = extract_data(csv_path, series_code, value_column)

In [None]:

from hashlib import blake2b

def get_hash(text):
    """Return the hexadecimal BLAKE2b digest of `text`.

    The text is encoded with str.encode()'s default encoding before hashing.
    """
    encoded_text = text.encode()
    return blake2b(encoded_text).hexdigest()

# Country boundaries are cached on disk under a file name derived from the
# hash of the source URL, so the download only happens when no cached copy
# exists yet.
geojson_url = 'https://github.com/datasets/geo-countries/raw/master/data/countries.geojson'

geojson_folder = Path('geojson')
geojson_folder.mkdir(exist_ok=True)
geojson_path = geojson_folder / f'{get_hash(geojson_url)}.geojson'

if geojson_path.exists():
    # Reuse the cached copy.
    world_gdf = gpd.read_file(geojson_path)
else:
    # First run: fetch from the URL and store a local copy for next time.
    world_gdf = gpd.read_file(geojson_url)
    world_gdf.to_file(geojson_path, driver='GeoJSON')

# Join the extracted values onto the country shapes via the 'ISO_A3' column.
# merge() defaults to an inner join, so only countries present in both
# tables are kept.
merged_gdf = world_gdf.merge(
    gpd.GeoDataFrame({
        'ISO_A3': list(mock_data),
        'value': list(mock_data.values()),
    }),
    on='ISO_A3',
)


In [None]:
from matplotlib.colors import LinearSegmentedColormap
import numpy as np

num_colors = 1000

# Smallest and largest values among the merged countries.
min_value, max_value = merged_gdf['value'].min(), merged_gdf['value'].max()

# Min-max rescaling of the values relative to that range.
normalized_values = (
    (merged_gdf['value'] - min_value) / (max_value - min_value)
)

# Two-stop gradient running from min_color to max_color.
colors = np.array([min_color, max_color])
cmap = LinearSegmentedColormap.from_list('custom_gradient', colors)


In [None]:
from mpl_toolkits.axes_grid1 import make_axes_locatable

# Plot the map with color based on the 'value' column
fig, ax = plt.subplots(1, 1, figsize=(9, 9))

# Reserve a narrow axes to the right of the map for the colorbar.
divider = make_axes_locatable(ax)
cax = divider.append_axes("right", size="5%", pad=0.1)

# Reproject to EPSG:3857. Rebinding the name instead of using inplace=True
# leaves the previous frame object untouched.
merged_gdf = merged_gdf.to_crs(epsg=3857)

merged_gdf.plot(
    column='value',
    cmap=cmap,
    linewidth=0.8,
    ax=ax,
    edgecolor='0.8',
    legend=True,
    cax=cax,
    legend_kwds={'label': legend_text},
    # Rows without a value are drawn hatched in grey.
    missing_kwds={'color': 'grey', "hatch": "///", 'label': 'Missing values'},
)

# Set the title on the map axes explicitly: plt.title() targets whichever
# axes is current, which is not guaranteed to be `ax` once the colorbar
# axes has been added to the figure.
ax.set_title(fig_title, fontsize=12, fontweight='bold', color='black', loc='right', pad=20)

# Save the plot as an image
output_image_path = output_folder / 'geographic_visualization.png'
# Save through the figure object rather than pyplot's current-figure state.
fig.savefig(output_image_path, dpi=300, bbox_inches='tight')

print(f"Colored map saved as '{output_image_path}'.")

# import os
# import time

# Sleep for a specified duration (e.g., 15 minutes)
# time.sleep(900)  # 900 seconds = 15 minutes

# # Remove the image file after the specified duration
# if os.path.exists(output_image_path):
#     os.remove(output_image_path)
