In [6]:
# Define Imports
import os
import googlemaps
import pandas as pd
import plotly.express as px
import dash_bootstrap_components as dbc
import pydeck as pdk
from dash import Dash, dcc, html
from dotenv import load_dotenv
import numpy as np

In [7]:
# Load ENV variables
load_dotenv(dotenv_path='../../.env')

True

In [8]:
# Read Silver Submissions Dataset
submissions_silver_df = pd.read_csv("../../data/submissions_silver.csv")
submissions_silver_df

Unnamed: 0,initiator_region,item_classification,submission_status,number_of_submissions,latitude,longitude
0,Львівська,kitchen,pending,21,49.839683,24.029717
1,Кіровоградська,kitchen,pending,17,48.507933,32.262317
2,Сумська,kitchen,pending,17,50.907700,34.798100
3,Харківська,shelter,approval,16,50.002013,36.307399
4,Київська,kitchen,pending,16,50.052951,30.766713
...,...,...,...,...,...,...
81,Івано-Франківська,kitchen,approval,1,48.920062,24.708916
82,Житомирська,kitchen,approval,1,50.254650,28.658667
83,Київська,kitchen,approval,1,50.052951,30.766713
84,Сумська,kitchen,approval,1,50.907700,34.798100


In [9]:
# Add Data Augmentations Specific for the Plots
# For instance, the function below will transform each sample's position
#   so at the pydeck map it would look like a BarPlot.
def create_vertical_bars(df, lat_col='latitude', long_col='longitude', step=0.001):
    new_rows = []

    # Determine unique values for each groupby option globally
    unique_values = {
        'item_classification': df['item_classification'].unique(),
        'submission_status': df['submission_status'].unique()
    }

    for groupby, values in unique_values.items():
        for region in df['initiator_region'].unique():
            region_df = df[df['initiator_region'] == region]

            # Calculate the initial offset to center the bar
            n_points = len(values)
            initial_offset = -(n_points // 2) * step

            for i, value in enumerate(values):
                relevant_rows = region_df[region_df[groupby] == value]

                if relevant_rows.empty:
                    # Create a placeholder row if the category is missing
                    new_lat = region_df[lat_col].iloc[0] + step
                    new_long = region_df[long_col].iloc[0] + initial_offset + i * step
                    placeholder_row = region_df.iloc[0].copy()
                    placeholder_row[groupby] = value
                    placeholder_row[f'{lat_col}_grid_{groupby}'] = new_lat
                    placeholder_row[f'{long_col}_grid_{groupby}'] = new_long
                    placeholder_row['number_of_submissions'] = 0
                    placeholder_row['color'] = [0, 0, 0]
                    new_rows.append(placeholder_row)
                else:
                    for _, row in relevant_rows.iterrows():
                        new_lat = row[lat_col] + step
                        new_long = row[long_col] + initial_offset + i * step
                        new_row = row.copy()
                        new_row[f'{lat_col}_grid_{groupby}'] = new_lat
                        new_row[f'{long_col}_grid_{groupby}'] = new_long
                        new_rows.append(new_row)

    return pd.DataFrame(new_rows)


submissions_golden_df = create_vertical_bars(submissions_silver_df, step=0.2 * 0.8)
submissions_golden_df

Unnamed: 0,initiator_region,item_classification,submission_status,number_of_submissions,latitude,longitude,latitude_grid_item_classification,longitude_grid_item_classification,color,latitude_grid_submission_status,longitude_grid_submission_status
0,Львівська,kitchen,pending,21,49.839683,24.029717,49.999683,23.869717,,,
34,Львівська,kitchen,withdrawn,5,49.839683,24.029717,49.999683,23.869717,,,
41,Львівська,kitchen,approval,4,49.839683,24.029717,49.999683,23.869717,,,
80,Львівська,kitchen,declined,1,49.839683,24.029717,49.999683,23.869717,,,
59,Львівська,shelter,pending,2,49.839683,24.029717,49.999683,24.029717,,,
...,...,...,...,...,...,...,...,...,...,...,...
52,Закарпатська,kitchen,approval,0,48.620800,22.287883,,,"[0, 0, 0]",48.7808,21.967883
67,Закарпатська,shelter,withdrawn,1,48.620800,22.287883,,,,48.7808,22.127883
52,Закарпатська,kitchen,active,0,48.620800,22.287883,,,"[0, 0, 0]",48.7808,22.287883
52,Закарпатська,kitchen,unsuccessful,0,48.620800,22.287883,,,"[0, 0, 0]",48.7808,22.447883


In [11]:
# Translate regions from UKR to ENG
mapping_dict: str = {
    'Івано-Франківська': 'Ivano-Frankivsk',
    'Волинська': 'Volyn',
    'Вінницька': 'Vinnytsia',
    'Дніпропетровська': 'Dnipropetrovsk',
    'Житомирська': 'Zhytomyr',
    'Закарпатська': 'Transcarpathian',
    'Запорізька': 'Zaporizhia',
    'Київська': 'Kyiv',
    'Кіровоградська': 'Kirovohrad',
    'Львівська': 'Lviv',
    'Миколаївська': 'Mykolaiv',
    'Одеська': 'Odessa',
    'Полтавська': 'Poltava',
    'Рівненська': 'Rivne',
    'Сумська': 'Sumy',
    'Тернопільська': 'Ternopil',
    'Харківська': 'Kharkiv',
    'Херсонська': 'Kherson',
    'Хмельницька': 'Khmelnytskyi',
    'Черкаська': 'Cherkasy',
    'Чернівецька': 'Chernivtsi',
    'Чернігівська': 'Chernihiv'
}
submissions_golden_df.loc[:, "initiator_region"] = submissions_golden_df.loc[:, "initiator_region"].map(mapping_dict)
submissions_golden_df

Unnamed: 0,initiator_region,item_classification,submission_status,number_of_submissions,latitude,longitude,latitude_grid_item_classification,longitude_grid_item_classification,color,latitude_grid_submission_status,longitude_grid_submission_status
0,Lviv,kitchen,pending,21,49.839683,24.029717,49.999683,23.869717,,,
34,Lviv,kitchen,withdrawn,5,49.839683,24.029717,49.999683,23.869717,,,
41,Lviv,kitchen,approval,4,49.839683,24.029717,49.999683,23.869717,,,
80,Lviv,kitchen,declined,1,49.839683,24.029717,49.999683,23.869717,,,
59,Lviv,shelter,pending,2,49.839683,24.029717,49.999683,24.029717,,,
...,...,...,...,...,...,...,...,...,...,...,...
52,Transcarpathian,kitchen,approval,0,48.620800,22.287883,,,"[0, 0, 0]",48.7808,21.967883
67,Transcarpathian,shelter,withdrawn,1,48.620800,22.287883,,,,48.7808,22.127883
52,Transcarpathian,kitchen,active,0,48.620800,22.287883,,,"[0, 0, 0]",48.7808,22.287883
52,Transcarpathian,kitchen,unsuccessful,0,48.620800,22.287883,,,"[0, 0, 0]",48.7808,22.447883


In [12]:
# Save Preprocessed Dataset
submissions_golden_df.to_csv("../../data/submissions_golden.csv", index=False)