In [2]:
# Define Imports
import os
import googlemaps
import pandas as pd
import plotly.express as px
import dash_bootstrap_components as dbc
import pydeck as pdk
from dash import Dash, dcc, html
from dotenv import load_dotenv
import numpy as np

In [2]:
# Load ENV variables
load_dotenv(dotenv_path='../../.env')

True

In [3]:
# Read Silver Submissions Dataset
submissions_silver_df = pd.read_csv("../../data/submissions_silver.csv")
submissions_silver_df

Unnamed: 0,initiator_region,item_classification,submission_status,number_of_submissions,latitude,longitude
0,Львівська,kitchen,pending,21,49.839683,24.029717
1,Кіровоградська,kitchen,pending,17,48.507933,32.262317
2,Харківська,shelter,approval,16,50.002013,36.307399
3,Київська,kitchen,pending,16,50.052951,30.766713
4,Одеська,kitchen,pending,16,46.484583,30.732600
...,...,...,...,...,...,...
75,Львівська,kitchen,declined,1,49.839683,24.029717
76,Івано-Франківська,kitchen,approval,1,48.920062,24.708916
77,Житомирська,kitchen,approval,1,50.254650,28.658667
78,Київська,kitchen,approval,1,50.052951,30.766713


In [6]:
# Add Data Augmentations Specific for the Plots
# For instance, the function below will transform each sample's position
#   so at the pydeck map it would look like a BarPlot.
def create_vertical_bars(df, lat_col='latitude', long_col='longitude', step=0.001):
    new_rows = []

    # Determine unique values for each groupby option globally
    unique_values = {
        'item_classification': df['item_classification'].unique(),
        'submission_status': df['submission_status'].unique()
    }

    for groupby, values in unique_values.items():
        for region in df['initiator_region'].unique():
            region_df = df[df['initiator_region'] == region]

            # Calculate the initial offset to center the bar
            n_points = len(values)
            initial_offset = -(n_points // 2) * step

            for i, value in enumerate(values):
                relevant_rows = region_df[region_df[groupby] == value]

                if relevant_rows.empty:
                    # Create a placeholder row if the category is missing
                    new_lat = region_df[lat_col].iloc[0] + step
                    new_long = region_df[long_col].iloc[0] + initial_offset + i * step
                    placeholder_row = region_df.iloc[0].copy()
                    placeholder_row[groupby] = value
                    placeholder_row[f'{lat_col}_grid_{groupby}'] = new_lat
                    placeholder_row[f'{long_col}_grid_{groupby}'] = new_long
                    placeholder_row['number_of_submissions'] = 0
                    placeholder_row['color'] = [0, 0, 0]
                    new_rows.append(placeholder_row)
                else:
                    for _, row in relevant_rows.iterrows():
                        new_lat = row[lat_col] + step
                        new_long = row[long_col] + initial_offset + i * step
                        new_row = row.copy()
                        new_row[f'{lat_col}_grid_{groupby}'] = new_lat
                        new_row[f'{long_col}_grid_{groupby}'] = new_long
                        new_rows.append(new_row)

    return pd.DataFrame(new_rows)


submissions_golden_df = create_vertical_bars(submissions_silver_df, step=0.2 * 0.8)
submissions_golden_df

Unnamed: 0,initiator_region,item_classification,submission_status,number_of_submissions,latitude,longitude,latitude_grid_item_classification,longitude_grid_item_classification,color,latitude_grid_submission_status,longitude_grid_submission_status
0,Львівська,kitchen,pending,21,49.839683,24.029717,49.999683,23.869717,,,
32,Львівська,kitchen,withdrawn,5,49.839683,24.029717,49.999683,23.869717,,,
38,Львівська,kitchen,approval,4,49.839683,24.029717,49.999683,23.869717,,,
75,Львівська,kitchen,declined,1,49.839683,24.029717,49.999683,23.869717,,,
0,Львівська,kitchen,pending,21,49.839683,24.029717,49.999683,23.869717,,49.999683,23.549717
...,...,...,...,...,...,...,...,...,...,...,...
64,Закарпатська,shelter,withdrawn,1,48.620800,22.287883,,,,48.780800,21.967883
49,Закарпатська,kitchen,approval,0,48.620800,22.287883,,,"[0, 0, 0]",48.780800,22.127883
49,Закарпатська,kitchen,declined,0,48.620800,22.287883,,,"[0, 0, 0]",48.780800,22.287883
49,Закарпатська,kitchen,active,0,48.620800,22.287883,,,"[0, 0, 0]",48.780800,22.447883


In [7]:
# Save Preprocessed Dataset
submissions_golden_df.to_csv("../../data/submissions_golden.csv", index=False)