In [None]:
%pip install geopandas plotly

In [None]:
import os 
import sys
from datetime import datetime

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import plotly.express as px
import plotly.io as pio
from scipy.signal import savgol_filter

# Make the "functions" directory next to the parent working directory importable
# before loading the project-local helpers.
sys.path.append(os.path.join(os.path.abspath(".."), "functions"))

import temporal_utils

In [None]:
# Project-local settings module: `dh` is the client used below to open the
# project; `pio_renderer` is an optional override for Plotly's default renderer
# (presumably a renderer name string — TODO confirm in environment.py).
from environment import dh, pio_renderer
# Leave Plotly's own default untouched when no override is configured.
if pio_renderer is not None:
    pio.renderers.default = pio_renderer

# **Variables**

In [None]:
# Name of the project opened with dh.get_or_create_project in the load-data cell.
PROJECT_NAME = "AreaVerde"

# **Load Data**

In [None]:
# Open (or create) the project and look up the four 2024 data items.
# These are data-item objects; they are converted to DataFrames in later cells
# through their `as_df()` / `download()` methods.
project = dh.get_or_create_project(PROJECT_NAME)

spira, spira5m, accuracy, accuracy5m = (
    project.get_dataitem(item_key)
    for item_key in (
        "spira_flow_data_2024",
        "spire_flow5m_2024",
        "spira_accur_data_2024",
        "spire_flow5m_accur_2024",
    )
)

In [None]:
# Materialise the flow data item as a DataFrame.
# The item is resolved from `project` here instead of through the `spira`
# name: `spira` is rebound from data item to DataFrame by this very cell, so
# calling `spira.as_df()` would fail with AttributeError on a second run.
spira = project.get_dataitem("spira_flow_data_2024").as_df()

In [None]:
# Lookup table: one row per distinct (spira_code, spira_unique_id) pair.
spira_id_map = spira.loc[:, ['spira_code', 'spira_unique_id']].drop_duplicates()

In [None]:
# Distinct sensor rows with their coordinates (used by map_spira).
spira_locations = spira.loc[
    :, ['spira_unique_id', 'spira_code', 'longitudine', 'latitudine']
].drop_duplicates()

In [None]:
# Sensor codes monitored in the analysis. The two lists are index-aligned:
# they are zipped pairwise when plotting, and each list is summed into its own
# daily profile (inbound vs outbound) further down.
spire_in = [
    '0.127 4.65 4 1',
    '0.127 3.84 2 1',
    '0.127 4.54 4 1',
    '2.2 2.2 8 1',
    '0.127 4.11 2 1',
    '2.7 2.2 2 1',
    '2.84 0.127 8 1',
]
spire_out = [
    '0.127 4.65 8 1',
    '0.127 3.84 6 1',
    '0.127 4.54 8 1',
    '2.2 2.2 4 1',
    '0.127 4.11 6 1',
    '2.7 2.2 2 2',
    '2.84 0.127 4 1',
]

In [None]:
# Parquet row filter in DNF form: one single-predicate group per sensor code.
# The outer list ORs the groups, so a row is kept when its `sensor_id` equals
# any of the monitored codes.
sel = [
    [('sensor_id', '==', sensor_code)]
    for sensor_code in (*spire_in, *spire_out)
]

In [None]:
# Display the filter list for inspection.
sel

In [None]:
# Analysis window. Both bounds are midnight timestamps; the filters below use
# `between(..., inclusive='left')`, so `end_date` itself is excluded.
start_date = datetime(year=2024, month=6, day=1)
end_date = datetime(year=2024, month=7, day=31)

In [None]:
# Download the 5-minute flow data item and load only the monitored sensors.
# The item is resolved from `project` instead of through the `spira5m` name:
# `spira5m` is rebound to a DataFrame two lines below, so `spira5m.download`
# would fail with AttributeError when this cell is run a second time.
project.get_dataitem("spire_flow5m_2024").download("./tmp/Spira5mFlowData.parquet", overwrite=True)
spira5m = pd.read_parquet("./tmp/Spira5mFlowData.parquet", filters=sel, engine="pyarrow")
# Left join: every reading is kept; codes missing from spira_id_map get NaN ids.
spira5m = pd.merge(spira5m, spira_id_map, left_on='sensor_id', right_on='spira_code', how='left')
spira5m = spira5m[['spira_unique_id', 'spira_code', 'value', 'start']].rename(columns={'value': 'count', 'start': 'DateTime'})
spira5m['DateTime'] = pd.to_datetime(spira5m['DateTime'], format='%Y-%m-%d %H:%M')
# Half-open window [start_date, end_date): rows stamped exactly end_date are dropped.
spira5m = spira5m[spira5m['DateTime'].between(start_date, end_date, inclusive='left')]
spira5m = spira5m.sort_values('DateTime')
spira5m

In [None]:
# Load the accuracy data item and restrict it to the analysis window.
# The item is resolved from `project` instead of through the `accuracy` name:
# `accuracy` is rebound to a DataFrame by this cell, so `accuracy.as_df()`
# would fail with AttributeError when the cell is run a second time.
accuracy = project.get_dataitem("spira_accur_data_2024").as_df()
accuracy['DateTime'] = pd.to_datetime(accuracy['DateTime'])
# Half-open window [start_date, end_date): rows stamped exactly end_date are dropped.
accuracy = accuracy[accuracy['DateTime'].between(start_date, end_date, inclusive='left')]
accuracy = accuracy.sort_values('DateTime')
accuracy

In [None]:
# Keep only the rows that temporal_utils.add_daytype labels "Weekday".
# The holiday list is the first column of a header-less CSV and is forwarded
# to add_daytype as a keyword argument for every timestamp.
holiday_namefile = "data/holiday_list.csv"
holiday_list = pd.read_csv(holiday_namefile, header=None)[0].to_list()

# 5-minute counts
is_week = spira5m['DateTime'].apply(
    temporal_utils.add_daytype, holiday_list=holiday_list
).eq("Weekday")
spira5m = spira5m.loc[is_week]

# accuracy series
is_week = accuracy['DateTime'].apply(
    temporal_utils.add_daytype, holiday_list=holiday_list
).eq("Weekday")
accuracy = accuracy.loc[is_week]

# **Plot spira data**

In [None]:
def plot_spira(spira_id, start=0, days=0):
    """Plot the 5-minute counts and the `accuracy` counts of one sensor.

    Rows are selected by position, using 24*12 rows per day for `spira5m` and
    24 rows per day for `accuracy`. This assumes gap-free series (TODO
    confirm); both frames were filtered to weekdays earlier, so a "day" here
    is a retained day, not necessarily a calendar day.

    Args:
        spira_id: value matched against the `spira_code` column of both frames.
        start: number of days to skip from the beginning of each series.
        days: number of days to plot; 0 (the default) plots through the end.
    """
    def day_window(rows_per_day):
        # An open-ended stop (None) keeps the final row; a stop of -1 would
        # silently drop the last sample of the series.
        stop = rows_per_day * (start + days) if days else None
        return slice(rows_per_day * start, stop)

    spira5m_sel = spira5m[spira5m['spira_code'] == spira_id].iloc[day_window(24 * 12)]
    accuracy_sel = accuracy[accuracy['spira_code'] == spira_id].iloc[day_window(24)]

    plt.figure(figsize=(12,3))
    plt.plot(spira5m_sel['DateTime'], spira5m_sel['count'])
    plt.plot(accuracy_sel['DateTime'], accuracy_sel['count'])
    # Identify the sensor: the notebook draws many of these figures in a row.
    plt.title(str(spira_id))
    
def map_spira(spira_id):
    """Show the location rows recorded for `spira_id` on an OpenStreetMap scatter map.

    Args:
        spira_id: value matched against the `spira_code` column of
            `spira_locations`.
    """
    is_selected = spira_locations['spira_code'] == spira_id
    # Constant `size` column so every marker is drawn with the same size.
    markers = spira_locations[is_selected].assign(size=1)
    fig = px.scatter_mapbox(
        markers,
        lat='latitudine',
        lon='longitudine',
        mapbox_style='open-street-map',
        size='size',
        size_max=12,
        zoom=11.75,
        height=400,
    )
    # Remove the default margins so the map fills the figure.
    fig.update_layout(margin=dict(r=0, t=0, l=0, b=0))
    fig.show()

In [None]:
# One figure per sensor, alternating the inbound and outbound member of each
# pair. To inspect a single day, pass e.g. start=10, days=1 to plot_spira.
for sensor_pair in zip(spire_in, spire_out):
    for sensor_code in sensor_pair:
        plot_spira(sensor_code)

# Calculation of out-flow

In [None]:
# Time-of-day profile of the inbound sensors: `count` summed per clock time
# over all sensors in `spire_in` and all retained days.
# The `time` column is built with `.assign` on a new frame; assigning it
# directly on the filtered slice of `spira5m` triggers SettingWithCopyWarning.
flow_in = (
    spira5m[spira5m['spira_code'].isin(spire_in)]
    .assign(time=lambda df: df['DateTime'].dt.time)
    .drop(columns=['spira_unique_id','spira_code','DateTime'])
    .groupby('time')
    .sum()
)

In [None]:
# Time-of-day profile of the outbound sensors: `count` summed per clock time
# over all sensors in `spire_out` and all retained days.
# The `time` column is built with `.assign` on a new frame; assigning it
# directly on the filtered slice of `spira5m` triggers SettingWithCopyWarning.
flow_out = (
    spira5m[spira5m['spira_code'].isin(spire_out)]
    .assign(time=lambda df: df['DateTime'].dt.time)
    .drop(columns=['spira_unique_id','spira_code','DateTime'])
    .groupby('time')
    .sum()
)

In [None]:
# Join the two profiles on the time-of-day key (default inner join, so only
# times present in both are kept), then divide each column by its own mean so
# both curves average to 1.
flow = flow_in.merge(flow_out, on='time', suffixes=['_in','_out'])
flow = flow.div(flow.mean(), axis='columns')
flow.plot();

In [None]:
# Ratio of the normalised outbound profile to the normalised inbound profile,
# per time-of-day slot.
flow['out-in-ratio'] = flow['count_out'].div(flow['count_in'])
flow['out-in-ratio'].plot();

In [None]:
# Show the ratio profile as a plain NumPy array.
flow['out-in-ratio'].to_numpy()

In [None]:
# TODO: This should come from the data lake instead of being hard-coded here.
# Inflow from the gate estimation (vehicles/hour), computed for weekdays only.
# Presumably one value per 5-minute slot of the day (24*12 entries) — it is
# multiplied element-wise with the 5-minute out/in ratio profile further down.
in_flow_week = np.array([ 
    221.25 , 208.70090169, 196.59507377, 184.93251626,173.71322915, 162.93721244, 152.60446612, 142.71499021,
    133.2687847 , 124.26584959, 115.70618488, 107.58979057,99.91666667, 92.68681316, 85.90023005, 79.55691735,
    73.65687504, 68.20010313, 63.18660163, 58.57940003,54.34152786, 50.4729851 , 46.97377176, 43.84388784,
    41.08333333, 38.69210825, 36.67021258, 35.01764633,33.73440951, 32.82050209, 32.2759241 , 31.97296144,
    31.78390001, 31.70873982, 31.74748086, 31.90012315,32.16666667, 32.54711142, 33.04145742, 33.64970465,
    34.37185312, 35.20790282, 36.15785376, 37.26801653,38.58470172, 40.10790933, 41.83763935, 43.7738918 ,
    45.91666667, 48.26596395, 50.82178366, 53.58412578,56.55299032, 59.72837728, 63.11028667, 67.02310605,
    71.79122301, 77.41463755, 83.89334968, 91.22735938,99.41666667, 108.46127153, 118.36117398, 129.11637401,
    140.72687162, 153.19266681, 166.51375958, 181.49843978,198.95499726, 218.88343202, 241.28374407, 266.15593339,
    293.5 , 323.31594389, 355.60376506, 390.36346351,427.59503924, 467.29849225, 509.47382255, 552.31727381,
    594.02508973, 634.59727031, 674.03381555, 712.33472545,749.5 , 785.52963921, 820.42364308, 854.18201161,886.80474479, 918.29184263, 948.64330513, 976.28773216,
    999.65372359, 1018.74127943, 1033.55039966, 1044.0810843 ,1050.33333333, 1052.30714677, 1050.00252461, 1043.41946686,
    1032.5579735 , 1017.41804455, 997.99967999, 977.02114803,957.20071686, 938.53838647, 921.03415686, 904.68802804,
    889.5 , 875.47007275, 862.59824628, 850.88452059,840.32889569, 830.93137158, 822.69194825, 815.37241668,
    808.73456786, 802.77840178, 797.50391844, 792.91111785,
    789. , 785.77056489, 783.22281253, 781.35674292,780.17235604, 779.66965191, 779.84863052, 780.46888892,
    781.29002415, 782.3120362 , 783.53492508, 784.9586908 ,786.58333333, 788.4088527 , 790.43524889, 792.66252192,
    795.09067177, 797.71969844, 800.54960195, 803.02721276,804.59936133, 805.26604768, 805.02727179, 803.88303368,
    801.83333333, 798.87817076, 795.01754596, 790.25145892,784.57990966, 778.00289817, 770.52042444, 763.0159827 ,
    756.37306713, 750.59167774, 745.67181454, 741.61347751,738.41666667, 736.081382 , 734.60762352, 733.99539122,
    734.24468509, 735.35550515, 737.32785139, 739.61300218,741.6622359 , 743.47555253, 745.0529521 , 746.39443459,
    747.5 , 748.36964834, 749.0033796 , 749.40119378,749.56309089, 749.48907093, 749.17913389, 749.28980052,
    750.47759157, 752.74250705, 756.08454694, 760.50371126,766. , 772.57341316, 780.22395074, 788.95161274,
    798.75639917, 809.63831001, 821.59734528, 834.56717618,848.48147392, 863.34023851, 879.14346994, 895.89116821,
    913.58333333, 932.2199653 , 951.8010641 , 972.32662975,993.79666225, 1016.21116159, 1039.57012777, 1062.26316093,
    1082.67986119, 1100.82022857, 1116.68426305, 1130.27196464,1141.58333333, 1150.61836914, 1157.37707205, 1161.85944207,
    1164.06547919, 1163.99518343, 1161.64855477, 1157.67561751,1152.72639595, 1146.80089008, 1139.89909992, 1132.02102544,
    1123.16666667, 1113.33602359, 1102.5290962 , 1090.74588451,1077.98638852, 1064.25060823, 1049.53854363, 1034.03498586,
    1017.92472607, 1001.20776426, 983.88410042, 965.95373456,947.41666667, 928.27289675, 908.52242481, 888.16525085,
    867.20137486, 845.63079684, 823.4535168 , 800.82280064,777.89191427, 754.66085769, 731.12963089, 707.29823388,
    683.16666667, 658.73492924, 634.0030216 , 608.97094374,583.63869568, 558.0062774 , 532.07368891, 506.86154363,
    483.39045498, 461.66042296, 441.67144757, 423.4235288 ,406.91666667, 392.15086116, 379.12611228, 367.84242003,
    358.29978441, 350.49820541, 344.43768305, 339.53803014,335.21905953, 331.48077121, 328.32316518, 325.74624144,
    323.75 , 322.33444085, 321.49956399, 321.24536942,321.57185714, 322.47902716, 323.96687946, 325.32925698,
    325.86000264, 325.55911644, 324.42659838, 322.46244845,319.66666667, 316.03925302, 311.58020751, 306.28953013,
    300.1672209 , 293.2132798 , 285.42770685, 276.81050203,267.36166534, 257.0811968 , 245.9690964 , 234.02536413
])

in_flow = in_flow_week # used as-is, no conversion applied here: the values are stated above to be vehicles/hour, not vehicles per 5-minute slot

In [None]:
# Element-wise product of the inflow profile and the out/in ratio profile.
# Both arrays must have the same length for this to work.
raw_out_flow = np.multiply(in_flow, flow['out-in-ratio'].to_numpy())

In [None]:
# Smooth the raw out-flow with a Savitzky-Golay filter: a window of
# 12*2+1 = 25 samples and a 2nd-order polynomial fit. With the 12 samples per
# hour used elsewhere in this notebook, that window spans about two hours.
# `savgol_filter` is imported in the notebook's top import cell.
smoothed_out_flow = savgol_filter(raw_out_flow, window_length=12*2+1, polyorder=2)

In [None]:
# Overlay the raw and the smoothed out-flow on the same axes for comparison.
plt.plot(raw_out_flow)
plt.plot(smoothed_out_flow)

In [None]:
# Rescale the smoothed out-flow so that its total equals the total inflow,
# then print both totals as a check (they should match up to rounding).
total_in = sum(in_flow)
total_smoothed = sum(smoothed_out_flow)
out_flow = smoothed_out_flow * total_in / total_smoothed
print(total_in, sum(out_flow))

In [None]:
# Overlay the inflow profile and the rescaled out-flow profile on the same axes.
plt.plot(in_flow)
plt.plot(out_flow)

In [30]:
# Print the rescaled out-flow profile as plain text.
print(out_flow)

[ 233.38097382  220.82386919  208.66447564  196.90279317  185.53882178
  174.57256146  164.00401223  153.83317407  144.06004699  134.68463099
  125.70692607  117.12693223  108.94464946  101.0321295    94.13377195
   86.99545139   79.56647229   72.89347297   66.43539924   61.17103286
   57.63701658   53.34527257   50.4033317    47.759661     45.73497488
   43.90603271   41.70428204   39.91449515   39.15679155   38.09222034
   37.59168808   37.72430755   37.48694063   37.97971866   38.86532297
   40.00690705   41.1382239    41.27255557   42.03582926   43.44062896
   45.01849213   47.16432432   49.31850276   52.21059806   54.77757331
   57.14070142   59.22688828   61.83896169   65.21928226   67.04735569
   70.0809006    71.92922162   72.7786066    75.16039375   78.58379158
   83.60695876   89.25750502   93.98382952  102.12187753  108.64578606
  114.67457031  123.2834927   133.47508488  145.54121758  156.08214232
  168.81025967  183.87309895  202.2234539   224.18759278  243.95564543
  265.