In [None]:
import pandas as pd
import folium
from folium.plugins import HeatMap, HeatMapWithTime, MarkerCluster, Search
from branca.colormap import linear
from sklearn.preprocessing import MinMaxScaler

# Load the trip records from the local parquet extract.
df = pd.read_parquet("./data/query.parquet")

# Only trips that carry both pickup coordinates can be placed on the map.
has_pickup = df['pickup_latitude'].notnull() & df['pickup_longitude'].notnull()
df = df[has_pickup]

# A coordinate pair shared by more than this many trips is removed entirely.
freq_limit = 5000

# --- Pickup side: count trips per exact coordinate pair. ---
pickup_keys = ['pickup_latitude', 'pickup_longitude']
pickup_coords_freq = df.groupby(pickup_keys).size().reset_index(name='count')
frequent_pickups = pickup_coords_freq[pickup_coords_freq['count'] > freq_limit]

# Anti-join: after the left merge, rows tagged 'left_only' are exactly the
# trips whose pickup pair is NOT among the over-represented ones.
df = df.merge(frequent_pickups[pickup_keys], on=pickup_keys, how='left', indicator=True)
df = df[df['_merge'] == 'left_only'].drop(columns=['_merge'])

# --- Dropoff side: same procedure, counted on the already-filtered trips. ---
dropoff_keys = ['dropoff_latitude', 'dropoff_longitude']
dropoff_coords_freq = df.groupby(dropoff_keys).size().reset_index(name='count')
frequent_dropoffs = dropoff_coords_freq[dropoff_coords_freq['count'] > freq_limit]

df = df.merge(frequent_dropoffs[dropoff_keys], on=dropoff_keys, how='left', indicator=True)
df = df[df['_merge'] == 'left_only'].drop(columns=['_merge'])

# Report how many trips survived both filters.
print(f"Número de linhas no DataFrame: {len(df)}")

# Min-max scale the trip duration so it can be used as the heat weight.
scaler = MinMaxScaler()
df['trip_seconds_norm'] = scaler.fit_transform(df[['trip_seconds']])

# One sub-frame per distinct value of 'start_hour', in groupby order.
hourly_data = dict(tuple(df.groupby('start_hour')))

# For every hour: a list of [latitude, longitude, weight] triples, plus the
# label shown for that animation frame.
heat_columns = ['pickup_latitude', 'pickup_longitude', 'trip_seconds_norm']
heat_data = [frame[heat_columns].values.tolist() for frame in hourly_data.values()]
timestamps = [f"{hour}:00" for hour in hourly_data]

# Base map that every layer is attached to.
mapa = folium.Map(location=[41.8781, -87.6298], zoom_start=11)

# Colour ramp applied to the animated heat layer.
heat_gradient = {
    0.2: 'blue',
    0.4: 'lime',
    0.6: 'orange',
    0.8: 'red'
}

animated_heatmap = HeatMapWithTime(
    heat_data,
    index=timestamps,
    radius=10,
    auto_play=True,
    max_opacity=0.8,
    gradient=heat_gradient
)
animated_heatmap.add_to(mapa)

# Unweighted heat layer built from every remaining pickup coordinate.
static_columns = ['pickup_latitude', 'pickup_longitude']
heat_data_static = df[static_columns].values.tolist()

heatmap = HeatMap(heat_data_static, name='Heatmap Estático', show=True)
heatmap.add_to(mapa)

# Trip metrics that each get their own set of toggleable map layers.
variables = ['trip_total', 'trip_seconds', 'trip_miles', 'cost_per_mile']

# Mean of every metric per distinct pickup coordinate pair.
aggregated_df = df.groupby(['pickup_latitude', 'pickup_longitude'])[variables].mean().reset_index()


def _point_values(frame, var):
    """Return an iterator of ``(latitude, longitude, value)`` per row of *frame*.

    The three columns are walked in lockstep instead of using
    ``DataFrame.iterrows()``, which constructs a full Series for every row and
    is needlessly slow when only three fields are read.
    """
    return zip(frame['pickup_latitude'], frame['pickup_longitude'], frame[var])


def _circle_layer(frame, var, colormap, layer_name, popup_label, fill_opacity):
    """Build a hidden FeatureGroup holding one coloured circle per row of *frame*.

    Args:
        frame: DataFrame with 'pickup_latitude', 'pickup_longitude' and *var*.
        var: Name of the column whose value colours the circle and fills the popup.
        colormap: Callable mapping a value of *var* to a colour.
        layer_name: Name given to the FeatureGroup.
        popup_label: Text placed before the formatted value in each popup.
        fill_opacity: Fill opacity applied to every circle.

    Returns:
        The populated ``folium.FeatureGroup`` (not yet attached to a map).
    """
    layer = folium.FeatureGroup(name=layer_name, show=False)
    for lat, lon, value in _point_values(frame, var):
        folium.CircleMarker(
            location=[lat, lon],
            radius=8,
            fill=True,
            color=colormap(value),
            fill_opacity=fill_opacity,
            stroke=False,
            popup=f"{popup_label}: {value:.2f}"
        ).add_to(layer)
    return layer


for var in variables:
    # The colour scale spans the raw (per-trip) range of the metric; the
    # averaged layer reuses the same scale.
    min_val = df[var].min()
    max_val = df[var].max()
    colormap = linear.YlOrRd_09.scale(min_val, max_val)
    colormap.caption = var

    # One circle per trip.
    feature_group_all_points = _circle_layer(
        df, var, colormap, f"{var} (All Points)", var, 0.5
    )

    # One circle per distinct pickup location, coloured by the mean value.
    feature_group_averaged_points = _circle_layer(
        aggregated_df, var, colormap, f"{var} (Averaged Points)", f"{var} (média)", 0.9
    )

    mapa.add_child(feature_group_all_points)
    mapa.add_child(feature_group_averaged_points)
    mapa.add_child(colormap)

    # One plain marker per trip, grouped by a MarkerCluster.
    feature_group_cluster = folium.FeatureGroup(name=f"{var} (Clustered)", show=False)
    marker_cluster = MarkerCluster().add_to(feature_group_cluster)
    for lat, lon, value in _point_values(df, var):
        folium.Marker(
            location=[lat, lon],
            popup=f"{var}: {value:.2f}"
        ).add_to(marker_cluster)

    mapa.add_child(feature_group_cluster)

# Widget that lets the viewer toggle the layers added to the map.
layer_control = folium.LayerControl(position='topright', collapsed=True)
layer_control.add_to(mapa)

# Fit the view to the bounding box of the remaining pickup coordinates.
pickup_lat = df['pickup_latitude']
pickup_lon = df['pickup_longitude']
south_west = [pickup_lat.min(), pickup_lon.min()]
north_east = [pickup_lat.max(), pickup_lon.max()]
bounds = [south_west, north_east]
mapa.fit_bounds(bounds)

# Write the finished map to an HTML file.
mapa.save('./enhanced_map_with_layers.html')


Número de linhas no DataFrame: 44481
