In [2]:
import community as community_louvain
import folium
import matplotlib.pyplot as plt
import matplotlib.colors as mcolors
import networkx as nx
import numpy as np
import pandas as pd

from collections import Counter
from matplotlib import cm
from matplotlib.colors import Normalize, to_hex

# --- Configuration: which dataset to analyse and the files tied to it ---
dataset_name = '2025-01'
data_file = f"data/{dataset_name}.csv"
geo_locations_file = 'ecobici_stations.csv'
map_output_file = f"maps/communities_{dataset_name}.html"

# Load the trip records for the selected dataset.
data = pd.read_csv(data_file)
print(dataset_name)

# Ensure datetime parsing: join each date column with its time column and
# parse the result. dayfirst=True resolves ambiguous dates as day/month.
for stamp_col, date_col, time_col in (
    ('FechaHora_Retiro', 'Fecha_Retiro', 'Hora_Retiro'),
    ('FechaHora_Arribo', 'Fecha_Arribo', 'Hora_Arribo'),
):
    data[stamp_col] = pd.to_datetime(data[date_col] + ' ' + data[time_col], dayfirst=True)
print('Finished computing dates and hours')

# Compute ride duration in minutes (arrival timestamp minus withdrawal timestamp).
elapsed = data['FechaHora_Arribo'] - data['FechaHora_Retiro']
data['duracion_min'] = elapsed.dt.total_seconds().div(60)
print('Finished computing trip durations')

# Only keep meaningful trips: strictly longer than 3 and strictly shorter
# than 120 minutes. Rows whose duration is NaN fail both tests and are dropped.
data = data.loc[data['duracion_min'].gt(3) & data['duracion_min'].lt(120)]

print(data.columns)
data.head()

2025-01


KeyboardInterrupt: 

In [None]:
# Create stations' DataFrame
# Distinct station identifiers seen as trip origins and as trip destinations.
estaciones_retiro = data['Ciclo_Estacion_Retiro'].unique()
estaciones_arribo = data['Ciclo_EstacionArribo'].unique()
# Stations that occur both as an origin and as a destination.
estaciones = list(set(estaciones_retiro) & set(estaciones_arribo))

# One-column frames that form the two sides of the cross join below.
df1 = pd.Series(estaciones_retiro, name='Ciclo_Estacion_Retiro').to_frame()
df2 = pd.Series(estaciones_arribo, name='Ciclo_EstacionArribo').to_frame()
# Every (origin, destination) combination: len(df1) * len(df2) rows.
df = pd.merge(df1, df2, how='cross')
df.head()

In [None]:
# Create graph
# Count trips for each ordered (origin station, destination station) pair.
trip_counts = (
    data.groupby(['Ciclo_Estacion_Retiro', 'Ciclo_EstacionArribo'])
    .size()
    .reset_index(name='weight')
)
# Attach the counts to the full station-pair table; pairs without trips get 0.
weighted_edges = df.merge(trip_counts, on=['Ciclo_Estacion_Retiro', 'Ciclo_EstacionArribo'], how='left')
weighted_edges['weight'] = weighted_edges['weight'].fillna(0).astype(int)

# Filter only valid edges (i.e. weight > 0)
edges = weighted_edges.loc[
    weighted_edges['weight'] > 0,
    ['Ciclo_Estacion_Retiro', 'Ciclo_EstacionArribo', 'weight'],
]

# Build directed graph: one edge per ordered station pair, weighted by trip
# count. from_pandas_edgelist replaces a row-by-row iterrows() loop.
G = nx.from_pandas_edgelist(
    edges,
    source='Ciclo_Estacion_Retiro',
    target='Ciclo_EstacionArribo',
    edge_attr='weight',
    create_using=nx.DiGraph,
)

# Undirected view for community detection. DiGraph.to_undirected() is NOT
# used here: when both A->B and B->A exist it keeps the attributes of
# whichever direction it encounters last, so the weight would be an arbitrary
# one-direction count. Instead the two directions are summed, so the weight
# is the total number of trips between the two stations.
G_undirected = nx.Graph()
G_undirected.add_nodes_from(G.nodes)
for origin, destination, trips in G.edges(data='weight'):
    if G_undirected.has_edge(origin, destination):
        G_undirected[origin][destination]['weight'] += trips
    else:
        G_undirected.add_edge(origin, destination, weight=trips)


In [None]:
# Detect communities
# Louvain visits nodes in random order, so without a fixed seed the partition
# (and the community labels) can change on every run. Pin the seed so that
# Restart & Run All reproduces the same communities.
LOUVAIN_SEED = 42
partition = community_louvain.best_partition(
    G_undirected,
    weight='weight',
    random_state=LOUVAIN_SEED,
)
# Number of stations assigned to each community label.
community_sizes = Counter(partition.values())


print("Número de comunidades:", len(community_sizes))
print("Tamaño de comunidades:", community_sizes)

In [None]:
##### Plot communities ###
# Station coordinates. The join key is the station number rendered as text
# and left-padded with zeros to at least three characters.
geo_locations = pd.read_csv(geo_locations_file)
geo_locations['station_id'] = geo_locations['num_cicloestacion'].astype(str).str.zfill(3)

# Convert partition dict (station -> community label) to a two-column DataFrame
community_df = (
    pd.DataFrame.from_dict(partition, orient='index', columns=['community'])
    .reset_index()
    .rename(columns={'index': 'station_id'})
)
# Apply the same key normalisation as on the geolocation side so the merge
# compares like with like. This is a no-op when the graph nodes are already
# zero-padded strings.
# NOTE(review): assumes trip-file station ids are integers or zero-padded
# strings — confirm against the raw CSV.
community_df['station_id'] = community_df['station_id'].astype(str).str.zfill(3)

# Merge with geolocation data (inner join: stations lacking coordinates or a
# community assignment are dropped).
geo_with_communities = geo_locations.merge(community_df, on='station_id', how='inner')
if geo_with_communities.empty:
    # Fail here with a clear message instead of producing a NaN map centre.
    raise ValueError(
        "No station in the partition matched a station_id in the geolocation "
        "file; check that both sources use the same station identifier format."
    )

# Centre of the map: mean coordinate of the matched stations.
center_lat = geo_with_communities['latitud'].mean()
center_lon = geo_with_communities['longitud'].mean()
