### Librerías

In [13]:
import numpy as np
import pandas as pd
import plotly.express as px


### Datos

In [14]:
# Load the station table and the trip table from CSV; both paths are relative,
# so they resolve against the directory the kernel is running in.
# Later cells read the 'lat', 'long', 'name' and 'nbDocks' columns of this frame.
station_locations = pd.read_csv('../process_data/stations_all_info.csv')
# Later cells read the 'Start station number', 'Start station',
# 'End station number' and 'End station' columns of this frame.
classic_trips = pd.read_csv('../process_data/trips_all_info.csv')

### Plot

In [15]:
def plot_station_locations(df, column_name, title):
    """Show an interactive map of the stations, coloured and sized by one metric.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain the columns ``lat``, ``long`` and ``name`` and, for the
        chosen metric, ``column_name`` (numeric value),
        ``f"{column_name}_cat"`` (category label) and
        ``f"{column_name}_color"`` (colour assigned to that category).
    column_name : str
        Name of the metric column to visualise.
    title : str
        Figure title.

    Returns
    -------
    None
        The figure is displayed with ``fig.show()`` and not returned.
    """
    cat_col = f"{column_name}_cat"
    color_col = f"{column_name}_color"

    # Pair each category label with its own colour explicitly. Relying on the
    # order of the unique colours matching the order in which categories are
    # first seen breaks as soon as either column contains a missing value.
    pairs = df[[cat_col, color_col]].dropna().drop_duplicates()
    category_colors = dict(zip(pairs[cat_col], pairs[color_col]))

    fig = px.scatter_map(
        df,
        lat="lat",
        lon="long",
        color=cat_col,
        color_discrete_map=category_colors,
        # Marker sizes cannot be negative, so signed metrics are drawn by magnitude.
        size=df[column_name].abs(),
        hover_name="name",
        hover_data={cat_col: True},
        size_max=15,
        zoom=12,
        title=title,
    )
    fig.show()

### Número de Viajes

In [16]:
def trips_by_station(classic_trips, nr_stations=None):
    """Build the origin-destination trip-count matrix.

    Every distinct station number found in the trips (as start or end) is
    given an index equal to its rank in ascending station-number order.
    ``matrix[i, j]`` is the number of trips that start at the station with
    index ``i`` and end at the station with index ``j``.

    Parameters
    ----------
    classic_trips : pandas.DataFrame
        One row per trip, with the columns ``'Start station number'``,
        ``'Start station'``, ``'End station number'`` and ``'End station'``.
        Rows with a missing value in any of these columns are not counted
        (``groupby`` drops missing keys).
    nr_stations : int, optional
        Side length of the square matrix. When omitted, the current length of
        the notebook-level ``station_locations`` frame is used.

    Returns
    -------
    numpy.ndarray
        Integer array of shape ``(nr_stations, nr_stations)``. Rows/columns
        beyond the number of stations seen in the trips stay zero.

    Raises
    ------
    IndexError
        If the trips reference more distinct stations than ``nr_stations``.
    """
    if nr_stations is None:
        # Looked up at call time: a default written as len(station_locations)
        # in the signature would be frozen when the cell defining the
        # function runs and would go stale if the frame is reloaded.
        nr_stations = len(station_locations)

    # Count trips per (start, end) pair. The station names stay in the key so
    # that trips with a missing name are excluded, as before.
    grouped = (
        classic_trips
        .groupby(['Start station number', 'Start station', 'End station number', 'End station'])
        .size()
        .reset_index(name='count')
    )

    # Sorted union of every station number that appears as a start or an end.
    station_numbers = sorted(
        set(grouped['Start station number']).union(grouped['End station number'])
    )
    if len(station_numbers) > nr_stations:
        raise IndexError(
            f"trips reference {len(station_numbers)} distinct stations, "
            f"but the matrix only has room for {nr_stations}"
        )

    # Map station numbers to their row/column index in the matrix.
    station_nr_to_index = {station: idx for idx, station in enumerate(station_numbers)}

    matrix = np.zeros((nr_stations, nr_stations), dtype=int)

    start_idx = grouped['Start station number'].map(station_nr_to_index).to_numpy(dtype=np.intp)
    end_idx = grouped['End station number'].map(station_nr_to_index).to_numpy(dtype=np.intp)

    # np.add.at accumulates repeated (start, end) pairs, which occur when one
    # station number is recorded under more than one name.
    np.add.at(matrix, (start_idx, end_idx), grouped['count'].to_numpy(dtype=matrix.dtype))

    return matrix

def total_trips_per_station(matrix):
    """Return, per station, the number of trips that start or end there.

    ``matrix[i, j]`` is read as the count of trips from station ``i`` to
    station ``j``. A trip that starts and ends at the same station sits on the
    diagonal and would be counted in both the row sum and the column sum, so
    the diagonal is subtracted once.
    """
    departures = matrix.sum(axis=1)   # row sums: trips leaving each station
    arrivals = matrix.sum(axis=0)     # column sums: trips reaching each station
    round_trips = np.diag(matrix)     # trips counted in both sums above

    touched_twice = departures + arrivals
    return touched_twice - round_trips

def net_flow_per_station(matrix):
    """Return, per station, departures minus arrivals.

    ``matrix[i, j]`` is read as the count of trips from station ``i`` to
    station ``j``. A positive result means more trips leave the station than
    arrive; a negative result means the opposite.
    """
    # Row sums are departures, column sums are arrivals.
    return matrix.sum(axis=1) - matrix.sum(axis=0)

In [17]:
def categorize_column(df, column_name):
    """Bin a numeric column into five equal-width ranges and colour each range.

    The range between the smallest and largest non-missing value is split into
    five equal-width bins. Each value gets the label of its bin and the colour
    tied to that label (dark red for the lowest bin through dark green for the
    highest).

    Parameters
    ----------
    df : pandas.DataFrame
        Frame holding the column to categorise.
    column_name : str
        Name of a numeric column in ``df``.

    Returns
    -------
    categories : pandas.Series
        Categorical bin label per row; missing where the value is missing.
    colors : pandas.Series
        Hex colour per row, derived from ``categories``; missing where the
        category is missing.
    """
    values = df[column_name]

    # Only the edges are needed. They are computed from the non-missing values
    # because automatic range detection raises ValueError if a NaN is present.
    bins = np.histogram_bin_edges(values.dropna(), bins=5)

    # The first and last labels are open-ended: they stand for the lowest bin
    # (up to bins[1]) and the highest bin (above bins[4]).
    labels = [
        f'< {bins[1]:.0f}',
        f'{bins[1]:.0f} to {bins[2]:.0f}',
        f'{bins[2]:.0f} to {bins[3]:.0f}',
        f'{bins[3]:.0f} to {bins[4]:.0f}',
        f'> {bins[4]:.0f}'
    ]

    color_map = {
        labels[0]: '#660000',  # very dark red
        labels[1]: '#CC3333',  # dark red
        labels[2]: '#E6B3B3',  # dark pink
        labels[3]: '#336633',  # dark green
        labels[4]: '#004D00'   # very dark green
    }

    # include_lowest puts the minimum value inside the first bin.
    categories = pd.cut(values, bins=bins, labels=labels, include_lowest=True)
    colors = categories.map(color_map)

    return categories, colors

### Main

In [18]:
# Build the origin-destination count matrix from all trips, then derive the
# two per-station summaries from it.
sample_matrix = trips_by_station(classic_trips)
total_trips = total_trips_per_station(sample_matrix)
net_flow = net_flow_per_station(sample_matrix)

# NOTE(review): these arrays are attached by position. trips_by_station orders
# its rows by ascending station number, so this presumably relies on
# station_locations listing the same stations in that same order -- confirm.
station_locations['total_trips'] = total_trips
station_locations['net_flow'] = net_flow

# Apply the colour categorisation: adds a '<col>_cat' and a '<col>_color'
# column for each metric, as plot_station_locations expects.
for col in ['nbDocks', 'total_trips', 'net_flow']:
    station_locations[f'{col}_cat'], station_locations[f'{col}_color'] = categorize_column(station_locations, col)

# One map per metric; the last argument is the title shown on the figure.
plot_station_locations(station_locations, "nbDocks", "Capacidad de las Estaciones")
plot_station_locations(station_locations, "total_trips", "Viajes Totales por Estación")
plot_station_locations(station_locations, "net_flow", "Flujo Neto de Viajes por Estación")