In [1]:
from Libreria.cargar_configbd import cargar_configbd
from Libreria.cargar_mes_a_procesar import cargar_mes_a_procesar

import pandas as pd
import numpy as np


In [2]:
# Connect to the PostgreSQL database using the settings file 'conf_bd.txt'
# and open a cursor that the following cells reuse for every query.
conn = cargar_configbd.conectar_base_datos('conf_bd.txt')
cur = conn.cursor()

In [3]:
# Load the month/year pair from 'mes_a_procesar.csv' (presumably the period
# to process — verify against the helper's implementation).
mes, año = cargar_mes_a_procesar.leer_csv_en_lista('mes_a_procesar.csv')
# NOTE(review): the two assignments below overwrite whatever was just read
# from the CSV, so the file currently has no effect on the run. Confirm
# whether this hard-coded override is intentional or leftover debugging.
mes = 'sept'
año = 2023
# Show the suffix that is appended to the table names used further down.
print(f"{mes}{año}")

sept2023


In [4]:
# ---- Collect the distinct (tripid, segid, uid) keys present in the
# ---- segments table for the selected month, ordered by uid.
consulta_ids = f"SELECT DISTINCT tripid, segid, uid FROM segmentos_viajes_{mes}{año} order by uid"
cur.execute(consulta_ids)
rows = cur.fetchall()
# Keep an independent (shallow) copy so later reuse of `rows` cannot alias it.
id_segmentos = list(rows)
len(id_segmentos)

35864

In [5]:
# ----- (Re)create the per-segment statistics table.
# The table is dropped first, so running this cell discards any statistics
# already stored for this month. Column order matters: the INSERT issued by
# the processing loop supplies its values positionally.
consulta_sql = f'''DROP TABLE IF EXISTS estadisticos_segmentos_{mes}{año};
                  CREATE TABLE estadisticos_segmentos_{mes}{año} (
                      tripid INTEGER,
                      segid INTEGER,
                      uid VARCHAR(255),
                      p25_speed FLOAT, 
                      p75_speed FLOAT,
                      max_speed FLOAT,
                      min_speed FLOAT,
                      mean_speed FLOAT,
                      median_speed FLOAT,
                      var_speed FLOAT,
                      p25_aceleration FLOAT, 
                      p75_aceleration FLOAT,
                      max_aceleration FLOAT,
                      min_aceleration FLOAT,
                      mean_aceleration FLOAT,
                      median_aceleration FLOAT,
                      var_aceleration FLOAT,
                      p25_bearing FLOAT, 
                      p75_bearing FLOAT,
                      max_bearing FLOAT,
                      min_bearing FLOAT,
                      mean_bearing FLOAT,
                      median_bearing FLOAT,
                      var_bearing FLOAT,
                      p25_time_stop FLOAT, 
                      p75_time_stop FLOAT,
                      max_time_stop FLOAT,
                      min_time_stop FLOAT,
                      mean_time_stop FLOAT,
                      median_time_stop FLOAT,
                      var_time_stop FLOAT,
                      stops INTEGER
                  );
                '''
cur.execute(consulta_sql)
# Commit through the connection (as the processing loop does) instead of
# sending a raw "commit" statement through the cursor.
conn.commit()

In [6]:
def tiempos_espera(df_puntos_modo, umbral_velocidad, tiempo_minimo_parada):
    """Return the durations of the stops detected in a sequence of points.

    A point counts as a stop when the speed derived from the previous point
    is below ``umbral_velocidad`` and the time elapsed since the previous
    point is at least ``tiempo_minimo_parada`` seconds.

    Parameters
    ----------
    df_puntos_modo : pandas.DataFrame
        Must contain the columns ``recorded_at`` (anything accepted by
        ``pd.to_datetime``) and ``distance``. The frame is NOT modified.
        NOTE(review): consecutive differences of ``distance`` are used, which
        assumes it is cumulative along the segment — confirm.
    umbral_velocidad : float
        Speed threshold, in ``distance`` units per second.
    tiempo_minimo_parada : float
        Minimum elapsed time, in seconds, for a point to count as a stop.

    Returns
    -------
    pandas.Series
        Elapsed seconds of every stop, named ``time_diff`` and indexed by the
        original row labels in chronological order. Empty when there are no
        stops.
    """
    # Work on a private copy: the original implementation sorted the caller's
    # frame in place and overwrote its 'speed' column as a side effect.
    puntos = df_puntos_modo[['recorded_at', 'distance']].copy()
    puntos['recorded_at'] = pd.to_datetime(puntos['recorded_at'])

    # Differences are only meaningful in chronological order.
    puntos = puntos.sort_values('recorded_at')

    # Seconds elapsed and distance covered since the previous point; the
    # first point has no predecessor, so both are NaN there.
    time_diff = puntos['recorded_at'].diff().dt.total_seconds()
    distance_diff = puntos['distance'].diff()

    # Speed between consecutive points.
    speed = distance_diff / time_diff

    # NaN comparisons are False, so the first point is never a stop.
    es_parada = (speed < umbral_velocidad) & (time_diff >= tiempo_minimo_parada)

    return time_diff[es_parada].rename('time_diff')

In [7]:
print(f"n_viajes: {len(id_segmentos)}")

# 1-based position of the first segment to process; 1 means nothing has been
# processed yet. Raise it to resume an interrupted run.
prog_anterior = 1
# Slice into a separate name: rebinding id_segmentos here would make a second
# run of this cell slice an already-sliced list and silently skip segments.
segmentos_pendientes = id_segmentos[prog_anterior - 1:]

# Thresholds passed to tiempos_espera: speed below which a point counts as
# stopped (distance units per second) and minimum stop duration (seconds).
UMBRAL_VELOCIDAD = 0.5
TIEMPO_MINIMO_PARADA = 5


def _estadisticos_columna(valores):
    """Return (p25, p75, max, min, mean, median, var) of a pandas Series."""
    p25, p75 = np.percentile(valores, [25, 75])
    return (p25, p75, valores.max(), valores.min(),
            valores.mean(), valores.median(), valores.var())


# Only the table name is interpolated (identifiers cannot be bound as
# parameters); the key values are bound by the driver, so a uid containing a
# quote can neither break nor alter the statement.
consulta_puntos = (
    f"SELECT * FROM segmentos_viajes_{mes}{año} "
    "WHERE tripid = %s AND segid = %s AND uid = %s ORDER BY recorded_at"
)

# Loop-invariant INSERT: 3 key columns + 4 groups of 7 statistics + stops = 32
# positional values, in the column order of the statistics table.
consulta_sql = """
    INSERT INTO {} VALUES (
        %s, %s, %s, %s, %s, %s, %s, %s, %s, %s,
        %s, %s, %s, %s, %s, %s, %s, %s, %s, %s,
        %s, %s, %s, %s, %s, %s, %s, %s, %s, %s,
        %s, %s
    )
    """.format(f'estadisticos_segmentos_{mes}{año}')

for prog, id_viaje in enumerate(segmentos_pendientes, start=prog_anterior):
    print(f"Progreso: {prog}", end="\r")

    # Fetch every point of this (tripid, segid, uid) segment in time order.
    cur.execute(consulta_puntos, tuple(id_viaje))
    puntos_viaje = cur.fetchall()

    df = pd.DataFrame(puntos_viaje, columns=[desc[0] for desc in cur.description])
    if len(df) < 1:
        continue

    # The column statistics are computed before tiempos_espera is called, so
    # they always come from the values stored in the table.
    est_speed = _estadisticos_columna(df['speed'])
    est_aceleration = _estadisticos_columna(df['aceleration'])
    est_bearing = _estadisticos_columna(df['bearing'])

    t_paradas = tiempos_espera(
        df,
        umbral_velocidad=UMBRAL_VELOCIDAD,
        tiempo_minimo_parada=TIEMPO_MINIMO_PARADA,
    )
    stops = len(t_paradas)
    if stops > 0:
        p25_time_stop, p75_time_stop = np.percentile(t_paradas, [25, 75])
        # NOTE(review): np.var defaults to ddof=0 (population variance) while
        # Series.var() used for the other columns defaults to ddof=1 (sample
        # variance). Kept as in the original; confirm the mix is intended.
        est_time_stop = (
            p25_time_stop, p75_time_stop,
            np.max(t_paradas), np.min(t_paradas),
            np.mean(t_paradas), np.median(t_paradas), np.var(t_paradas),
        )
    else:
        # No stops detected: store NULL for every stop-time statistic.
        est_time_stop = (None,) * 7

    datos = (
        tuple(id_viaje)
        + est_speed
        + est_aceleration
        + est_bearing
        + est_time_stop
        + (stops,)
    )

    cur.execute(consulta_sql, datos)
    # Commit per segment so rows already inserted survive an interruption and
    # the run can be resumed through prog_anterior.
    conn.commit()

n_viajes: 35864
Progreso: 35864