## Carga de Librerias

In [1]:
import pandas as pd
import ast
import numpy as np
import matplotlib.pyplot as plt
from sklearn.preprocessing import LabelEncoder
from sklearn.utils.class_weight import compute_class_weight
from sklearn.preprocessing import MinMaxScaler


In [2]:
smart_inferidos = pd.read_csv('base_total_DL.csv', sep = ',')

In [3]:
smart_inferidos = smart_inferidos.drop(columns=['ParadaRegreso', 'FechaHoraRegreso'])
smart_inferidos = smart_inferidos.rename(columns={'ParadaAscenso_Nombre': 'Origen'})
smart_filtrado = smart_inferidos.copy()
# Calcular frecuencias y crear grupos
freq = smart_filtrado['CodigoZona'].value_counts()
low_freq_zones = freq[freq < 20].index

# CORRECCIÓN: Usar .loc para crear la nueva columna de forma segura
smart_filtrado.loc[:, 'CodigoZonaAgrupado'] = smart_filtrado['CodigoZona'].where(
    ~smart_filtrado['CodigoZona'].isin(low_freq_zones), 
    'r1234567'
)
# Mapeo y asignación de destino
zone_map = smart_filtrado.set_index('Origen')['CodigoZonaAgrupado'].to_dict()
# CORRECCIÓN: Usar .loc también para esta nueva columna
smart_filtrado.loc[:, 'CodigoZonaDestino'] = smart_filtrado['Destino'].map(zone_map)
# Calcular la moda de CodigoZonaDestino (excluyendo NaN)
moda_destino = smart_filtrado['CodigoZonaDestino'].mode()[0]
# Rellenar NaN con la moda (esta operación es segura como está, pero .loc también funcionaría)
smart_filtrado['CodigoZonaDestino'] = smart_filtrado['CodigoZonaDestino'].fillna(moda_destino)
smart_inferidos = smart_filtrado.copy()
#----------------------------------------


In [4]:
#PREPARACION DE VARIABLES
smart_inferidos["FechaHoraAscenso"] = pd.to_datetime(smart_inferidos["FechaHoraAscenso"])
smart_inferidos["Hora"] = smart_inferidos["FechaHoraAscenso"].dt.hour
conditions = [
    (smart_inferidos['Hora'].between(0, 4)),   # Muy bajo tráfico
    (smart_inferidos['Hora'].between(5, 6)),   # Hora pico mañana temprano
    (smart_inferidos['Hora'].between(7, 9)),   # Mañana activa
    (smart_inferidos['Hora'].between(10, 18)), # Día estable
    (smart_inferidos['Hora'].between(19, 21)), # Tarde-noche
    (smart_inferidos['Hora'] >= 22)            # Noche muy baja actividad
]
choices = ['Baja_Noche', 'Pico_Mañana_Temprano', 'Mañana_Activa', 
           'Dia_Estable', 'Tarde_Noche', 'Muy_Baja_Noche']
smart_inferidos['Grupo_Hora'] = np.select(conditions, choices, default='Sin_Grupo')
poi_cols = [
    'amenity_fast_food', 'amenity_hospital_healthcare_hospital',
    'amenity_place_of_worship_religion_christian', 'amenity_restaurant',
    'amenity_school', 'landuse_industrial', 'landuse_residential',
    'tourism_hotel', 'recreation', 'public_utility', 'food_drink',
    'education', 'leisure_swimming_pool', 'amenity_cinema', 'amenity_bench'
]
scaler_total = MinMaxScaler()
smart_inferidos['total_pois_scaled'] = scaler_total.fit_transform(smart_inferidos[['total_pois']])
poi_vector = smart_inferidos[poi_cols].to_numpy()
smart_inferidos['Destino'] = smart_inferidos['Destino'].astype(str)
smart_inferidos['Origen'] = smart_inferidos['Origen'].astype(str)
le_tarjeta = LabelEncoder()
smart_inferidos["user_id"] = le_tarjeta.fit_transform(smart_inferidos["CodigoTarjeta"])
le_linea = LabelEncoder()
smart_inferidos["linea_id"] = le_linea.fit_transform(smart_inferidos["LineaViaje"])
le_parada = LabelEncoder()
smart_inferidos["parada_ascenso_id"] = le_parada.fit_transform(smart_inferidos["Origen"])
le_destino = LabelEncoder()
smart_inferidos["parada_descenso_id"] = le_destino.fit_transform(smart_inferidos["Destino"])
le_hora = LabelEncoder()
smart_inferidos["grupo_hora"] = le_hora.fit_transform(smart_inferidos["Grupo_Hora"])
le_zona = LabelEncoder()
smart_inferidos["zona_id"] = le_zona.fit_transform(smart_inferidos["CodigoZonaAgrupado"])


## Modelo

In [5]:
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, Embedding, Flatten, Dense, Concatenate, BatchNormalization, Multiply, Lambda
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.layers import Dropout, Dense  # Importación correcta

# Parámetros
#n_users = smart_inferidos["user_id"].nunique()
n_lineas = smart_inferidos["linea_id"].nunique()
n_paradas = smart_inferidos["parada_ascenso_id"].nunique()
n_destinos = smart_inferidos['parada_descenso_id'].nunique()
n_zonas = smart_inferidos["zona_id"].nunique()
n_poi_cols = 15

# Entradas
#user_input = Input(shape=(1,), name='user_id')
linea_input = Input(shape=(1,), name='linea_id')
parada_input = Input(shape=(1,), name='parada_ascenso_id')
hora_input = Input(shape=(1,), name='grupo_hora')
total_pois_input = Input(shape=(1,), name='total_pois')
poi_vector_input = Input(shape=(n_poi_cols,), name='poi_vector')
zona_origen_input = Input(shape=(1,), name='zona_id')


# Embeddings
#user_emb = Flatten()(Embedding(input_dim=n_users, output_dim=32)(user_input))
linea_emb = Flatten()(Embedding(input_dim=n_lineas, output_dim=16)(linea_input))
parada_emb = Flatten()(Embedding(input_dim=n_paradas, output_dim=16)(parada_input))
zona_emb = Flatten()(Embedding(input_dim=n_zonas, output_dim=8)(zona_origen_input))
# Concatenar
x = Concatenate()([linea_emb, hora_input, total_pois_input, poi_vector_input, parada_emb, zona_emb])
x = Dense(256, activation='relu')(x)
x = Dense(128, activation='relu')(x)
# Mecanismo de atención
#attention = Dense(256, activation='sigmoid')(x)
#x = Multiply()([x, attention])
output = Dense(n_destinos, activation='softmax')(x)
model = Model(inputs=[linea_input, hora_input, total_pois_input, poi_vector_input, parada_input, zona_origen_input], outputs=output)
model.compile(optimizer=Adam(learning_rate=0.001), loss='sparse_categorical_crossentropy', metrics=['accuracy'])
model.summary()


### Ajuste del Modelo

In [6]:
X_all = {
    #'user_id': smart_inferidos['user_id'],
    'linea_id': smart_inferidos['linea_id'],
    'grupo_hora': smart_inferidos['grupo_hora'],
    'total_pois': smart_inferidos['total_pois'],
    'poi_vector': smart_inferidos[poi_cols], 
    'parada_ascenso_id': smart_inferidos['parada_ascenso_id'],
    'zona_id': smart_inferidos['zona_id']
}
y_all = smart_inferidos['parada_descenso_id']

model.fit(X_all, y_all, batch_size=2048, epochs=20, verbose=1)


Epoch 1/20
[1m2001/2001[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m64s[0m 31ms/step - accuracy: 0.0657 - loss: 5.5408
Epoch 2/20
[1m2001/2001[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m65s[0m 32ms/step - accuracy: 0.1154 - loss: 4.4271
Epoch 3/20
[1m2001/2001[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m65s[0m 32ms/step - accuracy: 0.1189 - loss: 4.3558
Epoch 4/20
[1m2001/2001[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m63s[0m 31ms/step - accuracy: 0.1227 - loss: 4.3049
Epoch 5/20
[1m2001/2001[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m63s[0m 31ms/step - accuracy: 0.1247 - loss: 4.2744
Epoch 6/20
[1m2001/2001[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m63s[0m 31ms/step - accuracy: 0.1257 - loss: 4.2568
Epoch 7/20
[1m2001/2001[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m62s[0m 31ms/step - accuracy: 0.1261 - loss: 4.2453
Epoch 8/20
[1m2001/2001[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m63s[0m 31ms/step - accuracy: 0.1267 - loss: 4.2374
Epoch 9/

<keras.src.callbacks.history.History at 0x2510887ea80>

In [9]:
model.save('modelo_descenso_regulares_entrenado.keras')

### Prediccion

In [7]:
smart_nuevos = pd.read_csv('BASES_FINALES_DL/smart_predecir_irregulares.csv', sep = ',')

In [8]:
smart_nuevos = smart_nuevos.rename(columns={'CodigoParada': 'Origen'})
smart_filtrado = smart_nuevos.copy()
# Calcular frecuencias y crear grupos
freq = smart_filtrado['CodigoZona'].value_counts()
low_freq_zones = freq[freq < 20].index

# CORRECCIÓN: Usar .loc para crear la nueva columna de forma segura
smart_filtrado.loc[:, 'CodigoZonaAgrupado'] = smart_filtrado['CodigoZona'].where(
    ~smart_filtrado['CodigoZona'].isin(low_freq_zones), 
    'r1234567'
)
# Mapeo y asignación de destino
zone_map = smart_filtrado.set_index('Origen')['CodigoZonaAgrupado'].to_dict()
smart_nuevos = smart_filtrado.copy()
#----------------------------------------

In [9]:
poi_cols = [
    'amenity_fast_food', 'amenity_hospital_healthcare_hospital',
    'amenity_place_of_worship_religion_christian', 'amenity_restaurant',
    'amenity_school', 'landuse_industrial', 'landuse_residential',
    'tourism_hotel', 'recreation', 'public_utility', 'food_drink',
    'education', 'leisure_swimming_pool', 'amenity_cinema', 'amenity_bench'
]

In [10]:
# Asegúrate de que las columnas de POI estén completas en smart_nuevos
for col in poi_cols:
    if col not in smart_nuevos.columns:
        smart_nuevos[col] = 0

In [11]:
# PREPARACION DE VARIABLES
smart_nuevos["FechaHoraValidacion"] = pd.to_datetime(smart_nuevos["FechaHoraValidacion"])
smart_nuevos["Hora"] = smart_nuevos["FechaHoraValidacion"].dt.hour

conditions = [
    (smart_nuevos['Hora'].between(0, 4)),   # Muy bajo tráfico
    (smart_nuevos['Hora'].between(5, 6)),   # Hora pico mañana temprano
    (smart_nuevos['Hora'].between(7, 9)),   # Mañana activa
    (smart_nuevos['Hora'].between(10, 18)), # Día estable
    (smart_nuevos['Hora'].between(19, 21)), # Tarde-noche
    (smart_nuevos['Hora'] >= 22)            # Noche muy baja actividad
]
choices = ['Baja_Noche', 'Pico_Mañana_Temprano', 'Mañana_Activa', 
           'Dia_Estable', 'Tarde_Noche', 'Muy_Baja_Noche']
smart_nuevos['Grupo_Hora'] = np.select(conditions, choices)

# Escalar con el mismo scaler usado durante entrenamiento
smart_nuevos['total_pois_scaled'] = scaler_total.transform(smart_nuevos[['total_pois']])

# POI vector
poi_vector = smart_nuevos[poi_cols].to_numpy()

# Conversión a string para codificación
smart_nuevos['Origen'] = smart_nuevos['Origen'].astype(str)

# Aplicar transformadores ajustados anteriormente (no volver a ajustar aquí)
def transformar_con_unknown(serie, encoder, unknown_value):
    clases_set = set(encoder.classes_)
    valor_existente = encoder.classes_[0]
    serie_limpia = serie.apply(lambda x: x if x in clases_set else valor_existente)
    return encoder.transform(serie_limpia)


#smart_nuevos["user_id"] = transformar_con_unknown(smart_nuevos["CodigoTarjeta"], le_tarjeta, "unknown_user")
smart_nuevos["linea_id"] = transformar_con_unknown(smart_nuevos["CodigoLinea"], le_linea, "unknown_linea")
smart_nuevos["parada_ascenso_id"] = transformar_con_unknown(smart_nuevos["Origen"], le_parada, "unknown_parada")
smart_nuevos["grupo_hora"] = transformar_con_unknown(smart_nuevos["Grupo_Hora"], le_hora, "unknown_hora")
smart_nuevos["zona_id"] = transformar_con_unknown(smart_nuevos["CodigoZonaAgrupado"], le_zona, "unknown_zona")


<b> Predicciones

In [12]:
batch_size = 1000
results = []
for i in range(0, len(smart_nuevos), batch_size):
    batch = smart_nuevos.iloc[i:i+batch_size]
    pred = model.predict({
        #'user_id': batch['user_id'],
        'linea_id': batch['linea_id'],
        'grupo_hora': batch['grupo_hora'],
        'total_pois': batch['total_pois_scaled'],
        'poi_vector': batch[poi_cols],
        'parada_ascenso_id': batch['parada_ascenso_id'],
        'zona_id': batch['zona_id']
    }, verbose=0)
    results.append(pred)

y_pred = np.vstack(results)


In [13]:
predicho = y_pred.argmax(axis=1)

In [15]:
nombres_predichos = le_destino.inverse_transform(predicho)


In [16]:
smart_nuevos["Destino"] = nombres_predichos


In [18]:
smart_nuevos.to_csv('smart_inferidos_irregulares2', index = False)

In [17]:
smart_nuevos

Unnamed: 0,Origen,NombreParada,CodigoLinea,NombreLinea,FechaHoraValidacion,CodigoTarjeta,CompaniaT,RegistroMunicipal,TipoTarifa,Categoria,...,parada_ascenso_id,total_pois_scaled,CodigoZonaAgrupado,leisure_swimming_pool,amenity_cinema,amenity_bench,Grupo_Hora,grupo_hora,zona_id,Destino
0,2425,AV. HUAYNA-CAPAC,7004,14,2023-10-02 12:59:08,CURS0030270317,07 - COMCUETU S.A,469,0,3,...,607,0.226994,r3845814,0,0,0,Dia_Estable,1,23,3368
1,2425,AV. HUAYNA-CAPAC,7004,14,2023-10-03 13:12:22,CURS0030270317,07 - COMCUETU S.A,469,0,3,...,607,0.226994,r3845814,0,0,0,Dia_Estable,1,23,3368
2,2959,COLEGIO TECNICO SALESIANO,6007,22,2023-10-01 14:18:45,CURS0030262926,06 - COMTRANUTOME S.A,287,0,3,...,891,0.500000,r3845861,0,0,0,Dia_Estable,1,32,1676
3,2193,MERCADO 10 DE AGOSTO,1002,16,2023-10-12 12:19:40,CURS0030262926,01 - LANCOMTRI S.A,9,0,3,...,519,0.576687,r3845817,0,0,0,Dia_Estable,1,26,1886
4,3127,LOS SAUCES,2002,18,2023-10-22 12:24:47,CURS0030262930,02 - URBADIEZ S.A,70,0,3,...,1001,0.500000,r3845861,0,0,0,Dia_Estable,1,32,3442
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1033513,1572,TARQUI Y PIO BRAVO,1002,16,2023-10-30 13:01:57,CURS0030294028,01 - LANCOMTRI S.A,2,0,3,...,293,0.180982,r3845825,0,0,0,Dia_Estable,1,27,767
1033514,2283,REDONDEL GONZALES SUAREZ,7002,3,2023-10-25 10:52:56,CURS0030294510,07 - COMCUETU S.A,407,0,3,...,554,0.122699,r3845812,0,0,0,Dia_Estable,1,21,1524
1033515,1589,MARIA AUXILIADORA,5008,El Carmen,2023-10-31 19:12:58,CURS0030294573,05 - COMTUBANOSSA,220,0,3,...,297,0.180982,r3845825,0,0,0,Tarde_Noche,5,27,2811
1033516,1702,TERMINAL TERRESTRE,4001,7,2023-10-31 12:03:47,CURS0030294590,04 - UNCOMETRO S.A,184,0,3,...,333,0.162577,r3845815,0,0,0,Dia_Estable,1,24,2773
