## Validación

In [None]:
import pandas as pd
import numpy as np
import folium
import time
from geopy.distance import geodesic
import os
import json
from collections import defaultdict, deque
import random
from typing import Tuple, List, Dict, Optional
from dataclasses import dataclass

@dataclass
class RLConfig:
    """Reinforcement-learning hyperparameters."""
    # Step size applied to the temporal-difference error in the Q-table update.
    learning_rate: float = 0.2
    # Weight of the best next-state Q-value when building the update target.
    discount_factor: float = 0.9
    # Initial exploration probability of the epsilon-greedy policy.
    epsilon_start: float = 1.0
    # Floor for epsilon: decay is only applied while epsilon is above this value.
    epsilon_end: float = 0.1
    # Multiplicative factor applied to epsilon on each learning update.
    epsilon_decay: float = 0.98
    # Number of training episodes run per route.
    max_episodes: int = 100
    # Upper bound on agent steps per training episode and per stop optimization.
    max_steps_per_episode: int = 25
    # Maximum length of the agent's experience deque.
    memory_size: int = 5000
    # NOTE(review): not referenced in the visible part of the file; presumably a
    # replay batch size. Confirm before relying on it.
    batch_size: int = 16
    # NOTE(review): not referenced in the visible part of the file; presumably a
    # target-update interval. Confirm before relying on it.
    target_update_freq: int = 50

@dataclass
class SystemConfig:
    """General system settings."""
    # NOTE(review): not referenced in the visible part of the file; presumably
    # the number of routes to process. Confirm.
    cantidad_de_rutas: int = 3
    # Only the first N stops of a route are loaded for optimization.
    max_paradas_por_ruta: int = 15
    # Radius (meters) around a route stop within which a school or health
    # center is considered nearby.
    radio_pois_metros: float = 500.0
    # A stop is flagged as relocated only when its distance to the nearest POI
    # improves by more than this many meters.
    mejora_minima_requerida: float = 25.0
    # NOTE(review): not referenced in the visible part of the file; presumably a
    # cap on how far a stop may be moved. Confirm.
    distancia_maxima_movimiento: float = 150.0
    # NOTE(review): not referenced in the visible part of the file; presumably
    # toggles per-route map generation. Confirm.
    crear_mapas_individuales: bool = True
    # NOTE(review): not referenced in the visible part of the file; presumably
    # toggles report generation. Confirm.
    crear_reportes: bool = True
    # Enables the progress messages printed by the environment and optimizers.
    verbose: bool = True
    # Positions whose estimated distance to a street exceeds this value are not
    # snapped to the street grid.
    distancia_a_calle_maxima: float = 30.0

class BusStopEnvironment:
    """Ambiente de RL para optimización de paradas de bus."""
    
    def __init__(self, route_stops_df, colegios_df, hospitales_df, route_info, system_config):
        """
        Inicializa el ambiente de RL para una ruta específica.
        
        Args:
            route_stops_df (pd.DataFrame): DataFrame con paradas de la ruta
            colegios_df (pd.DataFrame): DataFrame con colegios
            hospitales_df (pd.DataFrame): DataFrame con hospitales
            route_info (dict): Información de la ruta {'linea': X, 'sentido': Y}
            system_config (SystemConfig): Configuración del sistema
        """
        self.route_stops_df = route_stops_df
        self.colegios_df = colegios_df
        self.hospitales_df = hospitales_df
        self.route_info = route_info
        self.system_config = system_config
        
        self.current_stop_data = None
        self.original_position = None
        self.current_position = None
        self.steps_taken = 0
        self.max_steps = 50
        
        self._filter_nearby_pois()
        
        self.action_space_size = 40
        self.directions = [
            (-1, -1), (-1, 0), (-1, 1),
            (0, -1),           (0, 1),
            (1, -1),  (1, 0),  (1, 1)
        ]
        self.distances = [30, 60, 90, 120, 150]
        self.state_size = 6
        
        if self.system_config.verbose:
            print(f"Ruta {route_info['linea']}-{route_info['sentido']}: {len(self.route_stops_df)} paradas, "
                  f"Colegios: {len(self.nearby_colegios)}, Hospitales: {len(self.nearby_hospitales)}")
    
    def _filter_nearby_pois(self):
        """Filtra POIs que están dentro del radio especificado de alguna parada de la ruta."""
        nearby_colegios = []
        nearby_hospitales = []
        
        for _, stop in self.route_stops_df.iterrows():
            stop_pos = (stop['lat'], stop['lon'])
            
            for _, colegio in self.colegios_df.iterrows():
                colegio_pos = (colegio['lat'], colegio['lon'])
                if geodesic(stop_pos, colegio_pos).meters <= self.system_config.radio_pois_metros:
                    nearby_colegios.append(colegio)
            
            for _, hospital in self.hospitales_df.iterrows():
                hospital_pos = (hospital['lat'], hospital['lon'])
                if geodesic(stop_pos, hospital_pos).meters <= self.system_config.radio_pois_metros:
                    nearby_hospitales.append(hospital)
        
        self.nearby_colegios = pd.DataFrame(nearby_colegios).drop_duplicates().reset_index(drop=True)
        self.nearby_hospitales = pd.DataFrame(nearby_hospitales).drop_duplicates().reset_index(drop=True)
        
        if self.nearby_colegios.empty:
            self.nearby_colegios = pd.DataFrame(columns=['nombre', 'lat', 'lon'])
        if self.nearby_hospitales.empty:
            self.nearby_hospitales = pd.DataFrame(columns=['nombre', 'lat', 'lon'])
    
    def reset(self, stop_data: Dict) -> np.ndarray:
        """
        Reinicia el ambiente para una nueva parada.
        
        Args:
            stop_data (Dict): Datos de la parada a optimizar
            
        Returns:
            np.ndarray: Estado inicial del ambiente
        """
        self.current_stop_data = stop_data.copy()
        self.original_position = (stop_data['lat'], stop_data['lon'])
        self.current_position = self.original_position
        self.steps_taken = 0
        
        return self._get_state()
    
    def _get_state(self) -> np.ndarray:
        """
        Obtiene el estado actual del ambiente.
        
        Returns:
            np.ndarray: Vector de estado [lat, lon, dist_colegio_min, dist_hospital_min, en_calle, dist_a_calle]
        """
        lat, lon = self.current_position
        
        if not self.nearby_colegios.empty:
            dist_colegios = [
                geodesic((lat, lon), (row['lat'], row['lon'])).meters
                for _, row in self.nearby_colegios.iterrows()
            ]
            min_dist_colegio = min(dist_colegios)
        else:
            min_dist_colegio = 1000.0
        
        if not self.nearby_hospitales.empty:
            dist_hospitales = [
                geodesic((lat, lon), (row['lat'], row['lon'])).meters
                for _, row in self.nearby_hospitales.iterrows()
            ]
            min_dist_hospital = min(dist_hospitales)
        else:
            min_dist_hospital = 1000.0
        
        en_calle, dist_a_calle = self._is_on_street(lat, lon)
        
        return np.array([
            lat, lon, 
            min_dist_colegio / 1000.0,
            min_dist_hospital / 1000.0,
            float(en_calle),
            dist_a_calle / 100.0
        ])
    
    def _is_on_street(self, lat: float, lon: float) -> Tuple[bool, float]:
        """
        Verifica si una posición está en una calle usando heurística de grilla.
        
        Args:
            lat (float): Latitud
            lon (float): Longitud
            
        Returns:
            Tuple[bool, float]: (está_en_calle, distancia_a_calle)
        """
        lat_local = (lat + 31.4) * 100000
        lon_local = (lon + 64.2) * 100000
        
        lat_remainder = lat_local % 100
        lon_remainder = lon_local % 100
        
        cerca_grilla_lat = (lat_remainder < 15 or lat_remainder > 85)
        cerca_grilla_lon = (lon_remainder < 15 or lon_remainder > 85)
        
        en_calle = cerca_grilla_lat or cerca_grilla_lon
        
        if en_calle:
            dist_a_calle = 0
        else:
            dist_lat = min(lat_remainder, 100 - lat_remainder)
            dist_lon = min(lon_remainder, 100 - lon_remainder)
            dist_a_calle = min(dist_lat, dist_lon) * 1.11
        
        return en_calle, dist_a_calle
    
    def _move_to_nearest_street(self, lat: float, lon: float) -> Tuple[float, float]:
        """
        Mueve una posición a la calle más cercana si está en una cuadra.
        
        Args:
            lat (float): Latitud actual
            lon (float): Longitud actual
            
        Returns:
            Tuple[float, float]: Nueva latitud y longitud en calle
        """
        en_calle, dist_a_calle = self._is_on_street(lat, lon)
        
        if en_calle or dist_a_calle > self.system_config.distancia_a_calle_maxima:
            return lat, lon
        
        lat_local = (lat + 31.4) * 100000
        lon_local = (lon + 64.2) * 100000
        
        lat_remainder = lat_local % 100
        lon_remainder = lon_local % 100
        
        if lat_remainder < 50:
            lat_local = lat_local - lat_remainder
        else:
            lat_local = lat_local + (100 - lat_remainder)
        
        if lon_remainder < 50:
            lon_local = lon_local - lon_remainder
        else:
            lon_local = lon_local + (100 - lon_remainder)
        
        new_lat = (lat_local / 100000) - 31.4
        new_lon = (lon_local / 100000) - 64.2
        
        return new_lat, new_lon
    
    def step(self, action: int) -> Tuple[np.ndarray, float, bool, Dict]:
        """
        Ejecuta una acción en el ambiente.
        
        Args:
            action (int): Índice de la acción a ejecutar
            
        Returns:
            Tuple[np.ndarray, float, bool, Dict]: (nuevo_estado, recompensa, terminado, info)
        """
        self.steps_taken += 1
        
        direction_idx = action // len(self.distances)
        distance_idx = action % len(self.distances)
        
        direction = self.directions[direction_idx]
        distance = self.distances[distance_idx]
        
        lat, lon = self.current_position
        
        distance_deg_lat = distance / 111000
        distance_deg_lon = distance / (111000 * abs(np.cos(np.radians(lat))))
        
        new_lat = lat + (direction[0] * distance_deg_lat)
        new_lon = lon + (direction[1] * distance_deg_lon)
        
        if not (-32.0 <= new_lat <= -31.0 and -65.0 <= new_lon <= -64.0):
            reward = -50
            done = True
            info = {'invalid_position': True}
            return self._get_state(), reward, done, info
        
        old_position = self.current_position
        self.current_position = (new_lat, new_lon)
        
        reward = self._calculate_reward(old_position, self.current_position)
        
        done = (self.steps_taken >= self.max_steps) or (reward > 100)
        
        info = {
            'steps': self.steps_taken,
            'improvement': reward,
            'position_change': geodesic(old_position, self.current_position).meters
        }
        
        return self._get_state(), reward, done, info
    
    def _calculate_reward(self, old_pos: Tuple[float, float], new_pos: Tuple[float, float]) -> float:
        """
        Score a move from old_pos to new_pos.

        The reward is the reduction in distance to the nearest POI (school or
        hospital), minus half the street distance when the new position is off
        the street grid, minus 0.1 per meter moved, plus a bonus of 50 when the
        POI distance drops by more than 50 m.

        Args:
            old_pos (Tuple[float, float]): Previous position
            new_pos (Tuple[float, float]): New position

        Returns:
            float: Computed reward
        """
        def nearest_poi(pos):
            """Distance from pos to the closest school or hospital."""
            to_school = self._min_distance_to_pois(pos, self.nearby_colegios)
            to_hospital = self._min_distance_to_pois(pos, self.nearby_hospitales)
            return min(to_school, to_hospital)

        poi_improvement = nearest_poi(old_pos) - nearest_poi(new_pos)

        on_street, street_distance = self._is_on_street(new_pos[0], new_pos[1])
        if on_street:
            street_penalty = 0
        else:
            street_penalty = -street_distance * 0.5

        movement_penalty = -geodesic(old_pos, new_pos).meters * 0.1

        if poi_improvement > 50:
            significant_bonus = 50
        else:
            significant_bonus = 0

        return poi_improvement + street_penalty + movement_penalty + significant_bonus
    
    def _min_distance_to_pois(self, position: Tuple[float, float], pois_df: pd.DataFrame) -> float:
        """
        Calcula la distancia mínima a POIs.
        
        Args:
            position (Tuple[float, float]): Posición de referencia
            pois_df (pd.DataFrame): DataFrame con POIs
            
        Returns:
            float: Distancia mínima en metros
        """
        if pois_df.empty:
            return 1000.0
        
        distances = [
            geodesic(position, (row['lat'], row['lon'])).meters
            for _, row in pois_df.iterrows()
        ]
        return min(distances)

class QLearningAgent:
    """Agente Q-Learning para optimización de paradas."""
    
    def __init__(self, state_size: int, action_size: int, config: RLConfig):
        """
        Inicializa el agente Q-Learning.
        
        Args:
            state_size (int): Tamaño del espacio de estados
            action_size (int): Tamaño del espacio de acciones
            config (RLConfig): Configuración de RL
        """
        self.state_size = state_size
        self.action_size = action_size
        self.config = config
        
        self.q_table = defaultdict(lambda: np.zeros(action_size))
        self.epsilon = config.epsilon_start
        self.memory = deque(maxlen=config.memory_size)
        
        self.training_stats = {
            'episodes': [],
            'rewards': [],
            'steps': [],
            'epsilon_values': []
        }
    
    def _discretize_state(self, state: np.ndarray) -> str:
        """
        Discretiza el estado continuo para la tabla Q.
        
        Args:
            state (np.ndarray): Estado continuo
            
        Returns:
            str: Clave del estado discretizado
        """
        lat_bin = int((state[0] + 31.5) * 50) // 5
        lon_bin = int((state[1] + 64.5) * 50) // 5
        dist_colegio_bin = min(int(state[2] * 5), 10)
        dist_hospital_bin = min(int(state[3] * 5), 10)
        en_calle_bin = int(state[4])
        dist_calle_bin = min(int(state[5]), 5)
        
        return f"{lat_bin}_{lon_bin}_{dist_colegio_bin}_{dist_hospital_bin}_{en_calle_bin}_{dist_calle_bin}"
    
    def choose_action(self, state: np.ndarray, training: bool = True) -> int:
        """
        Elige una acción usando estrategia epsilon-greedy.
        
        Args:
            state (np.ndarray): Estado actual
            training (bool): Si está en modo entrenamiento
            
        Returns:
            int: Índice de la acción elegida
        """
        state_key = self._discretize_state(state)
        
        if training and random.random() < self.epsilon:
            return random.randint(0, self.action_size - 1)
        else:
            q_values = self.q_table[state_key]
            return np.argmax(q_values)
    
    def learn(self, state: np.ndarray, action: int, reward: float, 
              next_state: np.ndarray, done: bool):
        """
        Actualiza la tabla Q basado en la experiencia.
        
        Args:
            state (np.ndarray): Estado actual
            action (int): Acción tomada
            reward (float): Recompensa recibida
            next_state (np.ndarray): Siguiente estado
            done (bool): Si el episodio terminó
        """
        state_key = self._discretize_state(state)
        next_state_key = self._discretize_state(next_state)
        
        current_q = self.q_table[state_key][action]
        
        if done:
            target_q = reward
        else:
            next_max_q = np.max(self.q_table[next_state_key])
            target_q = reward + self.config.discount_factor * next_max_q
        
        self.q_table[state_key][action] = current_q + self.config.learning_rate * (target_q - current_q)
        
        if self.epsilon > self.config.epsilon_end:
            self.epsilon *= self.config.epsilon_decay
    
    def train_episode(self, env: BusStopEnvironment, stop_data: Dict) -> Dict:
        """
        Entrena un episodio completo.
        
        Args:
            env (BusStopEnvironment): Ambiente de entrenamiento
            stop_data (Dict): Datos de la parada
            
        Returns:
            Dict: Resultados del episodio
        """
        state = env.reset(stop_data)
        total_reward = 0
        steps = 0
        
        for step in range(self.config.max_steps_per_episode):
            action = self.choose_action(state, training=True)
            next_state, reward, done, info = env.step(action)
            
            self.learn(state, action, reward, next_state, done)
            
            state = next_state
            total_reward += reward
            steps += 1
            
            if done:
                break
        
        return {
            'total_reward': total_reward,
            'steps': steps,
            'final_position': env.current_position,
            'improvement': total_reward
        }

class RouteOptimizer:
    """Optimizador de una ruta específica usando RL."""
    
    def __init__(self, route_info: Dict, rl_config: RLConfig, system_config: SystemConfig):
        """
        Inicializa el optimizador de ruta.
        
        Args:
            route_info (Dict): Información de la ruta {'linea': X, 'sentido': Y}
            rl_config (RLConfig): Configuración de RL
            system_config (SystemConfig): Configuración del sistema
        """
        self.route_info = route_info
        self.rl_config = rl_config
        self.system_config = system_config
        
        self.route_stops_df = None
        self.colegios_df = None
        self.hospitales_df = None
        
        self.environment = None
        self.agent = None
        
        self.optimized_stops = []
        self.training_history = {}
    
    def load_route_data(self, all_stops_df, colegios_df, hospitales_df):
        """
        Carga los datos específicos de esta ruta.
        
        Args:
            all_stops_df (pd.DataFrame): DataFrame con todas las paradas
            colegios_df (pd.DataFrame): DataFrame con colegios
            hospitales_df (pd.DataFrame): DataFrame con hospitales
        """
        self.route_stops_df = all_stops_df[
            (all_stops_df['linea'] == self.route_info['linea']) & 
            (all_stops_df['sentido'] == self.route_info['sentido'])
        ].head(self.system_config.max_paradas_por_ruta).copy()
        
        self.colegios_df = colegios_df.copy()
        self.hospitales_df = hospitales_df.copy()
        
        if self.system_config.verbose:
            print(f"Ruta {self.route_info['linea']}-{self.route_info['sentido']}: {len(self.route_stops_df)} paradas")
    
    def setup_rl_components(self):
        """Create the environment for this route and an agent sized to it."""
        env = BusStopEnvironment(
            self.route_stops_df,
            self.colegios_df,
            self.hospitales_df,
            self.route_info,
            self.system_config
        )
        self.environment = env

        # The agent's state and action dimensions come from the environment.
        self.agent = QLearningAgent(
            state_size=env.state_size,
            action_size=env.action_space_size,
            config=self.rl_config
        )
    
    def train_agent(self):
        """Entrena el agente en las paradas de esta ruta."""
        if self.system_config.verbose:
            print(f"Entrenando RL para {len(self.route_stops_df)} paradas...")
        
        training_results = []
        
        for episode in range(self.rl_config.max_episodes):
            stop_data = self.route_stops_df.sample(1).iloc[0].to_dict()
            result = self.agent.train_episode(self.environment, stop_data)
            training_results.append(result)
            
            self.agent.training_stats['episodes'].append(episode)
            self.agent.training_stats['rewards'].append(result['total_reward'])
            self.agent.training_stats['steps'].append(result['steps'])
            self.agent.training_stats['epsilon_values'].append(self.agent.epsilon)
        
        self.training_history = {
            'final_epsilon': self.agent.epsilon,
            'avg_final_reward': np.mean([r['total_reward'] for r in training_results[-20:]]),
            'total_episodes': len(training_results)
        }
    
    def optimize_all_stops(self):
        """Optimiza todas las paradas de esta ruta."""
        if self.system_config.verbose:
            print(f"Optimizando {len(self.route_stops_df)} paradas...")
        
        optimized_count = 0
        
        for _, stop in self.route_stops_df.iterrows():
            try:
                result = self._optimize_single_stop(stop.to_dict())
                result.update({
                    'linea': self.route_info['linea'],
                    'sentido': self.route_info['sentido']
                })
                
                self.optimized_stops.append(result)
                
                if result['relocated']:
                    optimized_count += 1
                        
            except Exception as e:
                error_result = stop.to_dict()
                error_result.update({
                    'relocated': False,
                    'error': str(e),
                    'linea': self.route_info['linea'],
                    'sentido': self.route_info['sentido']
                })
                self.optimized_stops.append(error_result)
        
        if self.system_config.verbose:
            print(f"Resultado: {optimized_count}/{len(self.route_stops_df)} paradas optimizadas")
    
    def _optimize_single_stop(self, stop_data: Dict) -> Dict:
        """
        Optimiza una parada individual.
        
        Args:
            stop_data (Dict): Datos de la parada
            
        Returns:
            Dict: Resultado de la optimización
        """
        state = self.environment.reset(stop_data)
        
        total_steps = 0
        
        for step in range(self.rl_config.max_steps_per_episode):
            action = self.agent.choose_action(state, training=False)
            next_state, reward, done, info = self.environment.step(action)
            
            state = next_state
            total_steps += 1
            
            if done or reward > 50:
                break
        
        rl_final_pos = self.environment.current_position
        street_final_pos = self.environment._move_to_nearest_street(rl_final_pos[0], rl_final_pos[1])
        
        original_pos = (stop_data['lat'], stop_data['lon'])
        
        original_poi_dist = min(
            self.environment._min_distance_to_pois(original_pos, self.environment.nearby_colegios),
            self.environment._min_distance_to_pois(original_pos, self.environment.nearby_hospitales)
        )
        
        street_poi_dist = min(
            self.environment._min_distance_to_pois(street_final_pos, self.environment.nearby_colegios),
            self.environment._min_distance_to_pois(street_final_pos, self.environment.nearby_hospitales)
        )
        
        improvement = original_poi_dist - street_poi_dist
        
        result = stop_data.copy()
        result.update({
            'relocated': improvement > self.system_config.mejora_minima_requerida,
            'improvement_meters': improvement,
            'rl_steps_taken': total_steps,
            'rl_raw_position': rl_final_pos,
            'street_adjusted_position': street_final_pos,
        })
        
        if result['relocated']:
            result.update({
                'lat_original': original_pos[0],
                'lon_original': original_pos[1],
                'lat': street_final_pos[0],
                'lon': street_final_pos[1],
                'movement_distance_meters': geodesic(original_pos, street_final_pos).meters
            })
        
        return result
    
    def create_route_map(self):
        """
        Crea un mapa interactivo específico de esta ruta.
        
        Returns:
            folium.Map: Mapa de la ruta optimizada
        """
        if not self.optimized_stops:
            return None
        
        route_name = f"{self.route_info['linea']}-{self.route_info['sentido']}"
        
        lats = [stop['lat'] for stop in self.optimized_stops]
        lons = [stop['lon'] for stop in self.optimized_stops]
        
        center_lat = np.mean(lats)
        center_lon = np.mean(lons)
        
        m = folium.Map(
            location=[center_lat, center_lon],
            zoom_start=14,
            tiles='OpenStreetMap'
        )
        
        paradas_originales = folium.FeatureGroup(name='Ubicaciones Originales', show=True)
        paradas_optimizadas = folium.FeatureGroup(name='Optimizadas por RL', show=True)
        paradas_sin_cambios = folium.FeatureGroup(name='Sin Cambios', show=True)
        colegios_layer = folium.FeatureGroup(name='Colegios Cercanos', show=True)
        hospitales_layer = folium.FeatureGroup(name='Hospitales Cercanos', show=True)
        
        count_optimizadas = 0
        total_mejora = 0
        
        for stop in self.optimized_stops:
            nombre = stop['parada_nombre']
            
            if stop.get('relocated', False):
                lat_orig = stop['lat_original']
                lon_orig = stop['lon_original']
                lat_final = stop['lat']
                lon_final = stop['lon']
                mejora = stop.get('improvement_meters', 0)
                
                folium.CircleMarker(
                    location=[lat_orig, lon_orig],
                    radius=8,
                    popup=f"ORIGINAL: {nombre}",
                    tooltip=f"Original: {nombre}",
                    color='#d32f2f',
                    fillColor='#f44336',
                    fillOpacity=0.8,
                    weight=3
                ).add_to(paradas_originales)
                
                folium.CircleMarker(
                    location=[lat_final, lon_final],
                    radius=10,
                    popup=f"OPTIMIZADA: {nombre} (+{mejora:.1f}m)",
                    tooltip=f"{nombre} (+{mejora:.0f}m)",
                    color='#2e7d32',
                    fillColor='#4caf50',
                    fillOpacity=0.9,
                    weight=4
                ).add_to(paradas_optimizadas)
                
                folium.PolyLine(
                    locations=[[lat_orig, lon_orig], [lat_final, lon_final]],
                    color='#2e7d32',
                    weight=4,
                    opacity=0.9,
                    popup=f"Mejora: {mejora:.1f}m",
                    tooltip=f"Mejora: +{mejora:.0f}m"
                ).add_to(paradas_optimizadas)
                
                count_optimizadas += 1
                total_mejora += mejora
                
            else:
                lat_actual = stop['lat']
                lon_actual = stop['lon']
                
                folium.CircleMarker(
                    location=[lat_actual, lon_actual],
                    radius=6,
                    popup=f"SIN CAMBIOS: {nombre}",
                    tooltip=f"{nombre} (sin cambios)",
                    color='#1976d2',
                    fillColor='#2196f3',
                    fillOpacity=0.7,
                    weight=2
                ).add_to(paradas_sin_cambios)
        
        for _, colegio in self.environment.nearby_colegios.iterrows():
            folium.Marker(
                location=[colegio['lat'], colegio['lon']],
                popup=f"Colegio: {colegio['nombre']}",
                icon=folium.Icon(color='purple', icon='graduation-cap', prefix='fa')
            ).add_to(colegios_layer)
        
        for _, hospital in self.environment.nearby_hospitales.iterrows():
            folium.Marker(
                location=[hospital['lat'], hospital['lon']],
                popup=f"Hospital: {hospital['nombre']}",
                icon=folium.Icon(color='red', icon='plus', prefix='fa')
            ).add_to(hospitales_layer)
        
        paradas_originales.add_to(m)
        paradas_optimizadas.add_to(m)
        paradas_sin_cambios.add_to(m)
        colegios_layer.add_to(m)
        hospitales_layer.add_to(m)
        
        folium.LayerControl(position='topleft', collapsed=False).add_to(m)
        
        os.makedirs('mapas_por_ruta', exist_ok=True)
        filename = f'mapas_por_ruta/ruta_{route_name.replace("-", "_")}.html'
        m.save(filename)
        
        return m
    
    def get_results_summary(self):
        """
        Obtiene un resumen de los resultados de esta ruta.
        
        Returns:
            Dict: Resumen de resultados
        """
        if not self.optimized_stops:
            return {}
        
        optimized_count = len([s for s in self.optimized_stops if s.get('relocated', False)])
        total_improvement = sum([s.get('improvement_meters', 0) for s in self.optimized_stops if s.get('relocated', False)])
        
        return {
            'route_info': self.route_info,
            'total_stops': len(self.optimized_stops),
            'optimized_stops': optimized_count,
            'success_rate': (optimized_count / len(self.optimized_stops)) * 100,
            'total_improvement_meters': total_improvement,
            'avg_improvement_meters': total_improvement / optimized_count if optimized_count > 0 else 0,
            'nearby_colegios': len(self.environment.nearby_colegios),
            'nearby_hospitales': len(self.environment.nearby_hospitales)
        }

class RLBusStopRelocator:
    """Top-level driver that optimizes bus stops one route at a time.

    It loads the stop, school and hospital tables (or generates synthetic
    ones), runs one ``RouteOptimizer`` per selected route, and aggregates the
    per-route outcomes into a DataFrame and a JSON summary report.
    """

    # Input CSV locations, relative to the current working directory.
    _PARADAS_CSV = '../1_preprocess/0_EDA/result/2_rutas_paradas.csv'
    _COLEGIOS_CSV = '../1_preprocess/0_EDA/result/3_barrios_cordoba_escuelas.csv'
    _HOSPITALES_CSV = '../1_preprocess/0_EDA/result/4_centros_de_salud_municipales_cordoba.csv'

    def __init__(self, rl_config: Optional["RLConfig"] = None,
                 system_config: Optional["SystemConfig"] = None):
        """
        Initialize the stop relocation system.

        Args:
            rl_config (RLConfig, optional): RL parameters; a default
                ``RLConfig()`` is used when omitted.
            system_config (SystemConfig, optional): System parameters; a
                default ``SystemConfig()`` is used when omitted.
        """
        self.rl_config = rl_config or RLConfig()
        self.system_config = system_config or SystemConfig()

        # Input tables, populated by load_data().
        self.df_paradas = None
        self.df_colegios = None
        self.df_hospitales = None

        # Per-route optimizers keyed by "<linea>-<sentido>", plus accumulated outputs.
        self.route_optimizers = {}
        self.all_results = []
        self.routes_summary = []

    def load_data(self):
        """
        Load the stop, school and hospital datasets.

        All three CSV files are required. If any of them is missing, or if
        reading/cleaning them fails, synthetic sample data is generated
        instead so the pipeline can still run.

        Returns:
            bool: Always True; the tables are populated either from disk or
            from the generated sample data.
        """
        csv_paths = (self._PARADAS_CSV, self._COLEGIOS_CSV, self._HOSPITALES_CSV)

        # Every file is needed; a partial set cannot be loaded, so fall back
        # up front instead of failing halfway through read_csv.
        if not all(os.path.exists(path) for path in csv_paths):
            self._create_sample_data()
            return True

        try:
            # Rows without coordinates are dropped from each table.
            paradas, colegios, hospitales = [
                pd.read_csv(path).dropna(subset=['lat', 'lon'])
                for path in csv_paths
            ]
        except Exception as e:
            # Deliberate best-effort: any load problem degrades to sample data.
            print(f"Error cargando datos: {e}")
            self._create_sample_data()
            return True

        # Assign only after all three loaded, so no partial state is kept.
        self.df_paradas = paradas
        self.df_colegios = colegios
        self.df_hospitales = hospitales

        if self.system_config.verbose:
            print(f"Datos cargados - Paradas: {len(self.df_paradas)}, "
                  f"Colegios: {len(self.df_colegios)}, Hospitales: {len(self.df_hospitales)}")

        return True

    def _create_sample_data(self):
        """Populate the three tables with randomly generated data around Córdoba."""
        print("Creando datos de ejemplo para Córdoba...")

        cordoba_center = (-31.4201, -64.1888)

        paradas_data = []
        lineas = ['A', 'B', 'C', 'D']
        sentidos = ['I', 'V']

        # One route per (line, direction) pair, each with 8-15 stops. Stops
        # advance by a fixed step per index and receive small Gaussian jitter.
        for i, linea in enumerate(lineas):
            for j, sentido in enumerate(sentidos):
                num_paradas = random.randint(8, 15)
                for k in range(num_paradas):
                    offset_lat = (i - 1.5) * 0.02 + (k * 0.005)
                    offset_lon = (j - 0.5) * 0.02 + (k * 0.003)

                    lat = cordoba_center[0] + offset_lat + np.random.normal(0, 0.001)
                    lon = cordoba_center[1] + offset_lon + np.random.normal(0, 0.001)

                    paradas_data.append({
                        'parada_nombre': f'Parada {linea}{k+1}',
                        'lat': lat,
                        'lon': lon,
                        'linea': linea,
                        'sentido': sentido,
                        'codigo': f'P{linea}{sentido}{k+1:02d}'
                    })

        # 25 schools scattered around the center.
        colegios_data = []
        for i in range(25):
            lat = cordoba_center[0] + np.random.normal(0, 0.03)
            lon = cordoba_center[1] + np.random.normal(0, 0.03)
            colegios_data.append({
                'nombre': f'Colegio {i+1}',
                'lat': lat,
                'lon': lon
            })

        # 15 hospitals scattered around the center.
        hospitales_data = []
        for i in range(15):
            lat = cordoba_center[0] + np.random.normal(0, 0.025)
            lon = cordoba_center[1] + np.random.normal(0, 0.025)
            hospitales_data.append({
                'nombre': f'Hospital {i+1}',
                'lat': lat,
                'lon': lon
            })

        self.df_paradas = pd.DataFrame(paradas_data)
        self.df_colegios = pd.DataFrame(colegios_data)
        self.df_hospitales = pd.DataFrame(hospitales_data)

    def process_all_routes(self):
        """
        Optimize each selected route independently and collect the results.

        The first ``system_config.cantidad_de_rutas`` distinct
        (linea, sentido) pairs are processed. A route that raises during
        processing is reported and skipped; the remaining routes still run.

        Returns:
            pd.DataFrame: One row per processed stop across all routes, or
            None if ``load_data`` reports failure.
        """
        start_time = time.time()

        if not self.load_data():
            return None

        available_routes = self.df_paradas[['linea', 'sentido']].drop_duplicates()
        selected_routes = available_routes.head(self.system_config.cantidad_de_rutas)

        print(f"Procesando {len(selected_routes)} rutas...")

        for _, route_row in selected_routes.iterrows():
            route_info = {
                'linea': route_row['linea'],
                'sentido': route_row['sentido']
            }

            try:
                route_optimizer = RouteOptimizer(route_info, self.rl_config, self.system_config)
                route_optimizer.load_route_data(self.df_paradas, self.df_colegios, self.df_hospitales)
                route_optimizer.setup_rl_components()
                route_optimizer.train_agent()
                route_optimizer.optimize_all_stops()
                route_optimizer.create_route_map()

                self.route_optimizers[f"{route_info['linea']}-{route_info['sentido']}"] = route_optimizer
                self.all_results.extend(route_optimizer.optimized_stops)

                # A route with no optimized stops yields an empty summary;
                # keeping it would break the aggregation in the final report.
                summary = route_optimizer.get_results_summary()
                if summary:
                    self.routes_summary.append(summary)

            except Exception as e:
                print(f"Error procesando ruta {route_info['linea']}-{route_info['sentido']}: {e}")
                continue

        total_time = time.time() - start_time
        self._create_final_summary(total_time)

        return pd.DataFrame(self.all_results)

    @staticmethod
    def _json_default(value):
        """Convert NumPy scalars to plain Python values for ``json``.

        Values read through pandas (for example an integer ``linea``) may be
        NumPy scalars, which the ``json`` module cannot serialize directly.

        Raises:
            TypeError: If ``value`` is not a NumPy scalar.
        """
        if isinstance(value, np.generic):
            return value.item()
        raise TypeError(f"Object of type {type(value).__name__} is not JSON serializable")

    def _create_final_summary(self, total_time: float):
        """
        Print the global summary and write it to ``reportes/resumen_rutas_rl.json``.

        Empty per-route summaries are ignored. Nothing is printed or written
        when no route produced a summary.

        Args:
            total_time (float): Total processing time in seconds.
        """
        summaries = [r for r in self.routes_summary if r]
        if not summaries:
            return

        total_stops = sum(r['total_stops'] for r in summaries)
        total_optimized = sum(r['optimized_stops'] for r in summaries)
        total_improvement = sum(r['total_improvement_meters'] for r in summaries)
        # Guard the ratio so a zero stop count cannot raise ZeroDivisionError.
        success_rate = (total_optimized / total_stops) * 100 if total_stops else 0.0

        print("\nResumen final:")
        print(f"Tiempo total: {total_time:.1f}s")
        print(f"Rutas procesadas: {len(summaries)}")
        print(f"Paradas totales: {total_stops}")
        print(f"Paradas optimizadas: {total_optimized}")
        print(f"Tasa de éxito: {success_rate:.1f}%")
        print(f"Mejora total: {total_improvement:.1f} metros")

        summary_data = {
            'timestamp': pd.Timestamp.now().isoformat(),
            'processing_time_seconds': total_time,
            'total_routes': len(summaries),
            'total_stops': total_stops,
            'total_optimized': total_optimized,
            'global_success_rate': success_rate,
            'total_improvement_meters': total_improvement,
            'routes_detail': summaries
        }

        # Serialize before opening the file so a serialization error cannot
        # leave a truncated report on disk.
        payload = json.dumps(summary_data, indent=2, ensure_ascii=False,
                             default=self._json_default)

        os.makedirs('reportes', exist_ok=True)
        with open('reportes/resumen_rutas_rl.json', 'w', encoding='utf-8') as f:
            f.write(payload)

def main_rl_rutas(
    cantidad_de_rutas: int = 3,
    max_paradas_por_ruta: int = 12,
    learning_rate: float = 0.2,
    discount_factor: float = 0.9,
    epsilon_start: float = 1.0,
    epsilon_end: float = 0.1,
    epsilon_decay: float = 0.98,
    max_episodes: int = 80,
    max_steps_per_episode: int = 25,
    verbose: bool = True
):
    """
    Run the route-by-route RL optimization and save the results as CSV.

    Builds the RL and system configurations from the arguments, processes the
    routes with ``RLBusStopRelocator``, and writes the resulting table to
    ``reportes/resultados_rl_por_ruta.csv``.

    Args:
        cantidad_de_rutas (int): Number of routes to process
        max_paradas_por_ruta (int): Maximum number of stops per route
        learning_rate (float): Learning rate
        discount_factor (float): Discount factor
        epsilon_start (float): Initial epsilon
        epsilon_end (float): Final epsilon
        epsilon_decay (float): Epsilon decay
        max_episodes (int): Maximum number of episodes
        max_steps_per_episode (int): Maximum steps per episode
        verbose (bool): Whether to print detailed information

    Returns:
        pd.DataFrame: Optimization results, or None when no route produced
        any result.
    """

    rl_config = RLConfig(
        learning_rate=learning_rate,
        discount_factor=discount_factor,
        epsilon_start=epsilon_start,
        epsilon_end=epsilon_end,
        epsilon_decay=epsilon_decay,
        max_episodes=max_episodes,
        max_steps_per_episode=max_steps_per_episode
    )

    system_config = SystemConfig(
        cantidad_de_rutas=cantidad_de_rutas,
        max_paradas_por_ruta=max_paradas_por_ruta,
        radio_pois_metros=500.0,
        mejora_minima_requerida=25.0,
        distancia_maxima_movimiento=150.0,
        crear_mapas_individuales=True,
        crear_reportes=True,
        verbose=verbose,
        distancia_a_calle_maxima=30.0
    )

    rl_system = RLBusStopRelocator(rl_config, system_config)
    result = rl_system.process_all_routes()

    if result is None or result.empty:
        print("No se pudieron procesar las rutas")
        return None

    output_path = 'reportes/resultados_rl_por_ruta.csv'
    # Create the output directory here instead of relying on an earlier step
    # having created it as a side effect.
    os.makedirs(os.path.dirname(output_path), exist_ok=True)
    result.to_csv(output_path, index=False)
    # Report the path the file was actually written to.
    print(f"Resultados guardados en: {output_path}")
    return result

if __name__ == "__main__":
    # Script entry point: run the optimization with a fixed demo configuration.
    print("Sistema de Optimización RL para Paradas de Bus")

    demo_settings = {
        'cantidad_de_rutas': 3,
        'max_paradas_por_ruta': 12,
        'max_episodes': 80,
        'learning_rate': 0.25,
        'verbose': True,
    }
    main_rl_rutas(**demo_settings)