In [160]:

import numpy as np
import pandas as pd
import osmnx as ox
import geopandas as gpd
from shapely.geometry import Point, LineString, box
import matplotlib.pyplot as plt
from typing import List, Dict, Tuple, Optional
import networkx as nx
from dataclasses import dataclass
import ast
import rtree
import logging
from tqdm import tqdm
from math import pi
from typing import List, Dict, Optional
import seaborn
import os
import folium
from branca.colormap import LinearColormap
from folium.features import DivIcon



# Enhanced Viterbi Matcher Class

In [161]:

# Module-level tuning constants.
# NOTE(review): none of these names is referenced by the code visible in this
# section (EnhancedViterbiMatcher reads its own `config` dict instead) — confirm
# they are still needed before relying on them.
SIGMA_Z = 15.0  # Emission tolerance scale; unit presumably metres — TODO confirm
MAX_DISTANCE = 50.0  # Candidate search limit; unit presumably metres — TODO confirm
TURN_ANGLE_THRESHOLD = pi / 4  # 45 degrees, expressed in radians
MIN_TRANSITION_PROB = 1e-5  # Non-zero floor for a transition probability

def process_trajectory(polyline_str: str) -> Optional[List[tuple]]:
    """Parse a polyline literal into a list of validated coordinate pairs.

    Args:
        polyline_str: String holding a Python literal such as
            ``"[[x1, y1], [x2, y2], ...]"``.

    Returns:
        The pairs that passed validation, in their original order, or ``None``
        when the input is not a string, cannot be parsed, is not a list/tuple,
        or yields fewer than two valid pairs.

    A pair is valid when it is a list/tuple of exactly two real numbers with
    ``-180 <= x <= 180`` and ``-90 <= y <= 90``. Booleans are rejected even
    though ``bool`` is a subclass of ``int``, and mappings/strings of length
    two are no longer mistaken for coordinate pairs.
    """
    if not isinstance(polyline_str, str):
        return None

    try:
        coords = ast.literal_eval(polyline_str)
    except (ValueError, SyntaxError, TypeError, MemoryError, RecursionError) as e:
        # These are the exceptions literal_eval documents for malformed input.
        logging.warning(f"Error processing trajectory: {str(e)}")
        return None

    if not isinstance(coords, (list, tuple)):
        logging.warning(
            f"Error processing trajectory: expected a list of coordinates, got {type(coords).__name__}"
        )
        return None
    if not coords:
        return None

    def _is_number(value) -> bool:
        # bool passes isinstance(..., int); exclude it explicitly.
        return isinstance(value, (int, float)) and not isinstance(value, bool)

    valid_coords = []
    for coord in coords:
        if not isinstance(coord, (list, tuple)) or len(coord) != 2:
            continue
        x, y = coord
        if _is_number(x) and _is_number(y) and -180 <= x <= 180 and -90 <= y <= 90:
            valid_coords.append(coord)

    # A trajectory needs at least two points to describe any movement.
    return valid_coords if len(valid_coords) >= 2 else None


class EnhancedViterbiMatcher:
    def __init__(self, graph, edges_gdf, config=None):
        """Initialize matcher with improved configuration"""
        self.graph = graph
        self.edges_gdf = edges_gdf.copy()
        
        if isinstance(self.edges_gdf.index, pd.MultiIndex):
            self.edges_gdf = self.edges_gdf.reset_index(drop=True)
        self.edges_gdf.index = range(len(self.edges_gdf))
        
        # Enhanced default configuration
        default_config = {
            'max_candidates': 20,          # Increased from 10
            'max_distance': 100.0,         # Increased from 50.0
            'sigma_z': 50.0,              # Adjusted for better GPS noise handling
            'beta': 2.0,                  # Increased for better transition scoring
            'min_prob_norm': 1e-7,        # Lowered for more flexibility
            'max_speed': 50.0,            # Maximum expected speed (m/s)
            'min_speed': 0.1,             # Minimum expected speed (m/s)
            'angle_tolerance': np.pi/2,    # 90 degrees angle tolerance
            'max_angle_penalty': 0.5,      # Maximum penalty for sharp turns
            'distance_decay': 0.85,        # Distance decay factor
            'sequential_matching': True    # Enable sequential matching for long trajectories
        }
        
        if config:
            default_config.update(config)
        self.config = default_config
        
        self._init_spatial_index()
        self.edge_to_nodes = self._build_edge_to_nodes()
        self.node_to_edges = self._build_node_to_edges()
        
        self.logger = logging.getLogger(__name__)
    
    def _init_spatial_index(self):
        """Initialize R-tree spatial index with improved error handling"""
        try:
            self.spatial_index = rtree.index.Index()
            for idx, edge in self.edges_gdf.iterrows():
                if edge.geometry is not None and not edge.geometry.is_empty:
                    self.spatial_index.insert(idx, edge.geometry.bounds)
        except Exception as e:
            self.logger.error(f"Error initializing spatial index: {str(e)}")
            raise

    def _build_edge_to_nodes(self) -> Dict[int, set]:
        """Build mapping from edge IDs to their endpoint nodes with validation"""
        edge_to_nodes = {}
        for idx, edge in self.edges_gdf.iterrows():
            if edge.geometry is not None and not edge.geometry.is_empty:
                coords = list(edge.geometry.coords)
                if len(coords) >= 2:  # Ensure valid linestring
                    edge_to_nodes[idx] = {
                        self._get_node_id(coords[0]),
                        self._get_node_id(coords[-1])
                    }
        return edge_to_nodes

    def _build_node_to_edges(self) -> Dict[tuple, set]:
        """Build mapping from nodes to connected edge IDs"""
        node_to_edges = {}
        for edge_id, nodes in self.edge_to_nodes.items():
            for node in nodes:
                if node not in node_to_edges:
                    node_to_edges[node] = set()
                node_to_edges[node].add(edge_id)
        return node_to_edges

    def _get_node_id(self, coord: tuple) -> tuple:
        """Convert coordinate to node ID with improved precision"""
        return tuple(round(x, 6) for x in coord)

    def _find_candidates(self, point: Point) -> List[dict]:
        """Enhanced candidate finding with adaptive search radius and reduced distance threshold."""
        candidates = []
        initial_distance = 30.0  # Start with a reduced search radius of 30 meters
        max_attempts = 3
        current_distance = initial_distance
        
        for attempt in range(max_attempts):
            bounds = (
                point.x - current_distance,
                point.y - current_distance,
                point.x + current_distance,
                point.y + current_distance
            )
            
            for idx in self.spatial_index.intersection(bounds):
                edge = self.edges_gdf.loc[idx]
                if edge.geometry is not None:
                    dist = point.distance(edge.geometry)
                    if dist <= current_distance:
                        proj_point = edge.geometry.interpolate(
                            edge.geometry.project(point)
                        )
                        candidates.append({
                            'edge_id': idx,
                            'distance': dist,
                            'proj_point': proj_point,
                            'edge': edge
                        })
            
            if candidates:
                break
                
            current_distance *= 1.5  # Increase search radius for next attempt
        
        # Sort by distance and apply adaptive limit
        candidates.sort(key=lambda x: x['distance'])
        return candidates[:self.config['max_candidates']]
    
    

    def _calculate_emission_prob(self, point: Point, candidate: dict) -> float:
        """Enhanced emission probability calculation with improved scaling"""
        distance = candidate['distance']
        sigma_z = self.config['sigma_z']
        
        # Distance-based probability with decay
        distance_factor = np.exp(-distance * self.config['distance_decay'])
        
        # Gaussian probability
        gaussian_prob = np.exp(-0.5 * (distance / sigma_z) ** 2)
        
        # Combined probability
        prob = gaussian_prob * distance_factor
        
        return max(prob, self.config['min_prob_norm'])

    def _calculate_transition_prob(self, prev_edge: int, curr_edge: int,
                                 prev_point: Point, curr_point: Point) -> float:
        """Enhanced transition probability calculation with improved angle handling"""
        prev_nodes = self.edge_to_nodes[prev_edge]
        curr_nodes = self.edge_to_nodes[curr_edge]
        
        # Check connectivity with more flexibility
        connected = bool(prev_nodes.intersection(curr_nodes))
        connectivity_score = 1.0 if connected else 0.3
        
        # Calculate angle similarity
        dir1 = np.array(prev_point.coords[-1]) - np.array(prev_point.coords[0])
        dir2 = np.array(curr_point.coords[-1]) - np.array(curr_point.coords[0])
        
        norm1 = np.linalg.norm(dir1)
        norm2 = np.linalg.norm(dir2)
        
        if norm1 == 0 or norm2 == 0:
            angle_score = 1.0
        else:
            cos_angle = np.dot(dir1, dir2) / (norm1 * norm2)
            angle = np.arccos(np.clip(cos_angle, -1.0, 1.0))
            
            # Smoother angle penalty
            angle_score = 1.0 - (angle / self.config['angle_tolerance']) * self.config['max_angle_penalty']
            angle_score = max(angle_score, 1.0 - self.config['max_angle_penalty'])
        
        # Combined probability
        prob = connectivity_score * angle_score
        
        return max(prob, self.config['min_prob_norm'])

    def _viterbi_matching(self, points: List[Point], candidates_by_point: List[List[dict]]) -> List[Dict]:
        """Improved Viterbi algorithm with better numerical stability"""
        n_points = len(points)
        states = [{} for _ in range(n_points)]
        
        # Initialize first state with log probabilities
        for candidate in candidates_by_point[0]:
            edge_id = candidate['edge_id']
            log_emission = np.log(self._calculate_emission_prob(points[0], candidate))
            states[0][edge_id] = {
                'log_prob': log_emission,
                'prev': None,
                'emission': log_emission,
                'transition': 0.0
            }
        
        # Forward pass with log probabilities
        for t in range(1, n_points):
            for candidate in candidates_by_point[t]:
                curr_edge = candidate['edge_id']
                log_emission = np.log(self._calculate_emission_prob(points[t], candidate))
                
                best_log_prob = float('-inf')
                best_prev = None
                best_transition = None
                
                for prev_edge, prev_state in states[t-1].items():
                    trans_prob = self._calculate_transition_prob(
                        prev_edge, curr_edge, points[t-1], points[t]
                    )
                    log_transition = np.log(trans_prob)
                    
                    log_prob = prev_state['log_prob'] + log_transition + log_emission
                    
                    if log_prob > best_log_prob:
                        best_log_prob = log_prob
                        best_prev = prev_edge
                        best_transition = log_transition
                
                if best_prev is not None:
                    states[t][curr_edge] = {
                        'log_prob': best_log_prob,
                        'prev': best_prev,
                        'emission': log_emission,
                        'transition': best_transition
                    }
        
        # Convert log probabilities to normalized confidence scores
        if states[-1]:
            log_probs = np.array([state['log_prob'] for state in states[-1].values()])
            max_log_prob = np.max(log_probs)
            normalized_probs = np.exp(log_probs - max_log_prob)
            normalized_probs /= np.sum(normalized_probs)
            
            for edge_id, norm_prob in zip(states[-1].keys(), normalized_probs):
                states[-1][edge_id]['confidence'] = norm_prob
        
        return states

    def _backtrack(self, states: List[Dict]) -> List[int]:
        """Backtrack to find the best path with improved handling of edge cases"""
        if not states or not states[-1]:
            return []
        
        path = []
        current_edge = max(states[-1].items(), key=lambda x: x[1]['log_prob'])[0]
        
        for t in range(len(states) - 1, -1, -1):
            path.append(current_edge)
            if t > 0 and states[t][current_edge]['prev'] is not None:
                current_edge = states[t][current_edge]['prev']
        
        return list(reversed(path))

    def _sequential_matching(self, points: List[Point]) -> Dict:
        """Match long trajectories in sequential segments with overlap"""
        segment_size = 30
        overlap = 10
        all_edges = []
        segment_confidences = []
        
        for i in range(0, len(points), segment_size - overlap):
            segment = points[i:i + segment_size]
            if len(segment) < 2:
                continue
                
            candidates = [self._find_candidates(p) for p in segment]
            if not all(candidates):
                continue
                
            states = self._viterbi_matching(segment, candidates)
            path = self._backtrack(states)
            
            if path:
                if states[-1] and path[-1] in states[-1]:
                    segment_confidences.append(states[-1][path[-1]].get('confidence', 0.0))
                    
                # Remove overlap with previous segment
                if all_edges and overlap > 0:
                    all_edges = all_edges[:-overlap]
                all_edges.extend(path)
        
        if not all_edges:
            return {'success': False, 'edges': [], 'confidence': 0.0}
        
        # Calculate overall confidence as average of segment confidences
        overall_confidence = np.mean(segment_confidences) if segment_confidences else 0.0
            
        return {
            'success': True,
            'edges': all_edges,
            'confidence': overall_confidence
        }

    def match_trajectory(self, points: List[Tuple[float, float]]) -> Dict:
        """Match trajectory with improved error handling and validation"""
        try:
            if len(points) < 2:
                return {'success': False, 'edges': [], 'confidence': 0.0}

            point_objects = [Point(p) for p in points]
            
            # Use sequential matching for long trajectories
            if self.config['sequential_matching'] and len(points) > 50:
                return self._sequential_matching(point_objects)
            
            # Standard matching for shorter trajectories
            candidates_by_point = [self._find_candidates(p) for p in point_objects]
            
            if not all(candidates_by_point):
                return {'success': False, 'edges': [], 'confidence': 0.0}
            
            states = self._viterbi_matching(point_objects, candidates_by_point)
            path = self._backtrack(states)
            
            if not path:
                return {'success': False, 'edges': [], 'confidence': 0.0}
            
            confidence = states[-1][path[-1]].get('confidence', 0.0)
            
            return {
                'success': True,
                'edges': path,
                'confidence': confidence
            }
                
        except Exception as e:
            self.logger.error(f"Error in match_trajectory: {str(e)}")
            return {'success': False, 'edges': [], 'confidence': 0.0}

# Route Analyzer Class

In [162]:
class RouteAnalyzer:
    
    """Analyze mapped routes for frequently traversed and slow segments with enhanced analysis"""
    def __init__(self, matcher, matched_results: List[Dict], output_dir: str = 'map_matching_results'):
        self.matcher = matcher
        self.matched_results = matched_results
        self.output_dir = output_dir
        self.analysis_dir = os.path.join(output_dir, 'route_analysis')
        os.makedirs(self.analysis_dir, exist_ok=True)
        
        # Convert edges to WGS84 for visualization
        self.edges_wgs84 = matcher.edges_gdf.to_crs('EPSG:4326')
        
        # Initialize segment statistics
        self.segment_stats = self._initialize_segment_stats()
    
    
    def _analyze_trajectory_segment(self, edge_id: int, edge_geom, coords: List[tuple], timestamps: List[int]) -> tuple:
        """Analyze trajectory segment using actual timestamps between trajectory points."""
        edge_length = edge_geom.length
        if len(coords) < 2 or len(timestamps) < 2:
            return 0, 0

        # Track time intervals and calculate travel time using timestamps
        time_diffs = [timestamps[i+1] - timestamps[i] for i in range(len(timestamps) - 1)]
        total_time = sum(time_diffs)

        if total_time <= 0:
            return 0, 0
        
        # Calculate speed (m/s)
        speed = edge_length / total_time

        # Validate speed to be within realistic ranges for urban areas
        MIN_SPEED = 1.389  # 5 km/h in m/s
        MAX_SPEED = 13.89  # 50 km/h in m/s

        if speed < MIN_SPEED:
            speed = MIN_SPEED
            total_time = edge_length / MIN_SPEED
        elif speed > MAX_SPEED:
            speed = MAX_SPEED
            total_time = edge_length / MAX_SPEED
        
        return speed, total_time
    
    def _initialize_segment_stats(self) -> Dict:
        """Initialize statistics for each road segment using actual timestamps"""
        stats = {}
        
        for result in self.matched_results:
            if not result['match_result']['success']:
                continue
            
            coords = result['original_coords']
            edges = result['match_result']['edges']
            timestamps = result['timestamps']
            
            if len(coords) < 2 or not edges or timestamps is None:
                continue
            
            # Process each edge in the matched path
            for edge_id in edges:
                if edge_id not in stats:
                    stats[edge_id] = {
                        'traverse_count': 0,
                        'length': 0,
                        'speeds': [],
                        'times': [],
                        'distance_traversed': 0  # Changed from total_traversed to distance_traversed
                    }
                
                edge_geom = self.matcher.edges_gdf.loc[edge_id].geometry
                stats[edge_id]['length'] = edge_geom.length
                stats[edge_id]['distance_traversed'] += edge_geom.length
                
                speed, time = self._analyze_trajectory_segment(
                    edge_id, 
                    edge_geom, 
                    coords,
                    timestamps
                )
                
                if speed > 0 and time > 0:
                    stats[edge_id]['traverse_count'] += 1
                    stats[edge_id]['speeds'].append(speed)
                    stats[edge_id]['times'].append(time)
        
        # Calculate aggregate statistics
        for edge_id, edge_stats in stats.items():
            if edge_stats['traverse_count'] > 0 and edge_stats['speeds']:
                edge_stats['avg_speed'] = np.mean(edge_stats['speeds'])
                edge_stats['speed_std'] = np.std(edge_stats['speeds']) if len(edge_stats['speeds']) > 1 else 0
                edge_stats['avg_time'] = np.mean(edge_stats['times'])
                edge_stats['congestion_index'] = (edge_stats['speed_std'] / edge_stats['avg_speed'] 
                                                if edge_stats['avg_speed'] > 0 else 0)
        
        return stats
        
    

    

    def get_most_traversed_segments(self, n: int = 10) -> List[Dict]:
        """Return the n most frequently traversed road segments"""
        segments = []
        for edge_id, stats in self.segment_stats.items():
            if stats['traverse_count'] > 0:
                segments.append({
                    'edge_id': edge_id,
                    'count': stats['traverse_count'],
                    'geometry': self.edges_wgs84.loc[edge_id].geometry,
                    'avg_speed': stats.get('avg_speed', 0),
                    'avg_time': stats.get('avg_time', 0),
                    'length': stats['length'],
                    'speed_std': stats.get('speed_std', 0),
                    'congestion_index': stats.get('congestion_index', 0),
                    'distance_traversed': stats['distance_traversed']  # Added this line
                })
        
        segments.sort(key=lambda x: x['count'], reverse=True)
        return segments[:n]
    
    def get_slowest_segments(self, n: int = 10) -> List[Dict]:
        """Return the n slowest road segments based on average speed"""
        segments = []
        min_length = 50  # Only consider segments longer than 50m
        
        for edge_id, stats in self.segment_stats.items():
            if (stats['traverse_count'] > 0 and 
                stats.get('avg_speed', 0) > 0 and 
                stats['length'] >= min_length):
                segments.append({
                    'edge_id': edge_id,
                    'avg_speed': stats['avg_speed'],
                    'avg_time': stats['avg_time'],
                    'count': stats['traverse_count'],
                    'geometry': self.edges_wgs84.loc[edge_id].geometry,
                    'length': stats['length'],
                    'speed_std': stats['speed_std'],
                    'congestion_index': stats['congestion_index']
                })
        
        segments.sort(key=lambda x: x['avg_speed'])
        return segments[:n]

    def generate_enhanced_report(self, most_traversed: List[Dict], slowest_segments: List[Dict]) -> str:
        """Generate a comprehensive analysis report"""
        report = "Enhanced Route Analysis Report\n"
        report += "===========================\n\n"
        
        # Most Traversed Segments Analysis
        report += "Most Frequently Traversed Segments:\n"
        report += "--------------------------------\n"
        for idx, segment in enumerate(most_traversed, 1):
            speed_kmh = segment['avg_speed'] * 3.6  # Convert to km/h
            
            report += f"Rank {idx}:\n"
            report += f"  Edge ID: {segment['edge_id']}\n"
            report += f"  Traverse Count: {segment['count']}\n"
            report += f"  Length: {segment['length']:.2f} meters\n"
            report += f"  Average Travel Time: {segment['avg_time']:.2f} seconds\n"
            report += f"  Average Speed: {segment['avg_speed']:.2f} m/s\n"
            report += f"  Average Speed (km/h): {speed_kmh:.2f} km/h\n"
            report += f"  Speed Std Dev: {segment['speed_std']:.2f} m/s\n"
            report += f"  Total Distance Traversed: {segment['distance_traversed']:.2f} meters\n"
            report += f"  Congestion Index: {segment['congestion_index']:.3f}\n\n"
        
        # Slowest Segments Analysis
        report += "\nSlowest Segments Analysis:\n"
        report += "------------------------\n"
        for idx, segment in enumerate(slowest_segments, 1):
            speed_kmh = segment['avg_speed'] * 3.6
            
            report += f"Rank {idx}:\n"
            report += f"  Edge ID: {segment['edge_id']}\n"
            report += f"  Length: {segment['length']:.2f} meters\n"
            report += f"  Average Travel Time: {segment['avg_time']:.2f} seconds\n"
            report += f"  Average Speed: {segment['avg_speed']:.2f} m/s\n"
            report += f"  Average Speed (km/h): {speed_kmh:.2f} km/h\n"
            report += f"  Traverse Count: {segment['count']}\n"
            report += f"  Speed Std Dev: {segment['speed_std']:.2f} m/s\n"
            report += f"  Congestion Index: {segment['congestion_index']:.3f}\n\n"
        
        # Overall Network Statistics
        report += "\nOverall Network Statistics:\n"
        report += "-------------------------\n"
        all_segments = len(self.segment_stats)
        traversed_segments = sum(1 for stats in self.segment_stats.values() if stats['traverse_count'] > 0)
        
        # Calculate average network speed (excluding zero speeds)
        valid_speeds = [stats['avg_speed'] for stats in self.segment_stats.values() 
                       if stats['traverse_count'] > 0 and stats['avg_speed'] > 0]
        avg_network_speed = np.mean(valid_speeds) if valid_speeds else 0
        
        report += f"Total Road Segments: {all_segments}\n"
        report += f"Traversed Segments: {traversed_segments} ({(traversed_segments/all_segments)*100:.1f}%)\n"
        report += f"Average Traversal Count: {np.mean([stats['traverse_count'] for stats in self.segment_stats.values()]):.2f}\n"
        report += f"Average Network Speed: {avg_network_speed:.2f} m/s ({avg_network_speed * 3.6:.2f} km/h)\n"
        
        return report
    
    
    
    def analyze_and_visualize_enhanced(self):
        """Perform complete route analysis with error handling"""
        try:
            # Get analyzed segments
            most_traversed = self.get_most_traversed_segments()
            slowest_segments = self.get_slowest_segments()
            
            if not most_traversed and not slowest_segments:
                logging.warning("No valid segments found for analysis")
                return [], [], None
            
            # Generate enhanced report
            report = self.generate_enhanced_report(most_traversed, slowest_segments)
            
            # Save report
            report_path = os.path.join(self.analysis_dir, 'enhanced_route_analysis_report.txt')
            with open(report_path, 'w') as f:
                f.write(report)
            
            # Create visualizations if we have segments
            if most_traversed:
                self.visualize_segments_enhanced(
                    most_traversed,
                    "Most Frequently Traversed Road Segments",
                    "most_traversed_segments_enhanced.html",
                    "traverse frequency",
                    ['#fff7ec', '#fee8c8', '#fdd49e', '#fdbb84', '#fc8d59', '#ef6548', '#d7301f', '#990000']
                )
            
            if slowest_segments:
                self.visualize_segments_enhanced(
                    slowest_segments,
                    "Road Segments with Highest Average Travel Time",
                    "slowest_segments_enhanced.html",
                    "average travel time",
                    ['#f7fcfd', '#e0ecf4', '#bfd3e6', '#9ebcda', '#8c96c6', '#8c6bb1', '#88419d', '#6e016b']
                )
            
            return most_traversed, slowest_segments, report_path
            
        except Exception as e:
            logging.error(f"Error in analyze_and_visualize_enhanced: {str(e)}")
            return [], [], None
    
    
    
    
    
    def visualize_segments_enhanced(self, segments: List[Dict], title: str, filename: str, 
                                  metric_name: str, color_scheme: List[str]):
        """Create an enhanced interactive visualization with error handling"""
        # Check if we have segments to visualize
        if not segments:
            logging.warning(f"No segments to visualize for {title}")
            return
        
        # Create base map centered on Porto
        center_lat, center_lon = 41.1579, -8.6291
        m = folium.Map(
            location=[center_lat, center_lon],
            zoom_start=13,
            tiles='cartodbpositron'
        )
        
        # Add all road network in very light gray
        for _, edge in self.edges_wgs84.iterrows():
            if edge.geometry is not None:
                coords = [(y, x) for x, y in edge.geometry.coords]
                folium.PolyLine(
                    coords,
                    weight=1,
                    color='lightgray',
                    opacity=0.2
                ).add_to(m)
        
        # Create color scale for highlighted segments
        metric_display = 'Traverse Count'
        if segments:  # Check if we have any segments
            if 'count' in segments[0]:
                values = [seg['count'] for seg in segments]
            else:
                values = [seg.get('avg_time', 0) for seg in segments]
                metric_display = 'Average Travel Time'
                
            max_value = max(values) if values else 1
            min_value = min(values) if values else 0
            
            colormap = LinearColormap(
                colors=color_scheme,
                vmin=min_value,
                vmax=max_value,
            )
            
            # Add highlighted segments with rank numbers
            for rank, segment in enumerate(segments, 1):
                # Get coordinates and create line
                coords = [(y, x) for x, y in segment['geometry'].coords]
                value = segment.get('count', 0) or segment.get('avg_time', 0)
                
                # Create detailed popup text
                if 'count' in segment:
                    popup_text = (
                        f"<div style='font-family: Arial; font-size: 12px;'>"
                        f"<strong>Rank: {rank}</strong><br>"
                        f"Edge ID: {segment['edge_id']}<br>"
                        f"Traverse count: {segment['count']}<br>"
                        f"Average Speed: {segment['avg_speed']:.2f} m/s<br>"
                        f"Average Speed: {(segment['avg_speed'] * 3.6):.2f} km/h<br>"
                        f"Length: {segment['length']:.2f} m<br>"
                        f"</div>"
                    )
                else:
                    popup_text = (
                        f"<div style='font-family: Arial; font-size: 12px;'>"
                        f"<strong>Rank: {rank}</strong><br>"
                        f"Edge ID: {segment['edge_id']}<br>"
                        f"Avg time: {segment['avg_time']:.2f} s<br>"
                        f"Average Speed: {segment['avg_speed']:.2f} m/s<br>"
                        f"Average Speed: {(segment['avg_speed'] * 3.6):.2f} km/h<br>"
                        f"Traverse count: {segment['count']}<br>"
                        f"</div>"
                    )
                
                # Add the segment line
                folium.PolyLine(
                    coords,
                    weight=5,
                    color=colormap(value),
                    opacity=0.8,
                    popup=folium.Popup(popup_text, max_width=200)
                ).add_to(m)
                
                # Calculate midpoint
                if len(coords) > 1:
                    mid_lat = sum(coord[0] for coord in coords) / len(coords)
                    mid_lon = sum(coord[1] for coord in coords) / len(coords)
                    midpoint = (mid_lat, mid_lon)
                    
                    # Create a single marker with the circle and rank number
                    folium.Marker(
                        location=midpoint,
                        icon=DivIcon(
                            icon_size=(24, 24),
                            icon_anchor=(12, 12),
                            html=f'''
                                <div style="
                                    width: 24px;
                                    height: 24px;
                                    background-color: transparent;
                                    border: 2px solid black;
                                    border-radius: 50%;
                                    display: flex;
                                    align-items: center;
                                    justify-content: center;
                                    font-size: 14px;
                                    font-weight: bold;
                                    font-family: Arial;
                                    color: black;
                                ">
                                    {rank}
                                </div>
                            '''
                        )
                    ).add_to(m)
            
            # Add color scale
            colormap.add_to(m)
            colormap.caption = metric_display
        
        # Add enhanced title and legend
        title_html = f'''
            <div style="position: fixed; 
                        top: 10px; left: 50%; 
                        transform: translateX(-50%);
                        background-color: white;
                        border-radius: 5px;
                        padding: 10px;
                        z-index: 1000;
                        box-shadow: 0 2px 5px rgba(0,0,0,0.2);">
                <h4 style="margin: 0; color: #2c3e50;">{title}</h4>
                <p style="margin: 5px 0 0 0; font-size: 12px; color: #7f8c8d;">
                    Top {len(segments)} segments ranked by {metric_name}
                </p>
            </div>
        '''
        m.get_root().html.add_child(folium.Element(title_html))
        
        # Save map
        m.save(os.path.join(self.analysis_dir, filename))
    
    

In [165]:
def process_trajectory_with_time(row, interval_seconds=15):
    """Parse a trip's polyline and attach a generated timestamp to every point.

    Only the trip's start time is read from the data. The per-point timestamps
    are synthesized by assuming a fixed sampling interval between consecutive
    points, so they are estimates rather than recorded values.

    Args:
        row: Mapping-like record (e.g. a pandas Series or a dict) with a
            'POLYLINE' entry holding the string form of a sequence of
            coordinates and a 'TIMESTAMP' entry convertible to int.
        interval_seconds: Assumed spacing between consecutive points, added
            directly to the start timestamp (so both are expected to share
            the same unit). Defaults to 15, the value previously hard-coded.

    Returns:
        A dict with keys 'coords' (the parsed coordinates), 'timestamps'
        (one value per coordinate) and 'start_timestamp', or None when the
        polyline is empty, has fewer than two points, or cannot be parsed.
    """
    try:
        coords = ast.literal_eval(row['POLYLINE'])
        # A trajectory needs at least two points to be matched.
        if not coords or len(coords) < 2:
            return None

        # The source value may be a string or a numpy integer; normalize it.
        start_timestamp = int(row['TIMESTAMP'])

        # One timestamp per point, evenly spaced from the trip start.
        timestamps = [
            start_timestamp + position * interval_seconds
            for position in range(len(coords))
        ]

        return {
            'coords': coords,
            'timestamps': timestamps,
            'start_timestamp': start_timestamp  # Kept separately for convenience
        }
    except Exception as e:
        # Best-effort parsing: a malformed row is logged and skipped, never fatal.
        logging.warning(f"Error processing trajectory: {str(e)}")
        return None

def main():
    """Run the map-matching pipeline end to end and trigger the route analysis.

    Steps, in order:
      1. Load the drivable road network for Porto and project its edges to
         UTM zone 29N so distances are in metres.
      2. Read the first 1500 rows of the training CSV.
      3. Build an EnhancedViterbiMatcher and match every parsable trajectory,
         keeping only the ones the matcher reports as successful.
      4. If anything matched, hand the results to RouteAnalyzer; otherwise
         log a warning.

    Returns:
        None. Progress is printed; analysis artefacts are produced by
        RouteAnalyzer.
    """
    target_crs = 'EPSG:32629'  # UTM zone 29N for Porto

    # --- Road network -----------------------------------------------------
    print("Loading road network...")
    road_graph = ox.graph_from_place('Porto, Portugal', network_type='drive')
    _, edge_gdf = ox.graph_to_gdfs(road_graph)
    # Metric projection for accurate distance calculations
    edge_gdf = edge_gdf.to_crs(target_crs)
    log = logging.getLogger(__name__)

    # --- Trajectory data --------------------------------------------------
    print("Loading trajectory data...")
    trips = pd.read_csv('kraggle_data/train/train.csv', nrows=1500)

    # --- Matcher ----------------------------------------------------------
    print("Initializing matcher...")
    matcher_settings = dict(
        max_candidates=8,
        max_distance=100.0,
        sigma_z=10.0,
        beta=1.5,
    )
    matcher = EnhancedViterbiMatcher(road_graph, edge_gdf, matcher_settings)

    # --- Matching loop ----------------------------------------------------
    print("Processing trajectories...")
    matched_results = []
    for _, trip in tqdm(trips.iterrows(), total=len(trips)):
        trajectory = process_trajectory_with_time(trip)
        if not trajectory:
            # Unparsable or too-short trajectory: nothing to match
            continue

        # Reproject the raw points into the same CRS as the edges
        raw_points = [Point(px, py) for px, py in trajectory['coords']]
        projected = gpd.GeoDataFrame(geometry=raw_points, crs='EPSG:4326').to_crs(target_crs)
        utm_coords = [(geom.x, geom.y) for geom in projected.geometry]

        outcome = matcher.match_trajectory(utm_coords)
        if not outcome['success']:
            continue

        matched_results.append({
            'match_result': outcome,
            'original_coords': trajectory['coords'],
            'timestamps': trajectory['timestamps'],
            'start_timestamp': trajectory['start_timestamp']
        })

    # --- Analysis ---------------------------------------------------------
    if not matched_results:
        log.warning("No trajectories were successfully matched")
        return

    output_dir = 'map_matching_results'
    log.info("Performing route analysis...")
    analyzer = RouteAnalyzer(matcher, matched_results, output_dir)
    # The three results are unpacked but not used further here
    most_traversed, slowest_segments, report_path = analyzer.analyze_and_visualize_enhanced()

    log.info(f"Successfully analyzed {len(matched_results)} trajectories")
    log.info(f"Reports and visualizations saved in {output_dir} folder")
    
# Entry point: run the full pipeline only when this code is executed directly,
# not when it is imported as a module.
if __name__ == "__main__":
    main()
    
    

Loading road network...
Loading trajectory data...
Initializing matcher...
Processing trajectories...


100%|██████████| 1500/1500 [00:34<00:00, 43.37it/s]
