# In [None]:
import os
import numpy as np
import pandas as pd
import osmnx as ox
from fmm import (
    Network,
    NetworkGraph,
    UBODTGenAlgorithm,
    UBODT,
    FastMapMatch,
    FastMapMatchConfig,
    STMATCH,
    STMATCHConfig,
)
import folium
import matplotlib.pyplot as plt
import matplotlib.colors as mcolors  # Import for rgb2hex
from shapely.geometry import Polygon


# Locations of the road-network shapefile and the UBODT table on disk.
folder = '/content/data'  # Root data directory; change to match your environment
network_file_path = os.path.join(
    folder,
    "porto-network",
    "edges.shp",
)
ubodt_file_path = os.path.join(
    folder,
    "ubodt.txt",
)


def load_graph(bounds):
    """Build the drivable street network enclosed by a bounding box.

    ``bounds`` must unpack into exactly four values ``(x1, x2, y1, y2)``.
    The rectangle with corners ``(x1, y1), (x2, y1), (x2, y2), (x1, y2)``
    is handed to ``ox.graph_from_polygon`` with ``network_type='drive'``.
    """
    x_first, x_second, y_first, y_second = bounds
    corners = [
        (x_first, y_first),
        (x_second, y_first),
        (x_second, y_second),
        (x_first, y_second),
    ]
    area = Polygon(corners)
    street_graph = ox.graph_from_polygon(area, network_type='drive')
    return street_graph


def load_data(file_path, nrows=None):
    """Read the trajectory CSV into a DataFrame.

    ``nrows`` is forwarded to ``pd.read_csv``; ``None`` reads the whole file.
    """
    frame = pd.read_csv(file_path, nrows=nrows)
    return frame


def _parse_polyline(raw):
    """Parse one polyline cell into an ``(n, 2)`` float array.

    Returns the single-point placeholder ``[[0.0, 0.0]]`` when the cell is
    not a string, holds fewer than two numbers, holds an odd number of
    values, or contains something that is not a float.
    """
    placeholder = np.asarray([[0.0, 0.0]])

    # Missing CSV values arrive as NaN (a float); slicing them would raise
    # TypeError, so treat anything that is not text as "no trajectory".
    if not isinstance(raw, str):
        return placeholder

    # Drop the two outer characters on each side, then any remaining
    # brackets, leaving a flat comma-separated list of numbers.
    tokens = raw[2:-2].replace(']', '').replace('[', '').split(',')
    if len(tokens) <= 1:
        return placeholder

    try:
        return np.asarray(tokens, dtype=float).reshape((len(tokens) // 2, 2))
    except ValueError:
        # Non-numeric token, or an odd count that cannot form (x, y) pairs.
        return placeholder


def clean_trajectory_data(df):
    """Convert the polyline strings in column 8 into coordinate arrays.

    Args:
        df: DataFrame whose column at position 8 holds each trajectory as a
            bracketed, comma-separated string of numbers.

    Returns:
        The result of ``df.to_numpy()`` with every cell of column 8 replaced
        by an ``(n, 2)`` float array. Cells that cannot be parsed become the
        placeholder ``[[0.0, 0.0]]``.
    """
    train_data = df.to_numpy()

    for row in range(len(train_data)):
        train_data[row, 8] = _parse_polyline(train_data[row, 8])

    return train_data


def plot_trajectories_folium(G, train_data, traj_indices, output_path, title='Trajectories'):
    """Render GPS trajectories as coloured point markers on a Folium map.

    Only trajectories that are NumPy arrays with more than one point are
    drawn. The map bounds and the legend are derived from those same
    trajectories, so single-point placeholders (such as ``[[0, 0]]``) can no
    longer stretch the view away from the real data.

    Args:
        G: Street-network graph. Accepted for call compatibility; not used.
        train_data: 2-D array whose column 8 holds, per trip, an array of
            ``(lon, lat)`` rows.
        traj_indices: Row indices of the trips to draw.
        output_path: Path of the HTML file to write.
        title: Accepted for call compatibility; not used.

    Raises:
        ValueError: If none of the selected trips has more than one point.
    """
    # Decide once which trips are drawable. ``slot`` is the position in
    # ``traj_indices`` and selects the colour, so a trip keeps the same
    # colour regardless of which other trips are skipped.
    drawable = []
    for slot, traj_idx in enumerate(traj_indices):
        traj = train_data[traj_idx, 8]
        if isinstance(traj, np.ndarray) and len(traj) > 1:
            drawable.append((slot, traj_idx, traj))

    if not drawable:
        raise ValueError("No valid trajectories to plot.")

    # Calculate map boundaries from the drawn trips only
    x_min = min(float(traj[:, 0].min()) for _, _, traj in drawable)
    x_max = max(float(traj[:, 0].max()) for _, _, traj in drawable)
    y_min = min(float(traj[:, 1].min()) for _, _, traj in drawable)
    y_max = max(float(traj[:, 1].max()) for _, _, traj in drawable)
    map_center = [(y_min + y_max) / 2, (x_min + x_max) / 2]

    # Initialize Folium map and fit it to the data
    folium_map = folium.Map(location=map_center, zoom_start=14, control_scale=True)
    folium_map.fit_bounds([[y_min, x_min], [y_max, x_max]])

    # One colour per requested trip, sampled evenly from 'tab10'.
    # NOTE: 'tab10' is a ten-colour qualitative map, so sampling it at more
    # than ten positions repeats colours.
    num_traj = len(traj_indices)
    color_map = plt.colormaps['tab10'](np.linspace(0, 1, num_traj))

    for slot, traj_idx, traj in drawable:
        # Convert once per trip rather than twice per point
        color_hex = mcolors.rgb2hex(color_map[slot])

        # Folium expects [lat, lon]; the data is stored as (lon, lat)
        for point in traj[:, [1, 0]].tolist():
            folium.CircleMarker(
                location=point,
                color=color_hex,
                radius=5,
                fill=True,
                fill_color=color_hex,
                fill_opacity=0.8,
                tooltip=f'Trip {traj_idx + 1}'
            ).add_to(folium_map)

        # Green marker for the first point of the trip
        start_point = traj[0]
        folium.Marker(
            location=[start_point[1], start_point[0]],  # [lat, lon]
            icon=folium.Icon(color='green', icon='circle'),
            popup=f"Start of Trip {traj_idx + 1}"
        ).add_to(folium_map)

        # Red marker for the last point of the trip
        end_point = traj[-1]
        folium.Marker(
            location=[end_point[1], end_point[0]],  # [lat, lon]
            icon=folium.Icon(color='red', icon='circle'),
            popup=f"End of Trip {traj_idx + 1}"
        ).add_to(folium_map)

    # Add a legend listing the trips that were actually drawn
    legend_html = '''
     <div style="
     position: fixed;
     bottom: 50px; left: 50px; width: 150px; height: auto;
     background-color: white; z-index:9999; font-size:14px;
     border:2px solid grey;
     padding: 10px;
     ">
         <p style="margin: 0;"><b>Legend</b></p>
         <ul style="list-style: none; padding-left: 0;">
    '''
    for slot, traj_idx, _ in drawable:
        color_hex = mcolors.rgb2hex(color_map[slot])
        legend_html += f'<li><span style="background-color:{color_hex};width:20px;height:5px;display:inline-block;margin-right:5px;"></span> Trip {traj_idx + 1}</li>'
    legend_html += '</ul></div>'

    folium_map.get_root().html.add_child(folium.Element(legend_html))

    # Add Layer Control
    folium.LayerControl().add_to(folium_map)

    # Save the map to an HTML file
    folium_map.save(output_path)
    print(f"Interactive Folium map saved to {output_path}")


def remove_outliers(train_data, threshold_multiplier=5):
    """Drop GPS points that jump too far from the previously kept point.

    For each trajectory in column 8 the mean distance between consecutive
    points is computed once, on the unfiltered trajectory. The trajectory is
    then walked front to back: a point is discarded when its distance to the
    most recently kept point exceeds ``threshold_multiplier`` times that
    mean. The first point is always kept.

    Distances are plain Euclidean norms of the stored coordinate
    differences; no geodesic conversion is applied.

    Args:
        train_data: 2-D object array whose column 8 holds ``(n, 2)``
            coordinate arrays. Modified in place.
        threshold_multiplier: Multiple of the mean step length above which a
            step is treated as an outlier.

    Returns:
        The same ``train_data`` object, with filtered trajectories.
    """
    for i in range(len(train_data)):
        traj = train_data[i, 8]

        # Cells that are not coordinate arrays, or hold fewer than two
        # points, have nothing to filter.
        if not isinstance(traj, np.ndarray) or len(traj) < 2:
            continue

        # Mean step length over the original, unfiltered trajectory
        step_lengths = np.linalg.norm(traj[1:] - traj[:-1], axis=1)
        max_step = threshold_multiplier * step_lengths.mean()

        # Single forward pass collecting the indices to keep; this avoids
        # re-copying the array on every removal.
        kept = [0]
        for k in range(1, len(traj)):
            if np.linalg.norm(traj[k] - traj[kept[-1]]) > max_step:
                continue  # too far from the last kept point: drop it
            kept.append(k)

        # Leave the stored array untouched when nothing was removed
        if len(kept) < len(traj):
            train_data[i, 8] = traj[kept]

    return train_data


def prepare_fmm_data(train_data):
    """Build the FastMapMatch input table of trip ids and WKT linestrings.

    Each row of ``train_data`` contributes one record: ``id`` is taken from
    column 0 and ``geom`` is a WKT ``LINESTRING`` built from the first two
    values of every point of the coordinate array in column 8.

    A trajectory that is not a 2-D NumPy array with at least two points and
    two columns (for example the single-point ``[[0, 0]]`` placeholder) is
    replaced by the fallback ``LINESTRING(0 0, 0 0)``, because a linestring
    with a single vertex is not well formed.

    Args:
        train_data: 2-D array with trip ids in column 0 and coordinate
            arrays in column 8.

    Returns:
        DataFrame with columns ``id`` and ``geom`` (also when
        ``train_data`` is empty).
    """
    fallback_wkt = "LINESTRING(0 0, 0 0)"  # Default if trajectory is invalid
    records = []

    for row in train_data:
        trajectory = row[8]
        usable = (
            isinstance(trajectory, np.ndarray)
            and trajectory.ndim == 2
            and trajectory.shape[0] >= 2
            and trajectory.shape[1] >= 2
        )
        if usable:
            vertices = ','.join(
                f"{coord[0]} {coord[1]}" for coord in trajectory.tolist()
            )
            trajectory_wkt = f"LINESTRING({vertices})"
        else:
            trajectory_wkt = fallback_wkt
        records.append({'id': row[0], 'geom': trajectory_wkt})

    return pd.DataFrame(records, columns=['id', 'geom'])


def map_matching(input_data, network, graph, ubodt, fmm_config):
    """Match every WKT geometry in ``input_data['geom']`` with FastMapMatch.

    Returns a list with one entry per input geometry, in input order. A
    successful match yields a dict holding the positional index, the matched
    path and geometry fields, and one list per candidate attribute. A match
    that raises is reported on stdout and recorded as ``None``.
    """
    matcher = FastMapMatch(network, graph, ubodt)
    candidate_fields = (
        'edge_id', 'source', 'target', 'error', 'length',
        'offset', 'spdist', 'ep', 'tp',
    )
    matched = []

    for position, wkt in enumerate(input_data['geom'].values):
        try:
            outcome = matcher.match_wkt(wkt, fmm_config)
            record = {
                'idx': position,
                'match_path': list(outcome.cpath),
                'match_edge_by_pt': list(outcome.opath),
                'match_edge_by_idx': list(outcome.indices),
                'match_geom': outcome.mgeom.export_wkt(),
                'match_pt': outcome.pgeom.export_wkt(),
            }
            # One list per candidate attribute, in the fixed field order
            for field in candidate_fields:
                record[field] = [getattr(cand, field) for cand in outcome.candidates]
        except Exception as e:
            # Best effort: keep going and mark this trajectory as unmatched
            print(f"Map matching failed for index {position}: {e}")
            record = None
        matched.append(record)

    return matched

    
def plot_map_matching_folium(G, results, traj_indices, output_path, title='Map Matching Results'):
    """Draw map-matched paths as polylines on a Folium map and save it as HTML.

    For every index in ``traj_indices`` the ``'match_geom'`` WKT of the
    corresponding ``results`` entry is parsed into ``[lat, lon]`` vertices.
    Entries that are falsy, lack ``'match_geom'`` or do not start with
    ``LINESTRING`` are skipped; unparsable coordinates are reported on
    stdout and skipped. ``G`` and ``title`` are not used.

    Raises:
        ValueError: If no vertex could be parsed from any selected entry.
    """
    lon_values = []
    lat_values = []
    paths_by_trip = {}

    # Parse the matched geometry of each requested trip
    for trip in traj_indices:
        entry = results[trip]
        if not entry or 'match_geom' not in entry:
            continue
        wkt = entry['match_geom']
        if not wkt.startswith('LINESTRING'):
            continue

        vertices = []
        coordinate_text = wkt.replace('LINESTRING(', '').replace(')', '')
        for token in coordinate_text.split(','):
            try:
                lon, lat = map(float, token.strip().split())
            except ValueError:
                # Wrong number of values or a non-numeric value
                print(f"Skipping invalid coordinate: {token.strip()}")
                continue
            vertices.append([lat, lon])
            lon_values.append(lon)
            lat_values.append(lat)

        if vertices:
            paths_by_trip[trip] = vertices

    if not lon_values or not lat_values:
        raise ValueError("No valid map-matched trajectories to plot.")

    west, east = min(lon_values), max(lon_values)
    south, north = min(lat_values), max(lat_values)

    # Create the map centred on the data and zoomed to its extent
    fmap = folium.Map(
        location=[(south + north) / 2, (west + east) / 2],
        zoom_start=14,
        control_scale=True,
    )
    fmap.fit_bounds([[south, west], [north, east]])

    # One colour per requested trip, sampled evenly from 'tab10'
    palette = plt.colormaps['tab10'](np.linspace(0, 1, len(traj_indices)))

    for slot, trip in enumerate(traj_indices):
        path = paths_by_trip.get(trip, [])
        if not path:
            continue
        line = folium.PolyLine(
            path,
            color=mcolors.rgb2hex(palette[slot]),
            weight=5,
            opacity=0.8,
            tooltip=f'Match Trip {trip + 1}'
        )
        line.add_to(fmap)

    # Legend: fixed-position box with one entry per requested trip
    legend_head = '''
     <div style="
     position: fixed;
     bottom: 50px; left: 50px; width: 150px; height: auto;
     background-color: white; z-index:9999; font-size:14px;
     border:2px solid grey;
     padding: 10px;
     ">
         <p style="margin: 0;"><b>Legend</b></p>
         <ul style="list-style: none; padding-left: 0;">
    '''
    legend_items = ''.join(
        f'<li><span style="background-color:{mcolors.rgb2hex(palette[slot])};width:20px;height:5px;display:inline-block;margin-right:5px;"></span> Match Trip {trip + 1}</li>'
        for slot, trip in enumerate(traj_indices)
    )
    legend_html = legend_head + legend_items + '</ul></div>'
    fmap.get_root().html.add_child(folium.Element(legend_html))

    folium.LayerControl().add_to(fmap)

    # Write the map out
    fmap.save(output_path)
    print(f"Interactive Folium map with map matching saved to {output_path}")


def generate_ubodt(network_file_path, ubodt_file_path):
    """Load the road network and its UBODT table, creating the table if absent.

    The network is read from ``network_file_path`` with the field names
    ``"fid"``, ``"u"`` and ``"v"``. If ``ubodt_file_path`` is not an existing
    file, the table is first generated there (CSV, delta ``0.03``, OpenMP
    enabled) and the generator's status is printed. The table is then read
    from that path.

    Returns:
        Tuple ``(ubodt, network, graph)``.

    Raises:
        FileNotFoundError: If ``network_file_path`` does not exist.
    """
    if not os.path.exists(network_file_path):
        raise FileNotFoundError(f"Network file not found: {network_file_path}")

    network = Network(network_file_path, "fid", "u", "v")
    graph = NetworkGraph(network)

    table_on_disk = os.path.isfile(ubodt_file_path)
    if not table_on_disk:
        print("Generate and read the UBODT file")
        generator = UBODTGenAlgorithm(network, graph)
        status = generator.generate_ubodt(
            ubodt_file_path, 0.03, binary=False, use_omp=True
        )
        print(status)

    # Either branch ends by loading the table from the same path
    ubodt = UBODT.read_ubodt_csv(ubodt_file_path)
    if table_on_disk:
        print("Read the UBODT file")

    return ubodt, network, graph


def main():
    """Run the pipeline: load, clean, plot, filter, map-match and plot again.

    Steps, in order: load the network and UBODT, build the street graph,
    read and clean the trajectory CSV, plot the selected trips, remove
    outliers and plot again, export the FMM input CSV, run map matching and
    plot the matched paths. All outputs are written under ``output_dir``.
    """
    # Define bounding box: (x1, x2, y1, y2)
    bounds = (-8.70, -8.57, 41.19, 41.13)

    # Define file paths
    train_file = os.path.join(folder, "train-1500.csv")
    output_dir = '/content/data/'
    # exist_ok avoids the check-then-create race of a separate exists() test
    os.makedirs(output_dir, exist_ok=True)

    output_file = os.path.join(output_dir, 'Trajectories_Before_Outlier_Removal_Folium.html')
    outlier_file = os.path.join(output_dir, 'Trajectories_After_Outlier_Removal_Folium.html')
    fmm_output_file = os.path.join(output_dir, 'Trajectories_After_FMM_Folium.html')
    trips_csv = os.path.join(output_dir, 'trips2.csv')

    # Generate UBODT and get network and graph
    ubodt, network, graph = generate_ubodt(network_file_path, ubodt_file_path)

    # Load graph and data. G is handed to the plotting helpers, which
    # accept it but do not currently use it.
    G = load_graph(bounds)
    df = load_data(train_file, nrows=1500)  # Load first 1500 rows of the dataset

    # Clean trajectory data
    train_data = clean_trajectory_data(df)

    # Select the first 15 trajectories, or fewer when the file is shorter,
    # so that a small input does not fail with an IndexError.
    traj_indices = list(range(min(15, len(train_data))))

    # Plot initial trips before outlier removal
    plot_trajectories_folium(
        G,
        train_data,
        traj_indices,
        output_file,
        title='Top 15 Trajectories Before Outlier Removal'
    )

    # Remove outliers
    train_data = remove_outliers(train_data)

    # Plot trips after removing outliers
    plot_trajectories_folium(
        G,
        train_data,
        traj_indices,
        outlier_file,
        title='Top 15 Trajectories After Outlier Removal'
    )

    # Prepare data for FMM
    input_data = prepare_fmm_data(train_data)
    input_data.to_csv(trips_csv, index=False, sep=';')
    print(f"Prepared FMM data saved to {trips_csv}")

    # Map Matching with FMM
    fmm_config = FastMapMatchConfig(16, 0.005, 0.0005)  # Adjust parameters as needed
    results = map_matching(input_data, network, graph, ubodt, fmm_config)
    print("Map matching completed.")

    # Plot Map Matching results
    plot_map_matching_folium(
        G,
        results,
        traj_indices,
        fmm_output_file,
        title='Top 15 Map Matching Results'
    )


# Run the pipeline only when this file is executed directly, not on import.
if __name__ == "__main__":
    main()
