In [1]:
import pandas as pd
import networkx as nx
import numpy as np
import matplotlib.pyplot as plt
import re
import os

In [2]:
# Input locations: per-road traffic pages and the bridge overview workbook
folder_path = '../data/Traffic_data'
bmms = pd.read_excel("../data/BMMS_overview.xlsx")

# One table per traffic page, keyed by the file name up to its first dot
dataframes = {}

# os.listdir returns entries in arbitrary order; sorting makes the load order
# (and therefore the row order of everything concatenated from `dataframes`)
# reproducible across machines and runs.
for file in sorted(os.listdir(folder_path)):
    if file.endswith('.traffic.htm'):
        file_path = os.path.join(folder_path, file)
        file_name = file.split('.')[0]
        df_list = pd.read_html(file_path)
        # The fifth table parsed from the page is the one that is kept
        df = df_list[4]
        # Drop its first row and renumber the remaining rows from 0
        df = df.iloc[1:].reset_index(drop=True)
        df_key = file_name
        dataframes[df_key] = df

In [3]:
# Collect the trimmed per-file tables here before stacking them
modified_dfs = []

# Every loaded table loses its two leading rows
for key, df in dataframes.items():
    modified_df = df.iloc[2:, :]  # positional slice: keep row 2 onwards, all columns
    modified_dfs.append(modified_df)

# Stack all trimmed tables into a single frame with a fresh 0..n-1 index
big_df = pd.concat(objs=modified_dfs, ignore_index=True)
big_df

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,16,17,18,19,20,21,22,23,24,25
0,N1-1L,Jatrabari - Int.with Z1101 (Left) (Left),LRPS,0,0,LRPS,822,0.822,0.822,402.0,...,1851.0,2980.0,398.0,232.0,889.0,0.0,18236.0,1121.0,19357.0,19357.0
1,N1-1R,Jatrabari - Int.with Z1101 (Left) (Right),LRPS,0,0,LRPS,822,0.822,0.822,660.0,...,2608.0,2508.0,436.0,213.0,1088.0,0.0,20236.0,1301.0,21537.0,21537.0
2,N1-2L,Int.with Z1101 - Signboard (Left) R111 (Left),LRPS,822,0.822,LRPS,4175,4.175,3.353,660.0,...,2608.0,2508.0,436.0,213.0,1088.0,0.0,20236.0,1301.0,21537.0,21537.0
3,N1-2R,Int.with Z1101 - Signboard (Left) R111 (Right),LRPS,822,0.822,LRPS,4175,4.175,3.353,402.0,...,1851.0,2980.0,398.0,232.0,889.0,0.0,18236.0,1121.0,19357.0,19357.0
4,N1-3L,Signboard - Shimrail (Left)R110 (Left),LRPS,4175,4.175,LRPS,7181,7.181,3.006,91.0,...,1690.0,2266.0,1087.0,75.0,1198.0,0.0,16288.0,1273.0,17561.0,17561.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2061,Z8915-3,Int.with Z8916 - Daulatkhan,LRP010,1150,10.47,LRP014,2290,15.66,5.190,0.0,...,25.0,1025.0,872.0,396.0,380.0,0.0,2341.0,776.0,3117.0,3117.0
2062,Z8916-1,Guingerhat (Int.with R890) - Int.with Z8915,LRPS,0,0,LRP004,3626,7.626,7.626,0.0,...,15.0,672.0,640.0,453.0,524.0,0.0,1463.0,977.0,2440.0,2440.0
2063,Z8916-2,Int.with Z8915 - Daulatkhan Bazar,LRP004,3626,7.626,LRP005,6600,11.591,3.965,0.0,...,15.0,672.0,640.0,453.0,524.0,0.0,1463.0,977.0,2440.0,2440.0
2064,Z8943-1,Tazumuddin (Int.with Z8905) - Fakirhat (Int.wi...,LRPS,0,0,LRPS,6446,6.446,6.446,0.0,...,42.0,912.0,679.0,528.0,584.0,0.0,1971.0,1112.0,3083.0,3083.0


In [4]:
# Take the header labels from the second row of a loaded table. The table is
# fetched explicitly from `dataframes` (the last one loaded) instead of relying
# on whatever a previous cell's loop variable `df` happens to point at.
header_source = list(dataframes.values())[-1]
columns = header_source.iloc[1].tolist()

# Give explicit names to the two identifier columns
columns[0] = 'Road'
columns[1] = 'Name'

# Positions 5-7 are renamed with a `_2` suffix
columns[5] = "LRP_2"
columns[6] = "Offset_2"
columns[7] = "Chainage_2"

big_df.columns = columns

In [5]:
# Tag every row of the traffic table with the constant type 'road'
big_df['type'] = 'road'
# Define a function to find all road names in the 'Name' column
def find_roads(name):
    """Return every road identifier mentioned in ``name``.

    A road identifier is an upper-case ``Z``, ``N`` or ``R`` immediately
    followed by one or more digits and delimited by word boundaries
    (e.g. ``Z1101``, ``R111``).

    Parameters
    ----------
    name : str or any
        Text to search. Non-string values (NaN, None, numbers) are treated
        as containing no road names instead of raising ``TypeError``.

    Returns
    -------
    list of str
        Matches in order of appearance; empty when there are none.
    """
    # Missing names arrive as float NaN when applied over a pandas column
    if not isinstance(name, str):
        return []
    return re.findall(r'\b[ZNR]\d+\b', name)

# New column: for each row, the list of road identifiers found in its 'Name'
big_df['identified_roads'] = big_df['Name'].map(find_roads)

In [6]:
# Road identifier without its segment suffix: the text before the first '-'
big_df['base_road'] = [road_id.split('-')[0] for road_id in big_df['Road']]

# Chainage must be numeric in both sources; unparseable values become NaN
big_df['Chainage'] = pd.to_numeric(big_df['Chainage'], errors='coerce')
bmms['chainage'] = pd.to_numeric(bmms['chainage'], errors='coerce')

# Step 1: keep only the bridge columns of interest and rename the three that
# have a counterpart in big_df so they line up when stacking
bridge_columns = ['road', 'chainage', 'name', 'condition', 'lat', 'lon']
bmms_subset = bmms[bridge_columns].copy().rename(
    columns={'road': 'base_road', 'chainage': 'Chainage', 'name': 'Name'}
)

# Give the bridge rows an explicit missing value for every big_df column
# they do not already carry
for col in [c for c in big_df.columns if c not in bmms_subset.columns]:
    bmms_subset[col] = pd.NA
bmms_subset['type'] = 'bridge'

# Step 2: stack bridge rows underneath the road rows with a fresh index
combined_df = pd.concat([big_df, bmms_subset], ignore_index=True)

# Road rows from big_df first, bridge rows from bmms_subset after them
combined_df

Unnamed: 0,Road,Name,LRP,Offset,Chainage,LRP_2,Offset_2,Chainage_2,(Km),Heavy Truck,...,Motorized,Non Motorized,Total AADT,(AADT),type,identified_roads,base_road,condition,lat,lon
0,N1-1L,Jatrabari - Int.with Z1101 (Left) (Left),LRPS,0,0.000,LRPS,822,0.822,0.822,402.0,...,18236.0,1121.0,19357.0,19357.0,road,[Z1101],N1,,,
1,N1-1R,Jatrabari - Int.with Z1101 (Left) (Right),LRPS,0,0.000,LRPS,822,0.822,0.822,660.0,...,20236.0,1301.0,21537.0,21537.0,road,[Z1101],N1,,,
2,N1-2L,Int.with Z1101 - Signboard (Left) R111 (Left),LRPS,822,0.822,LRPS,4175,4.175,3.353,660.0,...,20236.0,1301.0,21537.0,21537.0,road,"[Z1101, R111]",N1,,,
3,N1-2R,Int.with Z1101 - Signboard (Left) R111 (Right),LRPS,822,0.822,LRPS,4175,4.175,3.353,402.0,...,18236.0,1121.0,19357.0,19357.0,road,"[Z1101, R111]",N1,,,
4,N1-3L,Signboard - Shimrail (Left)R110 (Left),LRPS,4175,4.175,LRPS,7181,7.181,3.006,91.0,...,16288.0,1273.0,17561.0,17561.0,road,[R110],N1,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
22476,,.,,,83.728,,,,,,...,,,,,bridge,,Z8806,D,22.183448,90.299962
22477,,.,,,3.981,,,,,,...,,,,,bridge,,Z8810,D,22.519353,90.310063
22478,,Kalbari Bridge,,,20.730,,,,,,...,,,,,bridge,,Z8814,D,22.282704,89.968512
22479,,Narainpur Bridge,,,28.880,,,,,,...,,,,,bridge,,Z8910,D,22.510650,90.535314


In [None]:
# Drop rows whose "Road" contains "L"; rows with a missing "Road" (the bridge
# rows) are kept because na=False makes them non-matching.
# .copy() gives an independent frame, so the column assignments below write to
# df_filtered itself rather than to a slice of combined_df (this avoids
# pandas' SettingWithCopyWarning).
df_filtered = combined_df[~combined_df["Road"].str.contains("L", na=False)].copy()

# Determine road and bridge indices: bridges are the rows without a "Road" value
index_bridges = df_filtered[df_filtered['Road'].isnull()].index
index_roads = df_filtered[df_filtered['Road'].notnull()].index

# Initialize 'Type' column, then label each group
df_filtered['Type'] = pd.NA
df_filtered.loc[index_roads, 'Type'] = 'Road'
df_filtered.loc[index_bridges, 'Type'] = 'Bridge'

# Function to find road names using regex
def find_roads(name):
    """Return every road identifier mentioned in ``name``.

    A road identifier is one of the letters N, Z or R (either case)
    immediately followed by one or more digits and delimited by word
    boundaries (e.g. ``Z1101``, ``r111``).

    Parameters
    ----------
    name : str or any
        Text to search. Non-string values (NaN, None, numbers) are treated
        as containing no road names instead of raising ``TypeError``.

    Returns
    -------
    list of str
        Matches in order of appearance; empty when there are none.
    """
    # Missing names arrive as float NaN when applied over a pandas column
    if not isinstance(name, str):
        return []
    return re.findall(r'\b[nNzZrR]\d+\b', name)

# For each row, list the road identifiers mentioned in its 'Name'
df_filtered['identified_roads'] = df_filtered['Name'].map(find_roads)

# Any row that mentions at least one road is re-labelled as an intersection
mentions_road = df_filtered['identified_roads'].map(len) > 0
df_filtered.loc[mentions_road, 'Type'] = 'Intersection'

# NOTE(review): a later cell reads '..._intersections_onlyR.csv', which is a
# different file name from the one written here — confirm which is intended.
df_filtered.to_csv('../data/TEST_traffic_df_with_bridges_and_intersections_only_R.csv', index=False)

In [None]:
# index_bridges = df[df['Road'].isnull()].index
# index_roads = df[df['Road'].notnull()].index
# df['Type'] = pd.NA
# df.loc[index_roads, 'Type'] = 'Road'
# df.loc[index_bridges, 'Type'] = 'Bridge'
# 
# df['Connected_Road1'] = None
# df['Connected_Road2'] = None
# df['Connected_Road3'] = None
# 
# all_road_names = df['Name'].str.findall(r'([nNzZrR]\s*\d+)')
# 
# for index, matches in all_road_names.items():
#     # Ensure matches is a list; treat NaN (float) as an empty list
#     matches = matches if isinstance(matches, list) else []
#     
#     if len(matches) >= 1:
#         df.at[index, 'Connected_Road1'] = matches[0]
#     if len(matches) >= 2:
#         df.at[index, 'Connected_Road2'] = matches[1]
#     if len(matches) >= 3:
#         df.at[index, 'Connected_Road3'] = matches[2]
# 
# df.loc[df['Connected_Road1'].notna(), 'Type'] = 'Intersection'
# df.to_csv('../data/traffic_df_with_bridges_and_intersections.csv', index=False)

In [None]:
# Load the prepared table; each row becomes one graph node
df = pd.read_csv('../data/traffic_df_with_bridges_and_intersections_onlyR.csv', low_memory=False)

G = nx.Graph()

# Node id is the row index; every column value is stored as a node attribute
G.add_nodes_from((index, row.to_dict()) for index, row in df.iterrows())

# Order rows along each road so that neighbouring rows are neighbouring points
df_sorted = df.sort_values(by=['base_road', 'Chainage'])

# Link each row to the next one in the sorted order when both share a base_road
ordered_nodes = df_sorted.index.tolist()
ordered_roads = df_sorted['base_road'].tolist()
G.add_edges_from(
    (node_a, node_b)
    for node_a, node_b, road_a, road_b in zip(
        ordered_nodes, ordered_nodes[1:], ordered_roads, ordered_roads[1:]
    )
    if road_a == road_b
)

# Intersections that can be located: typed 'Intersection' with both coordinates present
intersections = [
    node
    for node, attr in G.nodes(data=True)
    if attr['Type'] == 'Intersection' and pd.notna(attr['lon']) and pd.notna(attr['lat'])
]

def calculate_distance(lat1, lon1, lat2, lon2, radius_km=6371.0):
    """
    Calculate the great-circle distance between two points using the haversine formula.

    Parameters
    ----------
    lat1, lon1 : float or array-like
        Latitude and longitude of the first point, in degrees.
    lat2, lon2 : float or array-like
        Latitude and longitude of the second point, in degrees.
    radius_km : float, optional
        Sphere radius in kilometers; defaults to an approximate Earth radius.

    Returns
    -------
    float or numpy.ndarray
        Distance in kilometers (same units as ``radius_km``).
    """
    # Convert latitude and longitude from degrees to radians
    phi1, lambda1, phi2, lambda2 = map(np.radians, [lat1, lon1, lat2, lon2])

    # Haversine formula
    delta_phi = phi2 - phi1
    delta_lambda = lambda2 - lambda1
    a = np.sin(delta_phi / 2.0) ** 2 + np.cos(phi1) * np.cos(phi2) * np.sin(delta_lambda / 2.0) ** 2

    # Floating-point rounding can push `a` marginally outside [0, 1] for
    # (near-)antipodal points, which would make sqrt(1 - a) NaN.
    a = np.clip(a, 0.0, 1.0)
    c = 2 * np.arctan2(np.sqrt(a), np.sqrt(1 - a))

    return radius_km * c

# Connect each intersection to the closest located node on every road it references.
#
# Candidate nodes (those with both coordinates) are indexed by base_road once,
# instead of rescanning every graph node for each (intersection, road) pair.
nodes_by_road = {}
for node, attr in G.nodes(data=True):
    if not pd.isna(attr['lon']) and not pd.isna(attr['lat']):
        nodes_by_road.setdefault(attr['base_road'], []).append(node)

# NOTE(review): this reads the node attributes Connected_Road1..3; the only code
# in this notebook that creates those columns is commented out — confirm the
# loaded CSV actually contains them, otherwise no edges are added here.
for intersection in intersections:
    connected_roads = [G.nodes[intersection].get(f'Connected_Road{i}') for i in range(1, 4)]
    connected_roads = [road for road in connected_roads if road and not pd.isna(road)]

    # The intersection's own coordinates do not depend on the road being examined
    intersection_lat = G.nodes[intersection]['lat']
    intersection_lon = G.nodes[intersection]['lon']

    # For each connected road, find the closest node and connect
    for connected_road in connected_roads:
        potential_nodes_for_road = nodes_by_road.get(connected_road, [])

        min_distance = np.inf
        closest_node = None

        for node in potential_nodes_for_road:
            node_lat = G.nodes[node]['lat']
            node_lon = G.nodes[node]['lon']
            distance = calculate_distance(intersection_lat, intersection_lon, node_lat, node_lon)

            # Strict comparison: on ties the first candidate encountered wins
            if distance < min_distance:
                min_distance = distance
                closest_node = node

        if closest_node is not None:
            G.add_edge(intersection, closest_node)

# One tab10 colour per distinct value of the 'Type' column
unique_types = df['Type'].unique()
colors = plt.cm.tab10(range(len(unique_types)))
color_map = {node_type: color for node_type, color in zip(unique_types, colors)}

plt.figure(figsize=(12,10))

# Colour each node by its type and place it at its (lon, lat) coordinates
node_colors = [color_map[node_type] for _, node_type in G.nodes(data='Type')]
pos = {node: (attr['lon'], attr['lat']) for node, attr in G.nodes(data=True)}

nx.draw(G, pos, node_color=node_colors, with_labels=False, node_size=1)
plt.show()

In [None]:
# Display the attribute dictionary stored on graph node 10
G.nodes[10]

In [None]:
# shortest_path is a module-level NetworkX function, not a method of nx.Graph.
# Raises networkx.NetworkXNoPath if nodes 1 and 23 are not connected.
nx.shortest_path(G, source=1, target=23)

In [None]:
# Build and draw a graph that contains only the rows with known coordinates.
# NOTE: this cell rebinds `G` and `df_sorted`, replacing the graph built earlier.

# Ensure the data is sorted by 'base_road' and 'Chainage'
df_sorted = df.sort_values(by=['base_road', 'Chainage'])

# Keep only the rows where all four columns are present, and renumber them
# 0..n-1 so that consecutive node ids are consecutive points in the sort order
df_nodes = (
    df_sorted[['lat', 'lon', 'Chainage', 'base_road']]
    .dropna()
    .reset_index(drop=True)
)

# Node id -> (lon, lat), used as plotting coordinates
positions = {
    node: (lon, lat)
    for node, lon, lat in zip(df_nodes.index, df_nodes['lon'], df_nodes['lat'])
}

G = nx.Graph()

# Add nodes with positions and additional attributes (for potential future use)
for node, chainage, base_road in zip(df_nodes.index, df_nodes['Chainage'], df_nodes['base_road']):
    G.add_node(node, pos=positions[node], chainage=chainage, base_road=base_road)

# Add edges between consecutive rows that belong to the same base_road.
# Comparing the column with itself shifted by one row replaces a per-row
# iloc lookup loop; the last row compares against NaN and is never linked forward.
same_road_as_next = df_nodes['base_road'].eq(df_nodes['base_road'].shift(-1))
G.add_edges_from((i, i + 1) for i in np.flatnonzero(same_road_as_next.to_numpy()).tolist())

# Extract positions from graph nodes for plotting
pos = nx.get_node_attributes(G, 'pos')
plt.figure(figsize=(14, 8))
# Draw the graph
nx.draw(G, pos, node_size=2, with_labels=False, edge_color='b')  # Adjust node_size for better visibility, with_labels for node ids
plt.show()

In [None]:
# Largest value in the 'Motorized' column of df_filtered (the filtered table
# built in an earlier cell of this notebook)
df_filtered['Motorized'].max()