In [None]:
import numpy as np
import pandas as pd
import networkx as nx
import os
import matplotlib.pyplot as plt
import re

In [None]:
folder_path = '../data/Traffic_data'

# Maps the part of each file name before its first '.' to the table parsed
# from that file.
dataframes = {}

# os.listdir returns entries in arbitrary order; sorting makes the order of
# the loaded tables (and therefore of every frame built from them) the same
# on every run and on every machine.
for file in sorted(os.listdir(folder_path)):
    if not file.endswith('.traffic.htm'):
        continue
    file_path = os.path.join(folder_path, file)
    file_name = file.split('.')[0]
    # read_html returns every <table> in the page; the one at position 4 is used.
    # NOTE(review): the position is hard-coded - confirm it holds for all files.
    df = pd.read_html(file_path)[4]
    # Drop the first row and renumber from 0.
    # `df` is intentionally left bound after the loop: a later cell reads it.
    df = df.iloc[1:].reset_index(drop=True)
    dataframes[file_name] = df

In [None]:
# Strip the two leading rows from every loaded table (same order as the
# dictionary), keeping the trimmed tables in a list.
modified_dfs = [table.iloc[2:] for table in dataframes.values()]

# Stack the trimmed tables into one frame with a fresh 0..n-1 index.
big_df = pd.concat(modified_dfs, ignore_index=True)

In [None]:
# Read the column labels from row 1 of the last loaded table. The table is
# taken from `dataframes` explicitly rather than from the leaked loop variable
# `df`, which a later cell rebinds to a different frame; on a clean top-to-bottom
# run both refer to the same table.
header_source = list(dataframes.values())[-1]
columns = header_source.iloc[1].tolist()

# Positions 0 and 1 get explicit labels for the road id and its name.
columns[0] = 'Road'
columns[1] = 'Name'
# Positions 5-7 are relabelled with a "_2" suffix.
# NOTE(review): presumably these repeat the LRP/Offset/Chainage labels for the
# segment end and would otherwise collide - confirm against the source tables.
columns[5] = "LRP_2"
columns[6] = "Offset_2"
columns[7] = "Chainage_2"

big_df.columns = columns
big_df

In [None]:
# Label every traffic row with type 'road'; the bridge rows appended in a
# later cell are labelled 'bridge' in the same column.
big_df['type'] = 'road'
# Define a function to find all road names in the 'Name' column
def find_roads(name):
    """Return the road codes mentioned in ``name``.

    A road code is a standalone token made of one of the letters ``Z``, ``N``
    or ``R`` followed by one or more digits (for example ``N1`` or ``R110``).

    Parameters
    ----------
    name : str or missing value
        Free-text name to scan. Anything that is not a string (``None``,
        ``NaN``, ...) is treated as "no name".

    Returns
    -------
    list of str
        Matching codes in order of appearance; empty when there are none.
    """
    # Missing names arrive as float NaN / None when the function is applied to
    # a DataFrame column; regex matching on them would raise TypeError.
    if not isinstance(name, str):
        return []
    # \b on both sides keeps codes embedded in longer words from matching.
    road_pattern = re.compile(r'\b[ZNR]\d+\b')
    return road_pattern.findall(name)

# For every row, store the list of road codes that find_roads extracts from
# its 'Name' value (an empty list when the name mentions none).
big_df['identified_roads'] = big_df['Name'].apply(find_roads)


In [None]:
# Load the BMMS overview workbook; its rows are appended below as 'bridge' rows.
bmms = pd.read_excel("../data/BMMS_overview.xlsx")

In [None]:
# Display the loaded BMMS table for inspection.
bmms

In [None]:
# The base road is the part of the segment id before the first '-'.
# The .str accessor propagates missing ids as NaN, whereas calling
# x.split('-') on a NaN raises AttributeError.
big_df['base_road'] = big_df['Road'].str.split('-').str[0]

# Chainages must be numeric for sorting and distance calculations; values that
# cannot be parsed become NaN (errors='coerce').
big_df['Chainage'] = pd.to_numeric(big_df['Chainage'], errors='coerce')
bmms['chainage'] = pd.to_numeric(bmms['chainage'], errors='coerce')

# Step 1: take the BMMS columns of interest and rename the ones that have a
# counterpart in big_df so they line up when the frames are stacked.
bmms_subset = (
    bmms[['road', 'chainage', 'name', 'condition', 'lat', 'lon']]
    .copy()
    .rename(columns={'road': 'base_road', 'chainage': 'Chainage', 'name': 'Name'})
)

# Give bmms_subset every big_df column it lacks, filled with pd.NA.
for col in big_df.columns:
    if col not in bmms_subset.columns:
        bmms_subset[col] = pd.NA
# Set after the loop: the loop has just created 'type' as pd.NA.
bmms_subset['type'] = 'bridge'

# Step 2: append the BMMS rows below the traffic rows with a fresh index.
combined_df = pd.concat([big_df, bmms_subset], ignore_index=True)

# 'combined_df' now holds the rows of big_df followed by the rows of bmms_subset.
combined_df


In [None]:
# Order rows by road and then by position along the road, and renumber the
# index so it follows that order.
combined_df = (
    combined_df
    .sort_values(by=['base_road', 'Chainage'])
    .reset_index(drop=True)
)

In [None]:
# Display the sorted, re-indexed table for inspection.
combined_df

In [None]:
# Load the road reference points; the coordinate-filling cell below reads the
# 'road', 'chainage', 'lat' and 'lon' columns of this table.
# NOTE(review): the path reaches into a sibling directory outside this
# project - confirm it exists on a fresh checkout.
roads = pd.read_csv('../../EPA133a-G2-A3/data/_roads3.csv')
roads

In [None]:
# Give every row without a latitude the coordinates of the reference point in
# `roads` that lies on the same base road and has the nearest chainage.
#
# Compared with a row-by-row scan that stops at the first non-improving point,
# this:
#   * also fills rows whose nearest reference point is the last one on the road,
#   * never writes placeholder coordinates (rows with an unknown chainage or
#     with no reference points for their road simply stay empty),
#   * does not depend on `roads` being ordered by chainage.
needs_coords = combined_df['lat'].isna() & combined_df['Chainage'].notna()

# Reference points that cannot contribute a usable coordinate are ignored.
usable_points = roads.dropna(subset=['chainage', 'lat', 'lon'])
points_by_road = {
    road_name: points
    for road_name, points in usable_points.groupby('road', sort=False)
}

for road_name, pending in combined_df[needs_coords].groupby('base_road', sort=False):
    candidates = points_by_road.get(road_name)
    if candidates is None:
        continue  # no reference points for this road

    ref_chainage = candidates['chainage'].to_numpy(dtype=float)
    targets = pending['Chainage'].to_numpy(dtype=float)
    # |target - reference| for every (row, reference point) pair; argmin picks
    # the first nearest reference point for each row.
    nearest = np.abs(targets[:, None] - ref_chainage[None, :]).argmin(axis=1)

    combined_df.loc[pending.index, 'lat'] = candidates['lat'].to_numpy()[nearest]
    combined_df.loc[pending.index, 'lon'] = candidates['lon'].to_numpy()[nearest]
        
                
            
            
        

In [None]:
# Save the combined table; index=False keeps the row index out of the file.
combined_df.to_csv('../data/traffic_df_with_bridges.csv', index=False)

In [None]:
# Continue on a copy so the following steps leave combined_df untouched.
# Note: this rebinds the name `df` that earlier cells used for a loaded table.
df = combined_df.copy()
df

In [None]:
# True for rows whose Road id contains an "L"; rows without a Road id count
# as not containing it (na=False) and are therefore kept.
has_l_marker = df["Road"].str.contains("L", na=False)
df_filtered = df.loc[~has_l_marker]

# Written with the default settings, so the row index is included in the file.
df_filtered.to_csv('../data/traffic_df_with_bridges_and_intersections_onlyR.csv')

In [None]:
# Renumber the rows from 0 and keep the previous index as a regular column.
# The graph cells below use the new index values as node ids.
# NOTE: not idempotent - running this cell again without re-running the
# previous one inserts yet another index column.
df_filtered = df_filtered.reset_index()

In [None]:
import networkx as nx
import matplotlib.pyplot as plt

# (column in df_filtered, node attribute name) pairs, in the order the values
# are stored in each `positions` tuple after the leading (lon, lat).
node_fields = [
    ('Name', 'Name'),
    ('Road', 'Road'),
    ('base_road', 'base_road'),
    ('Heavy Truck', 'Heavy_Truck'),
    ('Medium Truck', 'Medium_Truck'),
    ('Small Truck', 'Small_Truck'),
    ('Large Bus', 'Large_Bus'),
    ('Medium Bus', 'Medium_Bus'),
    ('Micro Bus', 'Micro_Bus'),
    ('Utility', 'Utility'),
    ('Car', 'Car'),
    ('Auto Rickshaw', 'Auto_Rickshaw'),
    ('Motor Cycle', 'Motor_Cycle'),
    ('Bi-Cycle', 'Bi_Cycle'),
    ('Cycle Rickshaw', 'Cycle_Rickshaw'),
    ('Cart', 'Cart'),
    ('Motorized', 'Motorized'),
    ('Non Motorized', 'Non_Motorized'),
    ('Total AADT', 'Total_AADT'),
    ('(AADT)', 'AADT'),
]

# One tuple per row of df_filtered, keyed by the row index:
# (lon, lat, <values of the node_fields columns in order>).
positions = {
    row_id: (record['lon'], record['lat'],
             *(record[column] for column, _ in node_fields))
    for row_id, record in df_filtered.iterrows()
}

# Create a graph
G = nx.Graph()

# One node per row: 'pos' holds (lon, lat), the remaining tuple entries become
# named node attributes.
for node, values in positions.items():
    attributes = {
        attr_name: value
        for (_, attr_name), value in zip(node_fields, values[2:])
    }
    G.add_node(node, pos=(values[0], values[1]), **attributes)


# If you also want to connect nodes, you can add edges here
# For example, to connect each node to its nearest neighbor (simplistic approach):
# for i, node_i_pos in positions.items():
#     closest_node, min_dist = None, float('inf')
#     for j, node_j_pos in positions.items():
#         if i != j:
#             dist = ((node_i_pos[0] - node_j_pos[0])**2 + (node_i_pos[1] - node_j_pos[1])**2)**0.5
#             if dist < min_dist:
#                 closest_node, min_dist = j, dist
#     G.add_edge(i, closest_node)

# Plot the nodes at their (lon, lat) positions.
pos = nx.get_node_attributes(G, 'pos')
plt.figure(figsize=(14, 8))
nx.draw(G, pos, node_size=0.1)  # small markers: one per table row
plt.show()


In [None]:
# Connect each row to the row directly before it whenever both belong to the
# same base road. This relies on df_filtered being ordered so that rows of one
# road are adjacent; rows with a missing base road never compare equal and so
# are never linked.
node_ids = list(df_filtered.index)
road_labels = df_filtered['base_road'].tolist()

for step in range(1, len(node_ids)):
    if road_labels[step - 1] == road_labels[step]:
        G.add_edge(node_ids[step - 1], node_ids[step])

In [None]:
# For every row whose name mentions other roads, add one zero-weight edge to
# the closest row (smallest |dlon| + |dlat|) found on any of those roads.
for node_id, record in df_filtered.iterrows():
    mentioned_roads = record['identified_roads']
    # Rows without a list here (e.g. a missing value) have nothing to link.
    if not isinstance(mentioned_roads, list):
        continue

    origin_lat = record['lat']
    origin_lon = record['lon']
    nearest_id = None
    nearest_dist = 9999  # candidates at or beyond this distance are ignored

    for road_name in mentioned_roads:
        on_road = df_filtered.loc[df_filtered['base_road'] == road_name, ['lat', 'lon']]
        for candidate_id, cand_lat, cand_lon in on_road.itertuples(name=None):
            dist = abs(cand_lon - origin_lon) + abs(cand_lat - origin_lat)
            # Strict '<' keeps the first of equally distant candidates; a NaN
            # distance never passes the test.
            if dist < nearest_dist:
                nearest_id = candidate_id
                nearest_dist = dist

    if nearest_id is not None:
        G.add_edge(nearest_id, node_id, weight=0)
                
                

In [None]:
# Redraw the graph now that edges have been added, using each node's stored
# 'pos' attribute as its plot coordinates.
pos = nx.get_node_attributes(G, 'pos')
plt.figure(figsize=(14, 8))
# Draw the graph
nx.draw(G, pos, node_size=1)  # Adjust node_size for better visibility
plt.show()

In [None]:
# def road_in_graph(graph, road_name):
#     for node in graph.nodes(data=True):
#         if node[1].get('Road') == road_name:
#             return True
#     return False
# 
# def find_potential_edges(road):
#     edges = []
#     base, part = road.rsplit('-', 1)
#     
#     if part.endswith('L'):
#         # Add edges to the next road number on the left side and to the generic road
#         edges.append((road, f"{base}-{str(int(part[:-1])+1)}L"))
#         edges.append((road, f"{base}-{str(int(part[:-1])+1)}"))
#     elif part.endswith('R'):
#         # Add edges to the next road number on the right side (We don't connect R to the generic road as per the rule)
#         edges.append((road, f"{base}-{str(int(part[:-1])+1)}R"))
#     else:
#         # This case handles roads without L or R and tries to increment
#         try:
#             numeric_part = int(part)
#             # Add edges to both L and R of the next road number and to the next generic road
#             edges.append((road, f"{base}-{str(numeric_part+1)}L"))
#             edges.append((road, f"{base}-{str(numeric_part+1)}R"))
#             edges.append((road, f"{base}-{str(numeric_part+1)}"))
#         except ValueError:
#             # In case the part is not numeric, we won't add any edges
#             pass
#     
#     return edges
# 
# # For each road, find and add potential edges
# for road in G.nodes():
#     name_attribute = G.nodes[road].get('Road')
#     if pd.isnull(name_attribute) == False:
#         potential_edges = find_potential_edges(name_attribute)
#         for edge in potential_edges:
#             # print('road', G.nodes[road].get('Road'))
#             # print(potential_edges)
#             if edge[1] in G.nodes():  # Add the edge only if the target node exists
#                 print(edge)
#                 G.add_edge(*edge, color='red')
# 
# # Show a brief summary of the graph to confirm edge creation