## Import & Initialize

In [3]:
import os
import time
from datetime import datetime
from dotenv import load_dotenv

import googlemaps
import gmplot

import pandas as pd

import re
from difflib import SequenceMatcher
from fuzzywuzzy import fuzz


In [4]:
load_dotenv()
API_KEY = os.getenv('GOOGLE_MAPS_API_KEY_3')

In [5]:
gmaps = googlemaps.Client(key=API_KEY)

## Filter Road Names (Didn't Work)

In [16]:
def clean_road_name(road_name):
    road_name = road_name.lower()
    road_name = re.sub(r'[^\w\s]', '', road_name)
    return road_name

def are_similar(name1, name2, threshold=0.8):
    similarity = SequenceMatcher(None, name1, name2).ratio()
    return similarity >= threshold

In [17]:
def get_geocode(road_name, city="Riyadh", country="Saudi Arabia"):

    queries = [road_name, f"{road_name}, {city}", f"{road_name}, {city}, {country}"]
    
    for query in queries:
        geocode_result = gmaps.geocode(query)
        if geocode_result:
            location = geocode_result[0]['geometry']['location']
            return location['lat'], location['lng']
    
    return None, None


In [18]:
def get_traffic_data(road_name):
    try:
        origin_lat, origin_lng = get_geocode(road_name + " start")
        destination_lat, destination_lng = get_geocode(road_name + " end")
        
        if origin_lat is None or destination_lat is None:
            print(f"Error getting geocodes for {road_name}")
            return None

        origin = f"{origin_lat},{origin_lng}"
        destination = f"{destination_lat},{destination_lng}"
        
        traffic_data = gmaps.distance_matrix(origins=origin, 
                                            destinations=destination, 
                                            mode="driving", 
                                            departure_time="now")
        element = traffic_data['rows'][0]['elements'][0]
        
        if 'duration_in_traffic' in element:
            traffic_duration = element['duration_in_traffic']['value']
        elif 'duration' in element:
            traffic_duration = element['duration']['value']
            print(f"Traffic data not available for {road_name}, using regular duration")
        else:
            print(f"No duration data found for {road_name}")
            return None

        return traffic_duration
    except Exception as e:
        print(f"Error getting traffic data for {road_name}: {e}")
        return None


In [19]:
def deduplicate_roads(road_list):
    cleaned_roads = []
    unique_roads = []
    
    for road in road_list:
        cleaned_name = clean_road_name(road)
        
        if not any(are_similar(cleaned_name, clean_road_name(r)) for r in unique_roads):
            unique_roads.append(road)
    
    return unique_roads

In [20]:
def rank_roads_by_traffic(roads):
    road_traffic = []
    
    for road in roads:
        traffic = get_traffic_data(road)
        if traffic is not None:
            road_traffic.append((road, traffic))
    
    road_traffic.sort(key=lambda x: x[1], reverse=True)
    return road_traffic[:200]


In [21]:
def get_top_200_traffic_roads(road_list):
    unique_roads = deduplicate_roads(road_list)

    top_200_roads = rank_roads_by_traffic(unique_roads)
    
    return top_200_roads


top_200_roads = get_top_200_traffic_roads(all_roads)

for road, traffic in top_200_roads:
    print(f"{road}: {traffic} seconds of traffic delay")


NameError: name 'all_roads' is not defined

In [52]:
len(top_200_roads)

200

## Clean Data

In [47]:
with open("unique_road_names.txt", "r") as f:
    loaded_road_names = set(line.strip() for line in f)

In [50]:
def remove_duplicate_roads(road_names):
    unique_roads = []
    threshold = 80 
    for road in road_names:
        if not any(fuzz.ratio(road, unique) > threshold for unique in unique_roads):
            print(f"Original road count: {road}")
            unique_roads.append(road)
    
    return unique_roads

cleaned_roads = remove_duplicate_roads(loaded_road_names)

Original road count: دير البلح
Original road count: القدوة
Original road count: وادي الفرع
Original road count: Mohammed Bin Moslem
Original road count: شارع أبو علي الهجري
Original road count: علي الأحمر
Original road count: شارع فاس
Original road count: AL Mereeikheya
Original road count: الحريري
Original road count: طريق ديراب
Original road count: عبدالله البسام
Original road count: No. 80
Original road count: سليمان بن داود
Original road count: عبدالرزاق الجيلي
Original road count: ابن غيث
Original road count: عصمة الأسدي
Original road count: ابن شاهين
Original road count: سعد بن الربيع
Original road count: زيد بن الخطاب
Original road count: ابن الأصبح
Original road count: ابن الفصيح
Original road count: Barqah
Original road count: Al Hayathem
Original road count: عبيد الله بن يزيد
Original road count: أبي القاسم الوراق
Original road count: شارع الجائزة
Original road count: المشياح
Original road count: علي الموازيني
Original road count: أبي محمد بن المظفر
Original road count: شهاب 

In [55]:
with open("cleaned_unique_road_names.txt", "w") as f:
    for road_name in cleaned_roads:
        f.write(f"{road_name}\n")

## Filter Depend on Ratio/Distance of Roads

In [4]:
with open("cleaned_unique_road_names.txt", "r") as f:
    cleaned_roads = set(line.strip() for line in f)

In [5]:
def get_road_start_end(gmaps_client, road_name, city='Riyadh'):
    try:
        geocode_result = gmaps_client.geocode(f"{road_name}, {city}")
        if geocode_result and 'bounds' in geocode_result[0]['geometry']:
            start_point = geocode_result[0]['geometry']['bounds']['northeast']
            end_point = geocode_result[0]['geometry']['bounds']['southwest']
            return (start_point['lat'], start_point['lng']), (end_point['lat'], end_point['lng'])
        else:
            return None, None
    except Exception as e:
        print(f"Error getting start/end points for {road_name}: {e}")
        return None, None

In [20]:
def determine_traffic_condition(speed_kmh, delay_min):
    if speed_kmh < 20 and delay_min > 10:
        return 'Heavy Traffic'
    elif speed_kmh < 40:
        return 'Moderate Traffic'
    else:
        return 'Light Traffic'

def get_traffic_data(gmaps_client, road_name, city='Riyadh'):
    start_coords, end_coords = get_road_start_end(gmaps_client, road_name, city)
    
    if start_coords is None or end_coords is None:
        print(f"Could not retrieve start/end points for {road_name}.")
        return None

    origin = f"{start_coords[0]},{start_coords[1]}"
    destination = f"{end_coords[0]},{end_coords[1]}"
    
    try:
        directions_result = gmaps_client.directions(
            origin,
            destination,
            mode="driving",
            departure_time="now",
            traffic_model="best_guess"
        )

        if directions_result:
            route = directions_result[0]['legs'][0]
            duration_in_traffic_min = route['duration_in_traffic']['value'] / 60
            duration_min = route['duration']['value'] / 60
            distance_km = route['distance']['value'] / 1000
            speed_kmh = (distance_km) / (duration_in_traffic_min / 60)
            delay_min = (route['duration_in_traffic']['value'] - route['duration']['value']) / 60
            traffic_ratio = duration_in_traffic_min / duration_min
            timestamp = datetime.now().isoformat()
            day_of_week = datetime.now().strftime('%A')
            hour_of_day = datetime.now().hour
            is_peak_hour = 1 if hour_of_day in range(7, 10) or hour_of_day in range(17, 20) else 0
            time_of_day = datetime.now().strftime('%p')
            traffic_condition = determine_traffic_condition(speed_kmh, delay_min)

            traffic_data = {
                'route_id': f"{road_name}_{datetime.now().strftime('%Y%m%d%H%M%S')}",
                'road_name': road_name,
                'origin': origin,
                'destination': destination,
                'distance_km': distance_km,
                'duration_min': duration_min,
                'duration_in_traffic_min': duration_in_traffic_min,
                'departure_time': timestamp,
                'speed_kmh': speed_kmh,
                'delay_min': delay_min,
                'traffic_ratio': traffic_ratio,
                'timestamp': timestamp,
                'day_of_week': day_of_week,
                'hour_of_day': hour_of_day,
                'is_peak_hour': is_peak_hour,
                'time_of_day': time_of_day,
                'traffic_condition': traffic_condition
            }
            return traffic_data
        else:
            return None
    except Exception as e:
        print(f"Error fetching traffic data for {road_name}: {e}")
        return None

def generate_data_for_first_10_roads(csv_file, gmaps_client):
    df = pd.read_csv(csv_file)
    origins_destinations = df['Road Name'].head(10).tolist()  
    traffic_data_list = []

    for road_name in origins_destinations:
        traffic_data = get_traffic_data(gmaps_client, road_name, city='Riyadh')  
        if traffic_data:
            traffic_data_list.append(traffic_data)

    traffic_df = pd.DataFrame(traffic_data_list)

    traffic_df.to_csv('test_traffic_data.csv', index=False)
    print("Traffic data for the first 10 roads saved to 'test_traffic_data.csv'.")
    
generate_data_for_first_10_roads('filtered_road_names.csv', gmaps)


Traffic data for the first 10 roads saved to 'test_traffic_data.csv'.


In [12]:
df = pd.DataFrame(top_200_heap, columns=['Negative Ratio', 'Road Name', 'Distance (km)', 'Time (mins)'])

df['Ratio (km/min)'] = -df['Negative Ratio']

df = df.drop(columns=['Negative Ratio'])

df.to_csv('filtered_road_names.csv', index=False)

print(f"Data saved to filtered_road_names.csv")

Data saved to filtered_road_names.csv


## Add start,end points to csv

In [21]:
df = pd.read_csv('filtered_road_names.csv')
df.head()

Unnamed: 0,Road Name,Distance (km),Time (mins),Ratio (km/min)
0,العجلية,156.233,86.0,1.816663
1,طريق الأرطاوية أم الجماجم,201.086,112.266667,1.791146
2,شارع برودان,352.026,196.2,1.79422
3,القويعية,17.266,9.7,1.78
4,صلبوخ,36.323,25.416667,1.429102


In [44]:
df[['Start_point','End_point']] = df['Road Name'].apply(lambda x: pd.Series(get_road_start_end(gmaps, x, city='Riyadh')))

In [46]:
df.to_csv('filtered_road_names.csv', index=False)