In [2]:
pip install pyspark


Defaulting to user installation because normal site-packages is not writeable
Note: you may need to restart the kernel to use updated packages.


In [3]:
# === Install Required Packages ===
!pip install openrouteservice pandas requests

# === IMPORTS ===
import pandas as pd
import openrouteservice
import requests
import time

# === ORS API Key ===
ORS_API_KEY = '5b3ce3597851110001cf6248d3ad9e160c2243728d2f5ef309fdce20'
client = openrouteservice.Client(key=ORS_API_KEY)

# === WeatherAPI Key ===
WEATHER_API_KEY = 'aa46c37c5b04427fa3d201341251404'

# === City Coordinates ===
citiescordins = {
    "Nashville, TN": [-86.7816, 36.1627],
    "Salt Lake City, UT": [-111.8910, 40.7608],
    "Denver, CO": [-104.9903, 39.7392],
    "New York City, NY": [-74.0060, 40.7128],
    "Cincinnati, OH": [-84.5120, 39.1031],
    "Seattle, WA": [-122.3321, 47.6062],
    "Orlando, FL": [-81.3792, 28.5383],
    "Washington, DC": [-77.0369, 38.9072],
    "Detroit, MI": [-83.0458, 42.3314],
    "Houston, TX": [-95.3698, 29.7604],
    "Miami, FL": [-80.1918, 25.7617],
    "Baltimore, MD": [-76.6122, 39.2904],
    "Pittsburgh, PA": [-79.9959, 40.4406],
    "Las Vegas, NV": [-115.1398, 36.1699],
    "Chicago, IL": [-87.6298, 41.8781],
    "Boston, MA": [-71.0589, 42.3601],
    "Boise, ID": [-116.2023, 43.6150],
    "Portland, ME": [-70.2553, 43.6591],
    "Portland, OR": [-122.6765, 45.5051],
    "Harrisburg, PA": [-76.8844, 40.2732]
}

# === Create City Pair Routes ===
routes = []
for source in citiescordins:
    for destination in citiescordins:
        if source != destination:
            routes.append({
                "sourcecity": source,
                "destinationcity": destination,
                "sourcelon": citiescordins[source][0],
                "sourcelat": citiescordins[source][1],
                "destlon": citiescordins[destination][0],
                "destlat": citiescordins[destination][1]
            })

df_routes = pd.DataFrame(routes)
df_routes.to_csv("city_pairs.csv", index=False)
print("city_pairs.csv created successfully.")

# === Load city pairs for routing ===
df = pd.read_csv("city_pairs.csv")

# === ORS: Get Distance + Duration ===
routes_data = []

for idx, row in df.iterrows():
    source_coords = (row['sourcelon'], row['sourcelat'])
    dest_coords = (row['destlon'], row['destlat'])
    
    try:
        route = client.directions((source_coords, dest_coords), profile='driving-hgv', format='geojson')
        segment = route['features'][0]['properties']['segments'][0]

        routes_data.append({
            "sourcecity": row['sourcecity'],
            "destinationcity": row['destinationcity'],
            "distance_km": round(segment['distance'] / 1000, 2),
            "duration_hr": round(segment['duration'] / 3600, 2)
        })

        print(f"{row['sourcecity']} → {row['destinationcity']}: {segment['distance']/1000:.2f} km, {segment['duration']/3600:.2f} hrs")
        time.sleep(1)

    except Exception as e:
        print(f" Error fetching route: {row['sourcecity']} → {row['destinationcity']}: {e}")
        routes_data.append({
            "sourcecity": row['sourcecity'],
            "destinationcity": row['destinationcity'],
            "distance_km": None,
            "duration_hr": None
        })
        time.sleep(2)

df_routes_final = pd.DataFrame(routes_data)

# === WEATHER API: Enrich with Destination Weather ===
df_routes_final['city_for_weather'] = df_routes_final['destinationcity'].apply(lambda x: x.split(',')[0].strip())
unique_cities = df_routes_final['city_for_weather'].unique()

weather_data = {}

for city in unique_cities:
    try:
        url = f"http://api.weatherapi.com/v1/current.json?key={WEATHER_API_KEY}&q={city}"
        response = requests.get(url, timeout=5)
        data = response.json()

        if response.status_code == 200 and "current" in data:
            description = data['current']['condition']['text']
            temp_c = round(data['current']['temp_c'], 2)
        else:
            description = "unknown"
            temp_c = None

        weather_data[city] = {
            'weather_description': description,
            'temperature_c': temp_c
        }

        print(f" {city}: {description}, {temp_c}°C")
        time.sleep(1)

    except Exception as e:
        print(f" Error fetching weather for {city}: {e}")
        weather_data[city] = {
            'weather_description': 'unknown',
            'temperature_c': None
        }

df_routes_final['weather_description'] = df_routes_final['city_for_weather'].map(lambda x: weather_data[x]['weather_description'])
df_routes_final['temperature_c'] = df_routes_final['city_for_weather'].map(lambda x: weather_data[x]['temperature_c'])

df_routes_final.drop(columns=['city_for_weather'], inplace=True)

# === Save Final Output ===
df_routes_final.to_csv("final_route_data.csv", index=False)
print("\n final_route_data.csv saved with distance, duration, and weather.")

Defaulting to user installation because normal site-packages is not writeable
city_pairs.csv created successfully.
Nashville, TN → Salt Lake City, UT: 2633.14 km, 37.36 hrs
Nashville, TN → Denver, CO: 1869.60 km, 27.05 hrs
Nashville, TN → New York City, NY: 1429.29 km, 20.94 hrs
Nashville, TN → Cincinnati, OH: 462.86 km, 6.49 hrs
Nashville, TN → Seattle, WA: 3865.20 km, 51.90 hrs
Nashville, TN → Orlando, FL: 1105.63 km, 16.59 hrs
Nashville, TN → Washington, DC: 1065.64 km, 15.62 hrs
Nashville, TN → Detroit, MI: 910.24 km, 12.44 hrs
Nashville, TN → Houston, TX: 1263.15 km, 17.14 hrs
Nashville, TN → Miami, FL: 1466.96 km, 21.99 hrs
Nashville, TN → Baltimore, MD: 1131.37 km, 16.42 hrs
Nashville, TN → Pittsburgh, PA: 940.24 km, 12.98 hrs
Nashville, TN → Las Vegas, NV: 2888.57 km, 40.78 hrs
Nashville, TN → Chicago, IL: 745.89 km, 10.65 hrs
Nashville, TN → Boston, MA: 1814.89 km, 26.59 hrs
Nashville, TN → Boise, ID: 3109.27 km, 43.00 hrs
Nashville, TN → Portland, ME: 1974.48 km, 29.14 hrs
Na

In [4]:
df = pd.read_csv("final_route_data.csv")
df.head()

Unnamed: 0,sourcecity,destinationcity,distance_km,duration_hr,weather_description,temperature_c
0,"Nashville, TN","Salt Lake City, UT",2633.14,37.36,Partly cloudy,21.1
1,"Nashville, TN","Denver, CO",1869.6,27.05,Sunny,13.9
2,"Nashville, TN","New York City, NY",1429.29,20.94,Partly Cloudy,17.2
3,"Nashville, TN","Cincinnati, OH",462.86,6.49,Overcast,19.4
4,"Nashville, TN","Seattle, WA",3865.2,51.9,Overcast,9.4


In [5]:
import pandas as pd
import numpy as np
from datetime import datetime, timedelta

In [6]:
# Load base data
df_base = pd.read_csv("final_route_data.csv")

# Simulation parameters
total_rows = 196_000_000       
chunk_size = 1_000_000
chunks = total_rows // chunk_size

# Truck types with modifiers
truck_types = {
    "Light Duty": 1.00,
    "Medium Duty": 1.05,
    "Heavy Duty": 1.15,
    "Refrigerated": 1.10
}
truck_names = list(truck_types.keys())

# Output file name
output_file = "synthetic_shipping_data_21gb.csv"

# Final columns
columns = [
    'sourcecity', 'destinationcity', 'distance_km', 'duration_hr',
    'weather_description', 'temperature_c', 'truck_type', 'timestamp'
]

In [7]:
# Write header
with open(output_file, "w") as f:
    f.write(",".join(columns) + "\n")

print("Created output file with headers:", output_file)

Created output file with headers: synthetic_shipping_data_21gb.csv


In [8]:
print(" Starting simulation...")

for i in range(chunks):
    print(f" Generating chunk {i+1}/{chunks}")

    # Randomly select attributes
    trucks = np.random.choice(truck_names, chunk_size)
    modifiers = [truck_types[t] for t in trucks]

    data = {
        'sourcecity': np.random.choice(df_base['sourcecity'], chunk_size),
        'destinationcity': np.random.choice(df_base['destinationcity'], chunk_size),
        'distance_km': (
            np.random.choice(df_base['distance_km'], chunk_size) +
            np.random.normal(0, 10, chunk_size)  # simulate variation
        ),
        'weather_description': np.random.choice(df_base['weather_description'], chunk_size),
        'temperature_c': (
            np.random.choice(df_base['temperature_c'], chunk_size) +
            np.random.normal(0, 1.5, chunk_size)
        ),
        'truck_type': trucks,
        'timestamp': pd.date_range(start='2023-01-01', periods=chunk_size, freq='s')
    }

    df_chunk = pd.DataFrame(data)

    # Simulated duration with truck type modifier
    base_duration = (
        np.random.choice(df_base['duration_hr'], chunk_size) +
        np.random.normal(0, 1, chunk_size)
    )
    df_chunk['duration_hr'] = base_duration * [truck_types[t] for t in df_chunk['truck_type']]

    # Rearrange columns
    df_chunk = df_chunk[columns]

    # Append to CSV
    df_chunk.to_csv(output_file, mode='a', header=False, index=False)

    print(f" Chunk {i+1} written.")

 Starting simulation...
 Generating chunk 1/196
 Chunk 1 written.
 Generating chunk 2/196
 Chunk 2 written.
 Generating chunk 3/196
 Chunk 3 written.
 Generating chunk 4/196
 Chunk 4 written.
 Generating chunk 5/196
 Chunk 5 written.
 Generating chunk 6/196
 Chunk 6 written.
 Generating chunk 7/196
 Chunk 7 written.
 Generating chunk 8/196
 Chunk 8 written.
 Generating chunk 9/196
 Chunk 9 written.
 Generating chunk 10/196
 Chunk 10 written.
 Generating chunk 11/196
 Chunk 11 written.
 Generating chunk 12/196
 Chunk 12 written.
 Generating chunk 13/196
 Chunk 13 written.
 Generating chunk 14/196
 Chunk 14 written.
 Generating chunk 15/196
 Chunk 15 written.
 Generating chunk 16/196
 Chunk 16 written.
 Generating chunk 17/196
 Chunk 17 written.
 Generating chunk 18/196
 Chunk 18 written.
 Generating chunk 19/196
 Chunk 19 written.
 Generating chunk 20/196
 Chunk 20 written.
 Generating chunk 21/196
 Chunk 21 written.
 Generating chunk 22/196
 Chunk 22 written.
 Generating chunk 23/196
 

In [9]:
print("Simulation complete!")
print("File saved as:", output_file)


Simulation complete!
File saved as: synthetic_shipping_data_21gb.csv


In [10]:
from pyspark.sql import SparkSession

spark = SparkSession.builder \
    .appName("UploadCSVToHDFS") \
    .config("spark.hadoop.fs.defaultFS", "hdfs://localhost:9000") \
    .getOrCreate()

df = spark.read.csv("file:///home/varun/Desktop/synthetic_shipping_data_21gb.csv", header=True, inferSchema=True)

df.write \
  .option("header", True) \
  .mode("overwrite") \
  .csv("/user/varun/shipping_data")


25/05/12 10:21:22 WARN Utils: Your hostname, ubuntu resolves to a loopback address: 127.0.1.1; using 10.0.2.15 instead (on interface enp0s3)
25/05/12 10:21:22 WARN Utils: Set SPARK_LOCAL_IP if you need to bind to another address
Setting default log level to "WARN".
To adjust logging level use sc.setLogLevel(newLevel). For SparkR, use setLogLevel(newLevel).
25/05/12 10:21:23 WARN NativeCodeLoader: Unable to load native-hadoop library for your platform... using builtin-java classes where applicable
                                                                                

In [11]:
from pyspark.sql import SparkSession
from pyspark.sql.functions import col, countDistinct, count, min, max, avg, expr

# Start Spark session
spark = SparkSession.builder \
    .appName("ShippingOptimizationEDA") \
    .config("spark.hadoop.fs.defaultFS", "hdfs://localhost:9000") \
    .getOrCreate()

# Read from HDFS (correct folder path)
df = spark.read \
    .option("header", True) \
    .csv("hdfs://localhost:9000/user/varun/shipping_data")

# Cast distance_km column to numeric
df = df.withColumn("distance_km", col("distance_km").cast("double"))

# Count total records
print(" Total records:", df.count())

# Distinct counts
df.select(countDistinct("sourcecity")).show(truncate=False)
df.select(countDistinct("destinationcity")).show(truncate=False)
df.select("sourcecity", "destinationcity").dropDuplicates().count()

# Distance stats
df.select(
    min("distance_km").alias("min_distance_km"),
    max("distance_km").alias("max_distance_km"),
    avg("distance_km").alias("avg_distance_km")
).show(truncate=False)

# Top 5 longest routes
print("\n Top 5 Longest Routes:")
df.select("sourcecity", "destinationcity", "distance_km") \
  .orderBy(col("distance_km").desc()) \
  .limit(5).show(truncate=False)

# Top 5 shortest non-zero routes
print("\n Top 5 Shortest Non-Zero Routes:")
df.filter(col("distance_km") > 0) \
  .select("sourcecity", "destinationcity", "distance_km") \
  .orderBy("distance_km") \
  .limit(5).show(truncate=False)


25/05/12 10:39:08 WARN SparkSession: Using an existing Spark session; only runtime SQL configurations will take effect.
                                                                                

 Total records: 196000000


                                                                                

+--------------------------+
|count(DISTINCT sourcecity)|
+--------------------------+
|20                        |
+--------------------------+



                                                                                

+-------------------------------+
|count(DISTINCT destinationcity)|
+-------------------------------+
|20                             |
+-------------------------------+



                                                                                

+------------------+-----------------+-----------------+
|min_distance_km   |max_distance_km  |avg_distance_km  |
+------------------+-----------------+-----------------+
|29.961738244688476|5373.819321891454|2291.717263228566|
+------------------+-----------------+-----------------+


 Top 5 Longest Routes:


                                                                                

+--------------+---------------+-----------------+
|sourcecity    |destinationcity|distance_km      |
+--------------+---------------+-----------------+
|Detroit, MI   |Portland, OR   |5373.819321891454|
|Washington, DC|Boston, MA     |5371.000621437923|
|Boston, MA    |Nashville, TN  |5370.000870118544|
|Orlando, FL   |Seattle, WA    |5369.942453249165|
|Boise, ID     |Washington, DC |5368.114734215875|
+--------------+---------------+-----------------+


 Top 5 Shortest Non-Zero Routes:




+--------------+------------------+------------------+
|sourcecity    |destinationcity   |distance_km       |
+--------------+------------------+------------------+
|Boston, MA    |Portland, ME      |29.961738244688476|
|Harrisburg, PA|Houston, TX       |30.01121087033956 |
|Washington, DC|Denver, CO        |30.744669518569125|
|Houston, TX   |Chicago, IL       |31.4749576989733  |
|Houston, TX   |Salt Lake City, UT|31.60285341529176 |
+--------------+------------------+------------------+



                                                                                

In [12]:
from pyspark.sql import SparkSession
import pandas as pd
import numpy as np
import random

spark = SparkSession.builder \
    .appName("ACO_FixedStartEnd") \
    .config("spark.hadoop.fs.defaultFS", "hdfs://localhost:9000") \
    .config("spark.driver.memory", "4g") \
    .getOrCreate()

df = spark.read \
    .option("header", True) \
    .csv("hdfs://localhost:9000/user/varun/shipping_data") \
    .select("sourcecity", "destinationcity", "distance_km") \
    .limit(1000)

df = df.withColumn("distance_km", df["distance_km"].cast("double"))
pdf = df.toPandas()

cities = sorted(set(pdf['sourcecity']).union(set(pdf['destinationcity'])))
city_index = {city: i for i, city in enumerate(cities)}
index_to_city = {i: city for city, i in city_index.items()}
n = len(cities)

start_candidates = [c for c in cities if "harrisburg" in c.lower()]
end_candidates = [c for c in cities if "new york" in c.lower()]

if not start_candidates or not end_candidates:
    raise ValueError("Could not find city matching 'Harrisburg' or 'New York'.")

start_city = start_candidates[0]
end_city = end_candidates[0]

start_index = city_index[start_city]
end_index = city_index[end_city]

dist_matrix = np.full((n, n), np.inf)
for _, row in pdf.iterrows():
    i, j = city_index[row['sourcecity']], city_index[row['destinationcity']]
    dist_matrix[i][j] = row['distance_km']

n_ants = 10
n_iterations = 30
alpha = 1.0
beta = 5.0
evaporation = 0.5
Q = 100

pheromones = np.ones((n, n))

def probability(i, visited):
    probs = []
    for j in range(n):
        if j in visited or dist_matrix[i][j] == np.inf:
            probs.append(0)
        else:
            probs.append((pheromones[i][j] ** alpha) * ((1 / dist_matrix[i][j]) ** beta))
    total = sum(probs)
    return [p / total if total > 0 else 0 for p in probs]

def route_length(route):
    return sum(dist_matrix[route[i]][route[i+1]] for i in range(len(route) - 1))

best_route = None
best_length = float('inf')

for _ in range(n_iterations):
    routes = []
    lengths = []
    for _ in range(n_ants):
        visited = [start_index]
        current = start_index
        while len(visited) < n - 1:
            probs = probability(current, visited + [end_index])
            if sum(probs) == 0:
                break
            next_city = np.random.choice(range(n), p=probs)
            if next_city != end_index:
                visited.append(next_city)
                current = next_city
        visited.append(end_index)
        if len(visited) == n:
            length = route_length(visited)
            routes.append(visited)
            lengths.append(length)
            if length < best_length:
                best_length = length
                best_route = visited
    pheromones *= (1 - evaporation)
    for route, length in zip(routes, lengths):
        for i in range(len(route) - 1):
            pheromones[route[i]][route[i+1]] += Q / length

aco_route = [index_to_city[i] for i in best_route] if best_route else []

print("ACO Start city:", start_city)
print("ACO End city:", end_city)
print("ACO Route:")
print(" ➝ ".join(aco_route))
print("ACO Total Distance (km):", round(best_length, 2) if best_route else "N/A")


25/05/12 11:05:51 WARN SparkSession: Using an existing Spark session; only runtime SQL configurations will take effect.


ACO Start city: Harrisburg, PA
ACO End city: New York City, NY
ACO Route:
Harrisburg, PA ➝ Baltimore, MD ➝ Las Vegas, NV ➝ Denver, CO ➝ Cincinnati, OH ➝ Miami, FL ➝ Portland, ME ➝ Salt Lake City, UT ➝ Seattle, WA ➝ Boston, MA ➝ Boise, ID ➝ Houston, TX ➝ Portland, OR ➝ Nashville, TN ➝ Orlando, FL ➝ Chicago, IL ➝ Pittsburgh, PA ➝ Washington, DC ➝ Detroit, MI ➝ New York City, NY
ACO Total Distance (km): 12576.04


In [13]:
from pyspark.sql import SparkSession
import pandas as pd
import numpy as np
import random

spark = SparkSession.builder \
    .appName("GA_FixedStartEnd") \
    .config("spark.hadoop.fs.defaultFS", "hdfs://localhost:9000") \
    .config("spark.driver.memory", "4g") \
    .getOrCreate()

df = spark.read \
    .option("header", True) \
    .csv("hdfs://localhost:9000/user/varun/shipping_data") \
    .select("sourcecity", "destinationcity", "distance_km") \
    .limit(1000)

df = df.withColumn("distance_km", df["distance_km"].cast("double"))
pdf = df.toPandas()

cities = sorted(set(pdf['sourcecity']).union(set(pdf['destinationcity'])))
city_index = {city: i for i, city in enumerate(cities)}
index_to_city = {i: city for city, i in city_index.items()}
n = len(cities)

start_candidates = [c for c in cities if "harrisburg" in c.lower()]
end_candidates = [c for c in cities if "new york" in c.lower()]

if not start_candidates or not end_candidates:
    raise ValueError("Could not find city matching 'Harrisburg' or 'New York'.")

start_city = start_candidates[0]
end_city = end_candidates[0]
start_index = city_index[start_city]
end_index = city_index[end_city]

dist_matrix = np.full((n, n), np.inf)
for _, row in pdf.iterrows():
    i, j = city_index[row['sourcecity']], city_index[row['destinationcity']]
    dist_matrix[i][j] = row['distance_km']

def total_distance(route):
    full_route = [start_index] + route + [end_index]
    return sum(dist_matrix[full_route[i]][full_route[i+1]] for i in range(len(full_route)-1))

def create_population(size):
    middle_cities = [i for i in range(n) if i not in (start_index, end_index)]
    return [random.sample(middle_cities, len(middle_cities)) for _ in range(size)]

def crossover(p1, p2):
    a, b = sorted(random.sample(range(len(p1)), 2))
    child = [-1] * len(p1)
    child[a:b] = p1[a:b]
    fill = [x for x in p2 if x not in child]
    pos = 0
    for i in range(len(p1)):
        if child[i] == -1:
            child[i] = fill[pos]
            pos += 1
    return child

def mutate(route, rate=0.01):
    for i in range(len(route)):
        if random.random() < rate:
            j = random.randint(0, len(route)-1)
            route[i], route[j] = route[j], route[i]
    return route

population_size = 50
generations = 100
mutation_rate = 0.02

population = create_population(population_size)

for _ in range(generations):
    population = sorted(population, key=total_distance)
    next_gen = population[:10]
    while len(next_gen) < population_size:
        p1, p2 = random.sample(population[:25], 2)
        child = crossover(p1, p2)
        child = mutate(child, mutation_rate)
        next_gen.append(child)
    population = next_gen

best_middle = population[0]
best_full = [start_index] + best_middle + [end_index]
best_distance = total_distance(best_middle)

ga_route = [index_to_city[i] for i in best_full]

print("GA Start city:", start_city)
print("GA End city:", end_city)
print("GA Route:")
print(" ➝ ".join(ga_route))
print("GA Total Distance (km):", round(best_distance, 2))


25/05/12 11:06:01 WARN SparkSession: Using an existing Spark session; only runtime SQL configurations will take effect.


GA Start city: Harrisburg, PA
GA End city: New York City, NY
GA Route:
Harrisburg, PA ➝ Salt Lake City, UT ➝ Orlando, FL ➝ Chicago, IL ➝ Houston, TX ➝ Boston, MA ➝ Baltimore, MD ➝ Portland, OR ➝ Nashville, TN ➝ Portland, ME ➝ Boise, ID ➝ Seattle, WA ➝ Pittsburgh, PA ➝ Washington, DC ➝ Las Vegas, NV ➝ Denver, CO ➝ Cincinnati, OH ➝ Miami, FL ➝ Detroit, MI ➝ New York City, NY
GA Total Distance (km): 14305.87


In [14]:
pip install folium geopy


Defaulting to user installation because normal site-packages is not writeable
Note: you may need to restart the kernel to use updated packages.


In [15]:
import folium

# Coordinates for all possible cities in your dataset
city_coords = {
    "Harrisburg, PA": (40.2732, -76.8867),
    "Houston, TX": (29.7604, -95.3698),
    "Denver, CO": (39.7392, -104.9903),
    "Miami, FL": (25.7617, -80.1918),
    "Baltimore, MD": (39.2904, -76.6122),
    "Orlando, FL": (28.5383, -81.3792),
    "Las Vegas, NV": (36.1699, -115.1398),
    "Boise, ID": (43.6150, -116.2023),
    "Nashville, TN": (36.1627, -86.7816),
    "Salt Lake City, UT": (40.7608, -111.8910),
    "Portland, ME": (43.6591, -70.2568),
    "Boston, MA": (42.3601, -71.0589),
    "Detroit, MI": (42.3314, -83.0458),
    "Chicago, IL": (41.8781, -87.6298),
    "Cincinnati, OH": (39.1031, -84.5120),
    "Pittsburgh, PA": (40.4406, -79.9959),
    "Portland, OR": (45.5051, -122.6750),
    "Washington, DC": (38.9072, -77.0369),
    "Seattle, WA": (47.6062, -122.3321),
    "New York City, NY": (40.7128, -74.0060)
}

# Helper: get coordinates from route
def coords_from_route(route):
    return [city_coords[city] for city in route if city in city_coords]

# Get coordinates
aco_coords = coords_from_route(aco_route)
ga_coords = coords_from_route(ga_route)

# Center the map roughly at the midpoint of GA route
midpoint = ga_coords[len(ga_coords)//2]
m = folium.Map(location=midpoint, zoom_start=5)

# Draw routes
folium.PolyLine(aco_coords, color="blue", weight=4, tooltip="ACO Route").add_to(m)
folium.PolyLine(ga_coords, color="green", weight=4, tooltip="GA Route").add_to(m)

# Add markers
for city in set(aco_route + ga_route):
    if city in city_coords:
        folium.Marker(location=city_coords[city], tooltip=city).add_to(m)

# Highlight start/end for both routes
folium.Marker(location=city_coords[aco_route[0]], icon=folium.Icon(color='red'), tooltip="Start (ACO)").add_to(m)
folium.Marker(location=city_coords[aco_route[-1]], icon=folium.Icon(color='darkred'), tooltip="End (ACO)").add_to(m)
folium.Marker(location=city_coords[ga_route[0]], icon=folium.Icon(color='green'), tooltip="Start (GA)").add_to(m)
folium.Marker(location=city_coords[ga_route[-1]], icon=folium.Icon(color='darkgreen'), tooltip="End (GA)").add_to(m)

# Save the map
m.save("combined_route_map1.html")
