In [7]:
# ==============================================================================
# STEP 1: INSTALL AND IMPORT NECESSARY TOOLS (LIBRARIES)
# ==============================================================================

# The % symbol means this is a "magic command" - special instructions for Jupyter
# This installs external software packages we need:
# - requests: for sending HTTP requests to Google's servers
# - pandas: for working with spreadsheet-like data (CSV files)
%pip install requests pandas

# "import" means "bring in tools from external libraries so we can use them"
# Think of this like borrowing specialized calculators from different departments

import itertools  # For generating mathematical combinations and permutations
import json       # For working with JSON data format (JavaScript Object Notation)
import os         # For reading settings (such as the API key) from environment variables

import pandas as pd  # For reading CSV files and working with tabular data
                    # "as pd" means we can write "pd" instead of "pandas" later
import requests    # For communicating with Google's API over the internet


# ==============================================================================
# STEP 2: SET UP GOOGLE API CREDENTIALS AND ENDPOINT
# ==============================================================================

# The API key is like a password that identifies us to Google's servers.
# Because a notebook is saved and shared together with its code, the key must
# NOT be typed into a cell. Instead, store it in an environment variable before
# starting Jupyter, for example:
#     export GOOGLE_MAPS_API_KEY="your-key-here"
# os.environ.get(name, "") looks the variable up and gives back an empty string
# when it has not been set, so this cell still runs and we can warn clearly.
API_KEY = os.environ.get("GOOGLE_MAPS_API_KEY", "")
if not API_KEY:
    print("WARNING: GOOGLE_MAPS_API_KEY is not set - API requests will fail until you set it.")

# This is the web address (URL) where Google's routing service lives
# It's like a mailing address, but for computer programs
# When we send requests here, Google calculates driving routes for us
url = "https://routes.googleapis.com/directions/v2:computeRoutes"

# ==============================================================================
# STEP 3: LOAD DMV OFFICE DATA FROM CSV FILE
# ==============================================================================

# A CSV ("Comma Separated Values") file is a table stored as plain text.
# pd.read_csv() parses it into a DataFrame: rows and named columns,
# much like a sheet in Excel.
locations_df = pd.read_csv('output/dmv_offices_details.csv')

# ==============================================================================
# STEP 4: CONVERT SPREADSHEET DATA INTO A LIST OF OFFICE INFORMATION
# ==============================================================================

# We only need three columns. zip() walks them side by side and hands us one
# (name, latitude, longitude) triple per spreadsheet row.
# The square brackets form a "list comprehension": a compact way of writing
# "build a list by doing this for every item".
# Each triple becomes a dictionary - a set of labelled values such as
# {'name': ..., 'latitude': ..., 'longitude': ...}.
office_locations = [
    {'name': office_name, 'latitude': lat, 'longitude': lng}
    for office_name, lat, lng in zip(
        locations_df['office_name'],
        locations_df['latitude'],
        locations_df['longitude'],
    )
]

# ==============================================================================
# STEP 5: DISPLAY SUMMARY INFORMATION
# ==============================================================================

# len() counts the entries in the list; the f-string drops that number
# straight into the message text.
office_count = len(office_locations)
print(f"Loaded {office_count} DMV offices")
print("Sample offices:")

# Preview the first five offices only.
# The slice [:5] means "positions 0 through 4"; it simply returns fewer items
# if the list is shorter than five.
for sample in office_locations[:5]:
    lat, lng = sample['latitude'], sample['longitude']
    # :.4f rounds the printed coordinate to 4 decimal places
    print(f"  {sample['name']}: ({lat:.4f}, {lng:.4f})")

# ==============================================================================
# STEP 6: GENERATE ALL POSSIBLE OFFICE-TO-OFFICE ROUTE COMBINATIONS
# ==============================================================================

# In mathematics, a "permutation" is an arrangement where order matters
# For example, if we have offices A, B, C, then (A→B) is different from (B→A)
# itertools.permutations(list, 2) produces every ordered pair of offices,
# i.e. every possible route from one office to another office.

# MAX_ROUTES limits how many routes we calculate while testing.
# With 167 offices the full set is 167 × 166 = 27,722 routes, and every route
# is one paid API call (estimated at ~$138 in total).
# Set MAX_ROUTES = None to calculate ALL possible routes.
MAX_ROUTES = 6

# permutations() is "lazy": it produces pairs one at a time on request.
# islice() stops asking after MAX_ROUTES pairs, so we never build the full
# 27,722-item list just to throw most of it away. islice(..., None) means
# "no limit" and yields every pair.
office_pairs = list(
    itertools.islice(itertools.permutations(office_locations, 2), MAX_ROUTES)
)

# Display how many routes we're going to calculate
print(f"\nGenerated {len(office_pairs)} office-to-office routes to calculate")

# A bare expression on the last line of a cell is displayed as the cell's
# output - here, the first 5 office dictionaries with full-precision values.
office_locations[:5]


Note: you may need to restart the kernel to use updated packages.
Loaded 167 DMV offices
Sample offices:
  Alturas: (41.4919, -120.5498)
  Arleta: (34.2479, -118.4452)
  Arvin: (35.2113, -118.8333)
  Auburn: (38.9070, -121.0827)
  Bakersfield: (35.3878, -119.0233)

Generated 6 office-to-office routes to calculate


[{'name': 'Alturas', 'latitude': 41.4919116, 'longitude': -120.5498434},
 {'name': 'Arleta', 'latitude': 34.2479119, 'longitude': -118.4452195},
 {'name': 'Arvin', 'latitude': 35.2112669, 'longitude': -118.8333219},
 {'name': 'Auburn', 'latitude': 38.9069626, 'longitude': -121.082718},
 {'name': 'Bakersfield', 'latitude': 35.3878084, 'longitude': -119.0233475}]

In [None]:
# ==============================================================================
# STEP 7: PREPARE API REQUEST DATA FOR EACH OFFICE PAIR
# ==============================================================================

def _waypoint(office):
    """Wrap one office's coordinates in the nested location/latLng dictionary
    used for both the origin and the destination of a request."""
    return {
        "location": {
            "latLng": {
                "latitude": office['latitude'],
                "longitude": office['longitude'],
            }
        }
    }


# request_bodies: one dictionary per route, in the nested format sent to the API.
# Each office pair is a tuple (origin, destination); we unpack it in the loop
# part of the comprehension and describe both ends with _waypoint().
# "travelMode": "DRIVE" asks for driving directions (not walking/transit).
# Other parameters (e.g., routingPreference, departureTime) could be added here.
request_bodies = [
    {
        "origin": _waypoint(start),
        "destination": _waypoint(end),
        "travelMode": "DRIVE",
    }
    for start, end in office_pairs
]

# office_info: the human-readable office records for each route.
# It is built from the same pairs in the same order, so position k in
# office_info describes the request at position k in request_bodies.
office_info = [
    {'origin_office': start, 'destination_office': end}
    for start, end in office_pairs
]

# ==============================================================================
# STEP 8: SET UP HTTP REQUEST HEADERS
# ==============================================================================

# The field mask lists the parts of the response we want back:
# only the duration and the distance of each route.
FIELD_MASK = "routes.duration,routes.distanceMeters"

# Headers travel alongside the request body, like the writing on an envelope.
# They carry metadata about the request rather than the route data itself.
headers = {}
headers["Content-Type"] = "application/json"   # The body we send is JSON
headers["X-Goog-Api-Key"] = API_KEY            # Our authentication key
headers["X-Goog-FieldMask"] = FIELD_MASK       # Which response fields to return

# ==============================================================================
# STEP 9: SEND API REQUESTS AND PROCESS RESPONSES
# ==============================================================================

# How long (in seconds) we are willing to wait for Google to answer.
# Without a timeout, requests.post() can wait forever if the connection stalls,
# which would freeze the notebook in the middle of the loop.
REQUEST_TIMEOUT_SECONDS = 30

# Create an empty list to store all successful results
results = []

# zip() pairs every request body with the office information stored at the same
# position, so we never have to look things up by index.
# enumerate(..., start=1) numbers the requests 1, 2, 3, ... for the progress text.
for request_number, (data, pair) in enumerate(zip(request_bodies, office_info), start=1):

    origin_office = pair['origin_office']
    destination_office = pair['destination_office']

    # Show progress to the user (the → symbol shows origin → destination)
    print(f"Sending request {request_number}/{len(request_bodies)}: {origin_office['name']} → {destination_office['name']}")

    # Send a POST request: json=data converts our dictionary to JSON,
    # headers=headers attaches the authentication key and field mask.
    # Network problems (no connection, DNS failure, timeout) do not produce a
    # status code - they raise an exception - so we catch them here and move on
    # to the next route instead of losing everything collected so far.
    try:
        response = requests.post(url, json=data, headers=headers, timeout=REQUEST_TIMEOUT_SECONDS)
    except requests.RequestException as exc:
        print(f"  ❌ Error: request failed ({exc})")
        continue

    # HTTP status code 200 means "OK"; anything else is an error reported by
    # the server (bad API key, quota exceeded, malformed request, ...).
    if response.status_code != 200:
        print(f"  ❌ Error: Status {response.status_code}")
        print(f"Response Text: {response.text}")
        continue

    # Convert the JSON response back into a Python dictionary.
    # response.json() raises a ValueError subclass if the body is not valid JSON.
    try:
        response_data = response.json()
    except ValueError:
        print(f"  ❌ Error: response was not valid JSON")
        print(f"Response Text: {response.text}")
        continue

    # A missing or empty "routes" list means no route was returned
    routes = response_data.get("routes")
    if not routes:
        print(f"  ❌ No routes found in response")
        print(f"Response data: {response_data}")
        continue

    # Several route options may come back; we use the first one.
    # .get() returns None instead of raising an error if the key is absent.
    first_route = routes[0]
    distance = first_route.get("distanceMeters")  # Distance in meters
    duration = first_route.get("duration")        # Duration as a string like "1234s"

    # A route without a distance is not useful for our analysis
    if distance is None:
        print(f"  ❌ Could not retrieve distance")
        print(f"Response data: {response_data}")
        continue

    # Combine the API's answer with our own office data into one record
    results.append({
        "origin_office_name": origin_office['name'],
        "origin_latitude": origin_office['latitude'],
        "origin_longitude": origin_office['longitude'],
        "destination_office_name": destination_office['name'],
        "destination_latitude": destination_office['latitude'],
        "destination_longitude": destination_office['longitude'],
        "distanceMeters": distance,
        "duration": duration
    })

    # {:,} adds commas to large numbers (e.g., 1,000,000)
    print(f"  ✅ Success: {distance:,} meters, {duration}")

# ==============================================================================
# STEP 10: SAVE RESULTS TO A FILE
# ==============================================================================

# Name of the file that will hold the collected route records
output_filename = "dmv_office_route_distances.json"

# 'w' opens the file for writing: it is created if missing and overwritten
# if it already exists. The 'with' block closes the file automatically.
with open(output_filename, 'w') as out_file:
    # json.dumps() turns the results list into one JSON-formatted string;
    # indent=4 spreads it over multiple lines so a person can read it.
    out_file.write(json.dumps(results, indent=4))

# ==============================================================================
# STEP 11: DISPLAY SUMMARY OF RESULTS
# ==============================================================================

# Report where the file went and how many routes succeeded
print(f"Results saved to {output_filename}")
print(f"Total routes calculated: {len(results)}")

# Show an example result, but only if we have any. An empty list counts as
# "false", so when every request failed we skip the heading as well instead of
# printing "Sample result:" with nothing underneath it.
if results:
    sample_result = results[0]
    print("Sample result:")
    print(f"  {sample_result['origin_office_name']} → {sample_result['destination_office_name']}")
    print(f"  Distance: {sample_result['distanceMeters']:,} meters")
    print(f"  Duration: {sample_result['duration']}")

Sending request 1/6: Alturas → Arleta
  ✅ Success: 1,000,148 meters, 36018s
Sending request 2/6: Alturas → Arvin
  ✅ Success: 962,153 meters, 33537s
Sending request 3/6: Alturas → Auburn
  ✅ Success: 436,044 meters, 15693s
Sending request 4/6: Alturas → Bakersfield
  ✅ Success: 926,407 meters, 32110s
Sending request 5/6: Alturas → Bakersfield Southwest
  ✅ Success: 937,436 meters, 32673s
Sending request 6/6: Alturas → Barstow
  ✅ Success: 941,753 meters, 34191s
Results saved to dmv_office_route_distances.json
Total routes calculated: 6
Sample result:
  Alturas → Arleta
  Distance: 1,000,148 meters
  Duration: 36018s
