In [4]:
df["full_address"] = (
    df["Address - House number, street, and apartment number"] + ", " +
    df["City"] + ", " +
    df["State"] + ", " +
    df["ZIP"].astype(str)
)


In [5]:
print(df[["full_address"]].head())


                                      full_address
0                                              NaN
1      1812 Navy Street, Santa Monica, CA, 90045.0
2  11708 exposition blvd, Los Angeles, Ca, 90064.0
3      716 Rochedale Way, Los Angeles, CA, 90049.0
4     3512 Crestmont Ave, Los Angeles, CA, 90026.0


In [7]:
from geopy.geocoders import Nominatim
import concurrent.futures
import pandas as pd
import time

df = df.dropna(subset=["full_address"])  # Remove missing addresses


# Initialize geocoder
geolocator = Nominatim(user_agent="my_geocoder", timeout=10)

# Modify function to accept extra arguments (but ignore them)
def get_lat_lon(full_address):
    try:
        location = geolocator.geocode(full_address, timeout=10)
        time.sleep(1)  # Respect rate limit
        return (location.latitude, location.longitude) if location else (None, None)
    except Exception as e:
        print(f"Error geocoding address {full_address}: {e}")
        return (None, None)

# Load dataset
file_path = "/home/jovyan/up221-leob/week02/LA_addresses.csv"  
df = pd.read_csv(file_path)

# Ensure full_address column is properly created
df["full_address"] = (
    df["Address - House number, street, and apartment number"] + ", " +
    df["City"] + ", " +
    df["State"] + ", " +
    df["ZIP"].astype(str)
)

# Use only one argument in executor.map
df_sample = df.head(10)  # Only process 10 rows for testing

with concurrent.futures.ThreadPoolExecutor(max_workers=2) as executor:
    results = list(executor.map(get_lat_lon, df_sample["full_address"]))

df_sample["latitude"], df_sample["longitude"] = zip(*results)

# Print the results
print(df_sample[["full_address", "latitude", "longitude"]])



Error geocoding address nan: HTTPSConnectionPool(host='nominatim.openstreetmap.org', port=443): Max retries exceeded with url: /search?q=nan&format=json&limit=1 (Caused by ConnectTimeoutError(<urllib3.connection.HTTPSConnection object at 0x7b44c29fbdd0>, 'Connection to nominatim.openstreetmap.org timed out. (connect timeout=10)'))
Error geocoding address 1812 Navy Street, Santa Monica, CA, 90045.0: HTTPSConnectionPool(host='nominatim.openstreetmap.org', port=443): Max retries exceeded with url: /search?q=1812+Navy+Street%2C+Santa+Monica%2C+CA%2C+90045.0&format=json&limit=1 (Caused by ConnectTimeoutError(<urllib3.connection.HTTPSConnection object at 0x7b44c1d7a9d0>, 'Connection to nominatim.openstreetmap.org timed out. (connect timeout=10)'))
Error geocoding address 11708 exposition blvd, Los Angeles, Ca, 90064.0: HTTPSConnectionPool(host='nominatim.openstreetmap.org', port=443): Max retries exceeded with url: /search?q=11708+exposition+blvd%2C+Los+Angeles%2C+Ca%2C+90064.0&format=json&l

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_sample["latitude"], df_sample["longitude"] = zip(*results)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_sample["latitude"], df_sample["longitude"] = zip(*results)


In [8]:
df = df.dropna(subset=["full_address"])  # Remove missing addresses


In [2]:
from geopy.geocoders import GoogleV3

GOOGLE_API_KEY = "AIzaSyDXXIt47NVTgnrJZ9HPysL3FJtgOmGdTyk"
geolocator = GoogleV3(api_key=GOOGLE_API_KEY)

# Test with a known address
location = geolocator.geocode("1812 Navy Street, Santa Monica, CA")

print(location.latitude, location.longitude) if location else print("API not working.")


34.0086557 -118.4604661


In [1]:
from geopy.geocoders import GoogleV3
import pandas as pd
import time

#  Use Google API Key
GOOGLE_API_KEY = "AIzaSyDXXIt47NVTgnrJZ9HPysL3FJtgOmGdTyk"
geolocator = GoogleV3(api_key=GOOGLE_API_KEY)


# Function to get latitude & longitude with retry, rate limiting, and progress tracking
def get_lat_lon(full_address, index, total):
    retries = 3  # Maximum retry attempts
    for attempt in range(retries):
        try:
            location = geolocator.geocode(full_address, timeout=10)
            time.sleep(1)  # Slow down requests (1 per second)
            if location:
                if index % 100 == 0:  # Print progress every 100 addresses
                    print(f"Processed {index}/{total} addresses...")
                return (location.latitude, location.longitude)
        except Exception as e:
            print(f"Error geocoding {full_address} (Attempt {attempt+1}): {e}")
            time.sleep(2)  # Wait before retrying
    return (None, None)  # Return None if all retries fail

# Load dataset
file_path = "/home/jovyan/up221-leob/week02/LA_addresses.csv"  # Update with actual file
df = pd.read_csv(file_path)

# Ensure full_address column is correctly formatted
df["full_address"] = (
    df["Address - House number, street, and apartment number"] + ", " +
    df["City"] + ", " +
    df["State"] + ", " +
    df["ZIP"].astype(str)
)

# Remove missing addresses
df = df.dropna(subset=["full_address"]).reset_index(drop=True)

# Initialize storage for latitude/longitude
df["latitude"], df["longitude"] = None, None  # Create empty columns

# Geocode with progress tracking
total_addresses = len(df)

for idx in range(total_addresses):
    address = df.loc[idx, "full_address"]
    lat, lon = get_lat_lon(address, idx + 1, total_addresses)
    
    df.at[idx, "latitude"] = lat  # ✅ Assign values correctly
    df.at[idx, "longitude"] = lon

    if (idx + 1) % 100 == 0:  # ✅ Save progress every 100 addresses
        df.to_csv("geocoded_addresses_google_partial.csv", index=False)
        print(f"✅ Saved progress at {idx+1} addresses...")

# Final Save
df.to_csv("geocoded_addresses_google.csv", index=False)

print("✅ Geocoding complete! Results saved.")


Processed 100/1490 addresses...
✅ Saved progress at 100 addresses...
Processed 200/1490 addresses...
✅ Saved progress at 200 addresses...
Processed 300/1490 addresses...
✅ Saved progress at 300 addresses...
Processed 400/1490 addresses...
✅ Saved progress at 400 addresses...
Processed 500/1490 addresses...
✅ Saved progress at 500 addresses...
Processed 600/1490 addresses...
✅ Saved progress at 600 addresses...
Processed 700/1490 addresses...
✅ Saved progress at 700 addresses...
Processed 800/1490 addresses...
✅ Saved progress at 800 addresses...
Processed 900/1490 addresses...
✅ Saved progress at 900 addresses...
Processed 1000/1490 addresses...
✅ Saved progress at 1000 addresses...
Processed 1100/1490 addresses...
✅ Saved progress at 1100 addresses...
Processed 1200/1490 addresses...
✅ Saved progress at 1200 addresses...
Processed 1300/1490 addresses...
✅ Saved progress at 1300 addresses...
Processed 1400/1490 addresses...
✅ Saved progress at 1400 addresses...
✅ Geocoding complete! Re

In [2]:
import folium
import pandas as pd

# Load your geocoded dataset
file_path = "geocoded_addresses_google.csv"  # Ensure this matches your saved file
df = pd.read_csv(file_path)

# Drop rows with missing coordinates (if any)
df = df.dropna(subset=["latitude", "longitude"])

# Convert lat/lon to floats (sometimes they get saved as strings)
df["latitude"] = df["latitude"].astype(float)
df["longitude"] = df["longitude"].astype(float)

# Set initial map location (Los Angeles County) and zoom level
m = folium.Map(location=[34.0522, -118.2437], zoom_start=10)

# Add markers for each address
for idx, row in df.iterrows():
    folium.Marker(
        location=[row["latitude"], row["longitude"]],
        popup=f"{row['full_address']}",  # Shows address when clicked
        icon=folium.Icon(color="blue", icon="home"),
    ).add_to(m)

# Save & Display Map
m.save("mapped_addresses.html")  # Saves as an HTML file
m  # Displays in Jupyter Notebook


In [12]:
import folium
import pandas as pd
from folium.plugins import Fullscreen, MousePosition, MeasureControl

# Load your geocoded dataset
file_path = "geocoded_addresses_google.csv"
df = pd.read_csv(file_path)

# Drop rows with missing coordinates (if any)
df = df.dropna(subset=["latitude", "longitude"])

#  Convert lat/lon to floats (sometimes they get saved as strings)
df["latitude"] = df["latitude"].astype(float)
df["longitude"] = df["longitude"].astype(float)

# Create a map with a **light gray basemap**
m = folium.Map(
    location=[34.0522, -118.2437], 
    zoom_start=10, 
    tiles="cartodbpositron"  # Light gray basemap
)

# Add a scale bar (changes as you zoom)
MeasureControl(primary_length_unit="meters").add_to(m)



# Function to create dynamic popups (only showing non-empty fields)
def create_popup(row):
    popup_info = ""
    for col in df.columns:
        value = row[col]
        if pd.notna(value) and str(value).strip():  # Only include non-empty values
            popup_info += f"<b>{col}:</b> {value}<br>"
    return folium.Popup(popup_info, max_width=400)

# Add points with smaller size and detailed popups
for _, row in df.iterrows():
    folium.CircleMarker(
        location=[row["latitude"], row["longitude"]],
        radius=3,  # Smaller points
        color="blue",
        fill=True,
        fill_color="blue",
        fill_opacity=0.6,
        popup=create_popup(row),  # Dynamic popup content
    ).add_to(m)

# Add zoom fullscreen control
Fullscreen().add_to(m)

# Add coordinate display (Mouse Position)
MousePosition().add_to(m)

# Save & Display Map
m.save("mapped_addresses.html")  # Saves as an HTML file
m  # Displays in Jupyter Notebook



In [13]:
import os
os.getcwd()


'/home/jovyan/up221-leob/week02'

In [15]:
import os
print(os.listdir("/home/jovyan/up221-leob/week02"))


['geocoded_addresses_google.csv', 'LA_addresses.csv', '.ipynb_checkpoints', 'geocoded_addresses_google_partial.csv', 'Untitled.ipynb', 'Geocoding.ipynb', 'week2assignment.ipynb', 'mapped_addresses.html']


In [None]:
import json

notebook_path = "your_notebook.ipynb"  # Replace with your actual file name

try:
    with open(notebook_path, "r", encoding="utf-8") as f:
        json.load(f)
    print("✅ Notebook is valid JSON!")
except json.JSONDecodeError as e:
    print(f"❌ Notebook is corrupted: {e}")
