<font color=#2DA2F2>

**Google Maps API**
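- **Batch Geocoding:** The batch size limit is 100 queries per batch request.
- This limit defines how many addresses or locations you can geocode in a single batch request.
- **Note:** the code below sends one HTTP request per address; its `batch_size` only groups addresses on the client side.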

In [1]:
# import libraries
import pandas as pd
from geopy.geocoders import Nominatim 
from geopy.exc import GeocoderTimedOut, GeocoderServiceError
import time
from tqdm import tqdm

# Load the CSV file into a DataFrame
df = pd.read_csv('data/npidata_pfile.csv')

SAMPLE_SIZE = 1000   # number of providers kept for geocoding
RANDOM_SEED = 42     # fixed so Restart & Run All reproduces the same sample


def to_zip5(value):
    """Return the 5-digit ZIP code as an int; missing values are returned unchanged.

    Values of 100000 and above are treated as 9-digit ZIP+4 codes and the +4
    suffix is dropped. Shorter values are kept as-is: a numeric column loses
    leading zeros, so 02134 arrives as 2134 and must not be rescaled.
    """
    if pd.isna(value):
        return value
    value = int(value)
    return value // 10_000 if value >= 100_000 else value


# Extract the 5-digit zip code and store it as a nullable integer
df['Zip Code'] = df['Zip Code'].apply(to_zip5).astype('Int64')

# Combine the provider names as "Last, First Middle".
# A missing middle name is replaced by '' so that it does not turn the whole
# name into NaN (which would make the dropna below discard the provider).
middle_name = df['Provider Middle Name'].fillna('')
df['Full Name'] = (
    df['Provider Last Name'] + ', ' + df['Provider First Name'] + ' ' + middle_name
).str.strip()

df = df[['NPI', 'Full Name', 'Street', 'City', 'State', 'Zip Code']]
# drop rows with a missing street or a missing (last/first) name
df = df.dropna(subset=['Street', 'Full Name'])
# drop duplicates
df = df.drop_duplicates()
# randomly sample up to SAMPLE_SIZE rows (never more than are available)
df = df.sample(n=min(SAMPLE_SIZE, len(df)), random_state=RANDOM_SEED)
df.sample(n=min(15, len(df)), random_state=RANDOM_SEED)


Unnamed: 0,NPI,Full Name,Street,City,State,Zip Code
22524,1073025458,"SOBO, ABOSEDE O",7933 BELLE POINT DR,GREENBELT,MD,20770
19280,1649708835,"TORABI, ASAD JOHN",701 SUPERIOR AVE STE J,MUNSTER,IN,46321
14453,1821552241,"PIANKHI, TKEYAH KIANA",3100 FIVE FORKS TRICKUM RD SW STE 203,LILBURN,GA,30047
23363,1336971563,"GLEBE, ASHANTE KIREN",2116 W FAIDLEY AVE STE 2100,GRAND ISLAND,NE,68803
1307,1841638848,"SMITH, LISA A",1734 MADISON AVE,MEMPHIS,TN,38104
13383,1740011444,"LENSS, RENEE MARY",3263 EATON RD STE 1,GREEN BAY,WI,54311
28722,1780379800,"MATTHEWS, KRISTINE N",310 PANTHER CT,WOODLAND PARK,CO,80863
20985,1619603727,"NORTON, AMANDA LYNNE",3545 N VERMILION ST,DANVILLE,IL,61832
10347,1851979900,"KENNEDY, BYRON L",1839 CENTRAL AVE,ST PETERSBURG,FL,33713
19866,1346072741,"GHADESSI, NEGUIN E",232 E GISH RD,SAN JOSE,CA,95112


In [None]:
import requests
import time
import pandas as pd

import os

# Google Maps API key: read from the GOOGLE_MAPS_API_KEY environment variable so
# the real key is never stored in the notebook. The placeholder is kept as the
# fallback value when the variable is not set.
google_maps_key = os.environ.get("GOOGLE_MAPS_API_KEY", "YOUR_GOOGLE_MAPS_API_KEY_HERE")

# Function to geocode a batch of addresses with the Google Maps API
def batch_geocode_google(addresses, api_key, pause=1.0, timeout=10.0, session=None):
    """Geocode each address with the Google Maps Geocoding API.

    One HTTP request is sent per address; the "batch" is only the list handed
    to this function.

    Parameters
    ----------
    addresses : iterable of str
        Addresses to geocode.
    api_key : str
        Google Maps API key sent as the ``key`` query parameter.
    pause : float, default 1.0
        Seconds to sleep after every request (client-side throttle).
    timeout : float, default 10.0
        Seconds to wait for each HTTP response before giving up.
    session : object, optional
        Anything exposing ``get(url, params=..., timeout=...)``, e.g. a
        ``requests.Session``. Defaults to the ``requests`` module.

    Returns
    -------
    list of tuple
        ``(address, lat, lng)`` per input address, in input order. ``lat`` and
        ``lng`` are ``None`` when the address could not be geocoded.
    """
    url = "https://maps.googleapis.com/maps/api/geocode/json"
    http = requests if session is None else session
    results = []

    for address in addresses:
        lat = lng = None
        try:
            response = http.get(
                url,
                params={'address': address, 'key': api_key},
                timeout=timeout,  # without a timeout a stalled connection hangs the run
            )
            if response.status_code == 200:
                data = response.json()
                status = data.get('status')
                if status == 'OK' and data.get('results'):
                    location = data['results'][0]['geometry']['location']
                    lat, lng = location['lat'], location['lng']
                elif status != 'ZERO_RESULTS':
                    # Surface API-level failures (bad key, quota, ...) instead of
                    # silently recording None for every address.
                    print(f"Geocoding failed for {address}: {status} {data.get('error_message', '')}")
            else:
                print(f"HTTP {response.status_code} for {address}")
        except (requests.exceptions.RequestException, ValueError) as e:
            # ValueError covers an invalid JSON body on older requests versions.
            print(f"Request failed for {address}: {e}")
        results.append((address, lat, lng))

        # To avoid hitting rate limits
        if pause > 0:
            time.sleep(pause)

    return results

# Example usage:
# Fail fast when no real key is configured: every request would be rejected,
# and the run would still sleep after each address.
if not google_maps_key or google_maps_key == "YOUR_GOOGLE_MAPS_API_KEY_HERE":
    raise ValueError("Set a real Google Maps API key in `google_maps_key` before geocoding.")

# `addresses` is not defined by any cell visible above, so build it from the
# provider table unless another cell already created it.
if 'addresses' not in globals():
    # Zip Code is a nullable integer; pad to 5 digits to restore leading zeros.
    zip_text = df['Zip Code'].astype('string').str.zfill(5).fillna('')
    addresses = (
        (df['Street'] + ', ' + df['City'] + ', ' + df['State'] + ' ' + zip_text)
        .str.strip()
        .dropna()           # rows missing street, city or state cannot be geocoded
        .drop_duplicates()  # do not pay for the same address twice
        .tolist()
    )

# Send addresses in batches
batch_size = 50
batches = [addresses[i:i + batch_size] for i in range(0, len(addresses), batch_size)]
all_results_google = []

for batch in batches:
    results = batch_geocode_google(batch, google_maps_key)
    all_results_google.extend(results)

# Convert results to a DataFrame
geocoded_data_google = pd.DataFrame(all_results_google, columns=['Clean_address', 'latitude', 'longitude'])

# Save the geocoded data to a CSV file
geocoded_data_google.to_csv('geocoded_addresses_google.csv', index=False)
