<font color=#2DA2F2>

**Azure Maps API**
- **Batch Geocoding:** The batch size limit is 10,000 queries per batch request


In [1]:
# import libraries
import pandas as pd
from geopy.geocoders import Nominatim 
from geopy.exc import GeocoderTimedOut, GeocoderServiceError
import time
from tqdm import tqdm

# Load the CSV file into a DataFrame
df = pd.read_csv('data/npidata_pfile.csv')

# Extract the first 5 digits of the zip code ,ensure that the zip code is an integer
df['Zip Code'] = df['Zip Code'].apply(lambda x: int(x / 10**(len(str(int(x))) - 5)) if not pd.isna(x) else x).astype('Int64')
# combine the names provider to create a full name column, ensure that the names are strings and it should be last name, first name middle name format 
df['Full Name'] = df['Provider Last Name'] + ', ' + df['Provider First Name'] + ' ' + df['Provider Middle Name']

df=df[['NPI','Full Name','Street','City','State','Zip Code']]
# drop rows with missing values of street 1,full name
df = df.dropna(subset=['Street','Full Name'])
# drop duplicates
df = df.drop_duplicates()
# print(df.shape)
# randomly sample 1000 rows
df = df.sample(1000)
df.sample(15)


Unnamed: 0,NPI,Full Name,Street,City,State,Zip Code
19221,1336617653,"FORBES, ELIZABETH ANNE",450 JACKSON ST 2352,COLUMBUS,IN,47201
27998,1700325354,"DILLON, JILL E",806 S GARFIELD AVE,TRAVERSE CITY,MI,49686
2435,1235608589,"LINTON, DAVID L",8870 S MARYLAND PKWY STE 115,LAS VEGAS,NV,89123
10933,1356906127,"NEEDENS, JORDAN M",2820 MOUNT RUSHMORE RD,RAPID CITY,SD,57701
14163,1326595059,"TALBERT, JACOB AUSTIN",277 SOUTH ST,SAN LUIS OBISPO,CA,93401
9639,1932930674,"FLORES NAZARIO, LUNA CRISTAL",CARR 118 INT KM 4.7 CHALETS ANTONIA 7,SAN GERMAN,PR,68300
17674,1063745149,"NWOKOCHA, FLORENCE IGBOAMA",8233 E STOCKTON BLVD,SACRAMENTO,CA,95828
19050,1689844375,"WRIGHT, JENNIFER R",111 W STONE DR,KINGSPORT,TN,37660
6073,1811217417,"GRUBE, JUSTIN PAUL",411 STAGELINE RD STE 150,HUDSON,WI,54016
17659,1154151421,"WHELAN, AARON J",353 POND ST UNIT R,WOONSOCKET,RI,28952


In [2]:
import requests
import time
import pandas as pd
import os

# pass your Azure Maps API key
azure_maps_key = os.getenv('azure_maps_key')

# Function to send a batch request to Azure Maps API
def batch_geocode_azure(addresses, api_key):
    url = "https://atlas.microsoft.com/search/address/json"
    results = []

    for address in addresses:
        params = {
            'api-version': '1.0',
            'query': address,
            'subscription-key': api_key
        }
        try:
            response = requests.get(url, params=params)
            if response.status_code == 200:
                data = response.json()
                if data['summary']['numResults'] > 0:
                    coordinates = data['results'][0]['position']
                    results.append((address, coordinates['lat'], coordinates['lon']))
                else:
                    results.append((address, None, None))
            else:
                results.append((address, None, None))
        except requests.exceptions.RequestException as e:
            print(f"Request failed for {address}: {e}")
            results.append((address, None, None))
        
        # To avoid hitting rate limits
        time.sleep(1)
    
    return results

# Send addresses in batches
batch_size = 50
batches = [addresses[i:i + batch_size] for i in range(0, len(addresses), batch_size)]
all_results_azure = []

for batch in batches:
    results = batch_geocode_azure(batch, azure_maps_key)
    all_results_azure.extend(results)

# add the geocoded data to the original DataFrame


# Convert results to a DataFrame
geocoded_data_azure = pd.DataFrame(all_results_azure, columns=['Clean_address', 'latitude', 'longitude'])

# add the geocoded data to the original DataFrame
df = pd.merge(df, geocoded_data_azure, on='NPI', how='left')

# Save the geocoded data to a CSV file
df.to_csv('data/npidata_pfile_geocoded_azure.csv', index=False,if_exists='replace')
df.head(20)

KeyError: 'NPI'

<font color=#2DA5F2>

**2. Batch Geocoding with Google Maps API**
- Below is the Python code to extract the address information from the file and prepare it for geocoding.
