In [1]:
# Dependencies
import pandas as pd
import requests
import math
import folium

Load the data

In [2]:
df = pd.read_excel('Sample dataset.xlsx', header=0)
df

Unnamed: 0,NAME,ADDRESS,AREA,CITY,pin_code,longitude,lattitude
0,Cyber Hub,"21, DLF Tower 10th Rd, DLF Cyber City, DLF Pha...",Haryana,Gurgaon,122022.0,77.0891,28.495
1,Ambience Mall,"Ground floor, Ambience Mall",Haryana,Gurgaon,,77.096,28.5055
2,The Grand Venice,"Plot No SH3, Site IV, near Pari Chowk",Uttar Pradesh,Greater Noida,201308.0,77.5263,28.4525
3,Kingdom of Dreams,Great Indian Nautanki Pvt. Ltd. Auditorium Com...,Haryana,Gurgaon,122001.0,77.0689,28.4679
4,The Oberoi,"443, Shankar Chowk Rd, opposite Metro Station,...",Haryana,Gurugram,122016.0,77.0882,28.5022
5,The Leela Palace,"National Highway 8, Ambience Island, DLF Phase...",Haryana,Gurugram,122002.0,77.0965,28.5054


Distance column
New coordinates
New Address
Use cases

In [3]:
# Haversine formula
def haversine(coord1, coord2):
    R = 6371.0  # radius of Earth in kilometers
    lat1, lon1 = math.radians(coord1[0]), math.radians(coord1[1])
    lat2, lon2 = math.radians(coord2[0]), math.radians(coord2[1])
    
    dlon = lon2 - lon1
    dlat = lat2 - lat1
    
    a = math.sin(dlat/2)**2 + math.cos(lat1) * math.cos(lat2) * math.sin(dlon/2)**2
    c = 2 * math.atan2(math.sqrt(a), math.sqrt(1 - a))

    distance = R * c
    return distance

In [4]:
# Geoencode function
def geoencode_arcgis(address, area=None, city=None, pincode=None):
    full_address = [address]
    if area:
        full_address.append(area)
    if city:
        full_address.append(city)
    if pincode:
        full_address.append(str(pincode))

    full_address = ', '.join(full_address).replace(' ', '%20')

    url = f"https://geocode.arcgis.com/arcgis/rest/services/World/GeocodeServer/findAddressCandidates?SingleLine={full_address}&f=pjson"

    response = requests.get(url)
    if response.status_code == 200:
        data = response.json()
        if 'candidates' in data and len(data['candidates']) > 0:
            result = data['candidates'][0]
            location = (result['location']['y'], result['location']['x'])
            extent = ((result['extent']['ymin'], result['extent']['xmin']),
                      (result['extent']['ymax'], result['extent']['xmax']))
            distance_to_extent = min(haversine(location, point) for point in extent)
            return location, extent, distance_to_extent
    print(f"Request failed with status {response.status_code}")
    return None, None, None

Geoencoding

In [8]:
# Update function
def update_coordinates(row, threshold):
    address_base = str(row['ADDRESS'])
    area = str(row['AREA'])
    city = str(row['CITY'])
    pincode = str(row['PINCODE'])
    
    original_coordinates = (row['lattitude'], row['longtude'])
    
    location, extent, distance_to_extent = geoencode_arcgis(address_base, area, city, pincode)
    if location is not None:
        calculated_distance = haversine(original_coordinates, location)

        # Update only if the calculated distance is greater than both the distance to the extent and the threshold
        if calculated_distance > distance_to_extent and calculated_distance > threshold:
            return location[0], location[1], row['ID']
    # If no update is required, return the original coordinates and None for the ID
    return original_coordinates[0], original_coordinates[1], None

In [13]:
# Plotting function
def plot_map(row, threshold):
    # Base map
    m = folium.Map(location=(row['lattitude'], row['longitude']), zoom_start=15)

    # Original coordinates
    original_coordinates = (row['lattitude'], row['longitude'])

    # Add a marker for the original coordinates
    folium.Marker(
        location=original_coordinates,
        popup='Original',
        icon=folium.Icon(color='green', icon='info-sign')
    ).add_to(m)

    # Get the geoencoded coordinates
    geoencoded_coordinates, extent_coordinates, _ = geoencode_arcgis(row['ADDRESS'], row['AREA'], row['CITY'], row['pin_code'])
    
    # Add a marker for the geoencoded coordinates
    folium.Marker(
        location=geoencoded_coordinates,
        popup='Geoencoded',
        icon=folium.Icon(color='red', icon='info-sign')
    ).add_to(m)

    # Plot a rectangle for the extent
    folium.Rectangle(
        bounds=extent_coordinates,
        color='blue'
    ).add_to(m)

    # Plot a threshold radius circle using folium.Circle for geoencoded coordinates
    folium.Circle(
        location=geoencoded_coordinates,
        radius=threshold*1000,  # radius in meters
        color='red',
        fill=True,
        fill_color='red'
    ).add_to(m)

    return m

In [14]:
# Plot the map for a sample row
threshold = 1 
plot_map(df.iloc[3], threshold)

Driver Code

In [15]:
# Update coordinates and get IDs of updated rows
updated_coordinates_and_ids = df.apply(update_coordinates, args=(threshold,), axis=1)

# Update LATITUDE and LONGITUDE in df where ID is not None
for lat, lon, id in updated_coordinates_and_ids:
    if id is not None:
        df.loc[df['ID'] == id, 'LATTITUDE'] = lat
        df.loc[df['ID'] == id, 'LONGITUDE'] = lon

# Get updated IDs
updated_ids = [id for lat, lon, id in updated_coordinates_and_ids if id is not None]

KeyError: 'PINCODE'

In [None]:
print(f"{len(updated_ids)} rows updated")
print("Updated IDs: " + ", ".join(map(str, updated_ids)))