In [3]:
# Dependencies
import pandas as pd
import requests
import math
import folium

Load the data

In [4]:
# Read the sample places workbook; the first spreadsheet row supplies the column names
# (pandas' default), then display the frame as the cell output.
df = pd.read_excel('Sample Dataset.xlsx')
df

Unnamed: 0,NAME,ADDRESS,AREA,CITY,pin_code,longitude,lattitude
0,Cyber Hub,"21, DLF Tower 10th Rd, DLF Cyber City, DLF Pha...",Haryana,Gurgaon,122022.0,77.0891,28.495
1,Ambience Mall,"Ground floor, Ambience Mall",Haryana,Gurgaon,,77.096,28.5055
2,The Grand Venice,"Plot No SH3, Site IV, near Pari Chowk",Uttar Pradesh,Greater Noida,201308.0,77.5263,28.4525
3,Kingdom of Dreams,Great Indian Nautanki Pvt. Ltd. Auditorium Com...,Haryana,Gurgaon,122001.0,77.0689,28.4679
4,The Oberoi,"443, Shankar Chowk Rd, opposite Metro Station,...",Haryana,Gurugram,122016.0,77.0882,28.5022
5,The Leela Palace,"National Highway 8, Ambience Island, DLF Phase...",Haryana,Gurugram,122002.0,77.0965,28.5054


Distance column
New coordinates
New Address
Use cases

In [5]:
# Haversine formula
def haversine(coord1, coord2):
    """Return the great-circle distance in kilometers between two points.

    Parameters
    ----------
    coord1, coord2 : sequence of two numbers
        ``(latitude, longitude)`` pairs in decimal degrees.

    Returns
    -------
    float
        Distance in kilometers on a sphere of radius 6371 km. NaN inputs
        propagate to a NaN result.
    """
    R = 6371.0  # radius of Earth in kilometers
    lat1, lon1 = math.radians(coord1[0]), math.radians(coord1[1])
    lat2, lon2 = math.radians(coord2[0]), math.radians(coord2[1])

    dlon = lon2 - lon1
    dlat = lat2 - lat1

    a = math.sin(dlat / 2) ** 2 + math.cos(lat1) * math.cos(lat2) * math.sin(dlon / 2) ** 2
    # Floating-point rounding can leave `a` a hair above 1 for (near-)antipodal
    # points, which would make sqrt(1 - a) raise ValueError. A comparison is used
    # instead of min() so that a NaN `a` is left alone and still yields NaN.
    if a > 1.0:
        a = 1.0
    c = 2 * math.atan2(math.sqrt(a), math.sqrt(1 - a))

    return R * c

In [6]:
# Geocode function with reverse geocoding
def _arcgis_get_json(endpoint, params, label):
    """GET one ArcGIS World GeocodeServer endpoint; return the parsed JSON body or None.

    `label` prefixes the printed failure message. Query values are passed through
    `params=` so requests percent-encodes them, and a timeout keeps one stalled
    call from hanging the whole run.
    """
    url = f"https://geocode.arcgis.com/arcgis/rest/services/World/GeocodeServer/{endpoint}"
    try:
        response = requests.get(url, params={**params, 'f': 'pjson'}, timeout=10)
    except requests.RequestException as exc:
        print(f"{label} failed: {exc}")
        return None
    if response.status_code != 200:
        print(f"{label} failed with status {response.status_code}")
        return None
    try:
        return response.json()
    except ValueError:
        # Body was not valid JSON (e.g. an HTML error page served with status 200).
        print(f"{label} returned a non-JSON body")
        return None


def geoencode_arcgis(name, address, area, city, pincode):
    """Geocode a place, then reverse-geocode the hit to get a display address.

    Parameters
    ----------
    name, address, area, city, pincode
        Address components. Each is converted with ``str()``; components that
        are blank or equal to the literal ``'nan'`` (what ``str(float('nan'))``
        produces for a missing cell) are left out of the query.

    Returns
    -------
    tuple
        ``((latitude, longitude), long_label)`` taken from the first candidate
        returned by the service, where ``long_label`` is the reverse-geocoded
        ``LongLabel`` (``None`` if the response has no such key).
        ``(None, None)`` if either request fails or returns nothing usable;
        a message describing the failure is printed.
    """
    parts = (str(part).strip() for part in (name, address, area, city, pincode))
    full_address = ', '.join(part for part in parts if part and part != 'nan')
    if not full_address:
        print("No candidates found.")
        return None, None

    # Forward geocoding
    data = _arcgis_get_json('findAddressCandidates', {'SingleLine': full_address}, 'Request')
    if data is None:
        return None, None
    candidates = data.get('candidates')
    if not candidates:
        print("No candidates found.")
        return None, None
    result = candidates[0]
    # The service reports x = longitude, y = latitude; store as (lat, lon).
    location = (result['location']['y'], result['location']['x'])

    # Reverse geocoding (the endpoint expects "x,y", i.e. longitude first)
    reverse_data = _arcgis_get_json(
        'reverseGeocode',
        {'location': f"{location[1]},{location[0]}"},
        'Reverse geocoding request',
    )
    if reverse_data is None:
        return None, None
    if 'address' not in reverse_data:
        print("No address found.")
        return None, None

    return location, reverse_data['address'].get('LongLabel', None)

In [7]:
# Update function
def _clean_field(value):
    """Render one cell as query text: '' when missing, no trailing '.0' on whole floats."""
    if pd.isna(value):
        return ''
    if isinstance(value, float) and value.is_integer():
        # A pin-code column containing NaN is read as float (122022.0); send '122022'.
        return str(int(value))
    return str(value).strip()


def update_coordinates(row):
    """Geocode one dataframe row and measure how far the result is from the stored point.

    Parameters
    ----------
    row : mapping (e.g. a pandas Series)
        Must provide 'NAME', 'ADDRESS', 'AREA', 'CITY', 'pin_code',
        'lattitude' and 'longitude'.

    Returns
    -------
    tuple
        ``(latitude, longitude, distance_km, address)``. When geocoding succeeds
        these are the geocoded coordinates, their haversine distance from the
        row's stored coordinates, and the reverse-geocoded address. When it
        fails, the stored coordinates are returned with ``None`` for both the
        distance and the address.
    """
    name = _clean_field(row['NAME'])
    address_base = _clean_field(row['ADDRESS'])
    area = _clean_field(row['AREA'])
    city = _clean_field(row['CITY'])
    pincode = _clean_field(row['pin_code'])
    original_coordinates = (row['lattitude'], row['longitude'])

    location, reverse_address = geoencode_arcgis(name, address_base, area, city, pincode)
    if location is not None:
        calculated_distance = haversine(original_coordinates, location)
        return location[0], location[1], calculated_distance, reverse_address

    # Geocoding failed: keep the stored coordinates, with no distance and no address.
    return original_coordinates[0], original_coordinates[1], None, None

Driver Code

In [8]:
# Geocode every row, then attach the four result series to the dataframe
geocoded_rows = df.apply(update_coordinates, axis=1)
new_lat, new_lon, distance_km, new_address = zip(*geocoded_rows)

# NOTE(review): update_coordinates returns (latitude, longitude, ...), but the two
# coordinate columns are stored crosswise: 'New LATTITUDE' ends up holding the
# longitude and 'New LONGITUDE' the latitude. plot_map reads them back the same
# crosswise way, so the labels must be corrected in both places at once.
df['New LATTITUDE'] = new_lon
df['New LONGITUDE'] = new_lat
df['Distance (km)'] = distance_km
df['New Address'] = new_address
df

Unnamed: 0,NAME,ADDRESS,AREA,CITY,pin_code,longitude,lattitude,New LATTITUDE,New LONGITUDE,Distance (km),New Address
0,Cyber Hub,"21, DLF Tower 10th Rd, DLF Cyber City, DLF Pha...",Haryana,Gurgaon,122022.0,77.0891,28.495,77.094405,28.493645,0.539884,"10th Road, DLF Cyber City, Gurugram, Haryana, ..."
1,Ambience Mall,"Ground floor, Ambience Mall",Haryana,Gurgaon,,77.096,28.5055,77.09573,28.50582,0.044296,"Ambience Mall Road, DLF Cyber City, Gurugram, ..."
2,The Grand Venice,"Plot No SH3, Site IV, near Pari Chowk",Uttar Pradesh,Greater Noida,201308.0,77.5263,28.4525,77.51158,28.46541,2.032597,"The Grand Venice, Pari Chowk, Omega 2, Greater..."
3,Kingdom of Dreams,Great Indian Nautanki Pvt. Ltd. Auditorium Com...,Haryana,Gurgaon,122001.0,77.0689,28.4679,77.06799,28.46834,0.101519,"Leisure Valley Park Road, Sector 29, Gurugram,..."
4,The Oberoi,"443, Shankar Chowk Rd, opposite Metro Station,...",Haryana,Gurugram,122016.0,77.0882,28.5022,77.088799,28.498839,0.378261,"Service Road, Udyog Vihar, Gurugram, Haryana, ..."
5,The Leela Palace,"National Highway 8, Ambience Island, DLF Phase...",Haryana,Gurugram,122002.0,77.0965,28.5054,77.09259,28.50079,0.639334,"Moulsari Road, DLF Cyber City, Gurugram, Harya..."


Final Output

In [9]:
# Write the enriched dataframe to an XLSX workbook, leaving out the row index
df.to_excel(excel_writer='Sample Dataset Updated.xlsx', index=False)

Cross Verification

In [10]:
# Plotting function
def plot_map(row, threshold):
    """Build a folium map comparing a row's stored point with its geocoded point.

    Parameters
    ----------
    row : mapping (e.g. a pandas Series)
        Must provide 'lattitude', 'longitude', 'New LATTITUDE' and 'New LONGITUDE'.
    threshold : number
        Radius, in kilometers, of the circle drawn around the updated point.

    Returns
    -------
    folium.Map
        Map centered on the stored point at zoom level 15, with a green marker
        on the stored point, a red marker on the updated point, and a red
        filled circle of `threshold` km around the updated point.
    """
    original_coordinates = (row['lattitude'], row['longitude'])

    # NOTE(review): the 'New ...' columns are read crosswise on purpose. The driver
    # cell stores the geocoded latitude under 'New LONGITUDE' and the longitude
    # under 'New LATTITUDE', so this order yields (lat, lon) for folium.
    updated_coordinates = (row['New LONGITUDE'], row['New LATTITUDE'])

    m = folium.Map(location=original_coordinates, zoom_start=15)

    # (position, popup text, icon color) for each marker, in drawing order
    marker_specs = (
        (original_coordinates, 'Original', 'green'),
        (updated_coordinates, f"Updated: {updated_coordinates}", 'red'),
    )
    for position, label, shade in marker_specs:
        pin = folium.Marker(
            location=position,
            popup=label,
            icon=folium.Icon(color=shade, icon='info-sign'),
        )
        pin.add_to(m)

    # folium.Circle takes its radius in meters; threshold is given in kilometers
    radius_m = threshold*1000
    ring = folium.Circle(
        location=updated_coordinates,
        radius=radius_m,
        color='red',
        fill=True,
        fill_color='red',
    )
    ring.add_to(m)

    return m

In [11]:
# Render the verification map for the first row, with a 1 km circle around the updated point
threshold = 1
plot_map(row=df.iloc[0], threshold=threshold)