In [3]:
from geopy.geocoders import Nominatim
import pandas as pd
import time
import json

# Load the listings DataFrame from a local pickle.
# NOTE(review): unpickling runs arbitrary code — only load df.pkl if it comes from a trusted source.
df = pd.read_pickle("df.pkl")

# Nominatim geocoder client, identified to the service by a custom user agent string.
geolocator = Nominatim(user_agent="sector_mapper_app")


In [4]:
# Distinct sector names from the dataset: missing values removed, then
# lower-cased and whitespace-trimmed so spelling variants collapse together.
sector_names = df["sector"].dropna().astype(str)
sectors = sector_names.str.lower().str.strip().unique()


In [8]:
# Number of distinct normalized sector names.
len(sectors)

353

In [5]:
# Geocode every normalized sector name with a query scoped to Gurugram.
# Hits are stored as {sector: {"lat": ..., "lng": ...}}, rounded to 6 decimals;
# misses and errors are only reported, so the loop always runs to the end.
sector_coordinates = {}

for sector in sectors:
    try:
        query = f"{sector}, Gurugram, Haryana, India"
        location = geolocator.geocode(query, timeout=10)

        if location:
            sector_coordinates[sector] = {
                "lat": round(location.latitude, 6),
                "lng": round(location.longitude, 6)
            }
            print(f"✔ {sector} added")
        else:
            print(f"❌ Not found: {sector}")

    except Exception as e:
        # Best-effort: report the failure and move on to the next sector.
        print(f"⚠ Error for {sector}: {e}")

    finally:
        # IMPORTANT: avoid API blocking. The pause lives in `finally` so it also
        # runs after a failed request; otherwise an error (timeout, throttling)
        # would send the next request immediately.
        time.sleep(1)


✔ sector 36 added
✔ sector 89 added
✔ sohna road added
✔ sector 92 added
✔ sector 102 added
✔ gwal pahari added
✔ sector 108 added
✔ sector 105 added
✔ sector 26 added
✔ sector 109 added
✔ sector 28 added
✔ sector 65 added
✔ sector 12 added
✔ sector 85 added
✔ sector 70a added
✔ sector 30 added
✔ sector 107 added
✔ sector 3 added
✔ sector 2 added
✔ sector 41 added
✔ sector 4 added
✔ sector 62 added
✔ sector 49 added
✔ sector 81 added
✔ sector 66 added
✔ sector 86 added
✔ sector 104 added
✔ sector 48 added
✔ sector 51 added
✔ sector 37 added
✔ sector 111 added
✔ sector 67 added
✔ sector 113 added
✔ sector 13 added
✔ sector 61 added
✔ sector 69 added
✔ sector 67a added
✔ sector 37d added
✔ sector 82 added
✔ sector 53 added
✔ sector 74 added
✔ sector 52 added
✔ sector 43 added
✔ sector 14 added
✔ sector 25 added
✔ sector 95 added
✔ sector 56 added
✔ sector 83 added
✔ sector 88a added
✔ sector 55 added
✔ sector 50 added
✔ sector 84 added
✔ sector 91 added
✔ sector 76 added
✔ sector 82a add

In [7]:
# Number of sectors that currently have coordinates stored.
print(len(sector_coordinates))


158


In [9]:
# Persist the sector -> coordinates mapping as 4-space-indented JSON.
with open("sector_coordinates.json", "w") as out_file:
    json.dump(obj=sector_coordinates, fp=out_file, indent=4)


In [28]:
# Normalized, de-duplicated sector names present in the dataset.
all_sectors = df["sector"].dropna().astype(str).str.lower().str.strip().unique()

# Names that already have coordinates.
mapped_sectors = set(sector_coordinates)

# Names still lacking coordinates, in alphabetical order.
left_sectors = sorted(name for name in all_sectors if name not in mapped_sectors)

print(f"Total sectors      : {len(all_sectors)}")
print(f"Mapped sectors     : {len(mapped_sectors)}")
print(f"Left (unmapped)    : {len(left_sectors)}")

left_sectors[:10]  # preview


Total sectors      : 353
Mapped sectors     : 160
Left (unmapped)    : 193


['adani samsara',
 'adani samsara vilasa',
 'aipl riviera lake',
 'alpha one',
 'ambience lagoon',
 'ansal celebrity',
 'ansal harmony',
 'ansal height',
 'ansal sushant lok i',
 'ansal valley view']

In [18]:
# Display the full list of sector names that have no coordinates yet.
left_sectors

['adani samsara',
 'adani samsara vilasa',
 'aipl riviera lake',
 'alpha one',
 'ambience lagoon',
 'ansal celebrity',
 'ansal harmony',
 'ansal height',
 'ansal sushant lok i',
 'ansal valley view',
 'ansal versalia',
 'arttech story house',
 'ashiana center court',
 'ashiana mulberry',
 'assotech blith',
 'atskocoon',
 'atstourmal',
 'barga',
 'bella vista central park',
 'bestech altura',
 'bestech park view residency',
 'bhora kalan',
 'birla navya',
 'bptp astaire gardens',
 'bptp green oaks',
 'bptp park prime',
 'breez global heights',
 'breez global hill view',
 'central park cerise suites',
 'central park flower valley',
 'central park resorts sky villas',
 'conscient elaira',
 'conscient elevate',
 'diplomatsgolf l',
 'dlf amaltas drive',
 'dlf arbour',
 'dlf belaire',
 'dlf express greens',
 'dlf garden primus',
 'dlf grove',
 'dlf regal gardens',
 'dlf regency park',
 'dlf skycourt',
 'dlf tr',
 'dlf ultima',
 'elan emperor',
 'elan presidential',
 'eldeco accolade',
 'elde

In [12]:
import pandas as pd

# Flatten the coordinate mapping into one row per sector.
# Built column-wise so the frame always has the "sector", "lat" and "lon"
# columns, even when sector_coordinates is empty (a list of zero records
# would produce a frame with no columns at all).
# Note the stored "lng" key is exposed as the "lon" column.
sector_df = pd.DataFrame({
    "sector": list(sector_coordinates),
    "lat": [coords["lat"] for coords in sector_coordinates.values()],
    "lon": [coords["lng"] for coords in sector_coordinates.values()],
})


In [13]:
# Display the sector / lat / lon table.
sector_df

Unnamed: 0,sector,lat,lon
0,sector 36,28.419334,76.988926
1,sector 89,28.418446,76.945757
2,sohna road,28.457201,77.028033
3,sector 92,28.408905,76.915523
4,sector 102,28.475487,76.971175
...,...,...,...
153,old delhi,28.507586,77.071964
154,essel towers,28.476710,77.074820
155,antriksh heights,28.403638,76.964286
156,gpl eden heights,28.394500,77.021425


In [15]:
# Second pass over the still-unmapped names, using a query scoped only to
# "Haryana, India". Hits are added to the in-memory sector_coordinates dict.
# NOTE(review): without a city in the query, a hit may lie outside Gurugram —
# verify any coordinates added here.
for sector in left_sectors:
    try:
        query = f"{sector}, Haryana, India"
        location = geolocator.geocode(query, timeout=10)

        if location:
            sector_coordinates[sector] = {
                "lat": round(location.latitude, 6),
                "lng": round(location.longitude, 6)
            }
            print(f"✔ {sector} added")
        else:
            print(f"❌ Not found: {sector}")

    except Exception as e:
        # Best-effort: report the failure and move on to the next sector.
        print(f"⚠ Error for {sector}: {e}")

    finally:
        # IMPORTANT: avoid API blocking. The pause lives in `finally` so it also
        # runs after a failed request; otherwise an error (timeout, throttling)
        # would send the next request immediately.
        time.sleep(1)

❌ Not found: adani samsara
❌ Not found: adani samsara vilasa
❌ Not found: aipl riviera lake
❌ Not found: alpha one
❌ Not found: ambience lagoon
❌ Not found: ansal celebrity
❌ Not found: ansal harmony
❌ Not found: ansal height
❌ Not found: ansal sushant lok i
❌ Not found: ansal valley view
❌ Not found: ansal versalia
❌ Not found: arttech story house
❌ Not found: ashiana center court
❌ Not found: ashiana mulberry
❌ Not found: assotech blith
❌ Not found: atskocoon
❌ Not found: atstourmal
❌ Not found: barga
❌ Not found: bella vista central park
❌ Not found: bestech altura
❌ Not found: bestech park view residency
❌ Not found: bhora kalan
❌ Not found: birla navya
❌ Not found: bptp astaire gardens
❌ Not found: bptp green oaks
❌ Not found: bptp park prime
❌ Not found: breez global heights
❌ Not found: breez global hill view
❌ Not found: central park cerise suites
❌ Not found: central park flower valley
❌ Not found: central park resorts sky villas
❌ Not found: conscient elaira
❌ Not found: cons

In [29]:

# Row count in df for every sector that already has coordinates.
# - dropna() keeps missing sectors from being counted under the string "nan".
# - sorted() gives a deterministic row order; iterating the mapped_sectors set
#   directly yields an order that can change between kernel sessions.
freq_map = (
    df["sector"]
    .dropna()
    .astype(str)
    .str.lower()
    .str.strip()
    .value_counts()
    .loc[sorted(mapped_sectors)]
    .reset_index()
)

freq_map.columns = ["sector", "frequency"]



In [19]:

# Row count in df for each sector that still has no coordinates,
# listed in the same order as left_sectors.
freq = (
    df["sector"]
    .astype(str)
    .str.lower()
    .str.strip()
    .value_counts()
    .loc[left_sectors]
    .rename_axis("sector")
    .reset_index(name="frequency")
)



                   sector  frequency
0           adani samsara          1
1    adani samsara vilasa          2
2       aipl riviera lake          1
3               alpha one          1
4         ambience lagoon          2
..                    ...        ...
188    vatika seven lamps          1
189     vatika xpressions          1
190           vista vilas          1
191            west rajiv          1
192    whitelandthe aspen          2

[193 rows x 2 columns]


In [25]:
# Frequencies of the unmapped sectors, highest first (original row labels kept).
freq.sort_values("frequency", ascending=False).loc[:, "frequency"]

71     5
158    5
66     4
68     4
156    4
      ..
80     1
81     1
82     1
83     1
96     1
Name: frequency, Length: 193, dtype: int64

In [31]:
# Total number of rows covered by the mapped sectors.
# A sum does not depend on row order, so the previous sort was redundant work.
freq_map["frequency"].sum()

5784