In [1]:
#Data Loading and Preprocessing

import pandas as pd
import geopandas as gpd
import rasterio
from shapely.geometry import Point
import os

#Show all rows and columns in output
pd.set_option('display.max_rows', None)
pd.set_option('display.max_columns', None)

#Load Excel file
postal_df = pd.read_excel(r"D:\Projects\intr\Land_Price_Prediction\Dataset\Pin_code_and_postal\Pin_code_and_postal.xlsx")

#Clean malformed latitude and longitude strings like "84.0464 E"
postal_df['Latitude'] = postal_df['Latitude'].astype(str).str.replace(' N', '', regex=False)
postal_df['Longitude'] = postal_df['Longitude'].astype(str).str.replace(' E', '', regex=False)

#Convert lat/long to numeric
postal_df['Latitude'] = pd.to_numeric(postal_df['Latitude'], errors='coerce')
postal_df['Longitude'] = pd.to_numeric(postal_df['Longitude'], errors='coerce')

#Check for missing values
print("Missing values before cleaning:")
print(postal_df[['Pincode', 'Latitude', 'Longitude']].isnull().sum())

#Drop rows where lat/long is missing
postal_df = postal_df.dropna(subset=['Latitude', 'Longitude'])

#Remove duplicate pincodes (optional)
postal_df = postal_df.drop_duplicates(subset=['Pincode'])

#Convert to GeoDataFrame
postal_gdf = gpd.GeoDataFrame(
    postal_df,
    geometry=gpd.points_from_xy(postal_df['Longitude'], postal_df['Latitude']),
    crs="EPSG:4326"
)

#Preview the cleaned GeoDataFrame
print(f"\n Cleaned GeoDataFrame: {postal_gdf.shape[0]} rows × {postal_gdf.shape[1]} columns")
postal_gdf.head()


Missing values before cleaning:
Pincode         0
Latitude     8883
Longitude    9041
dtype: int64

 Cleaned GeoDataFrame: 19277 rows × 12 columns


Unnamed: 0,CircleName,RegionName,DivisionName,OfficeName,Pincode,OfficeType,Delivery,District,StateName,Latitude,Longitude,geometry
0,Andhra Pradesh Circle,Kurnool Region,Hindupur Division,Peddakotla B.O,515631,BO,Delivery,ANANTAPUR,ANDHRA PRADESH,14.5689,77.85624,POINT (77.85624 14.5689)
3,Andhra Pradesh Circle,Kurnool Region,Hindupur Division,Obulareddipalli B.O,515581,BO,Delivery,ANANTAPUR,ANDHRA PRADESH,14.2488,78.2588,POINT (78.2588 14.2488)
7,Andhra Pradesh Circle,Kurnool Region,Hindupur Division,Gurrambailu B.O,515571,BO,Delivery,ANANTAPUR,ANDHRA PRADESH,13.922676,78.201176,POINT (78.20118 13.92268)
17,Andhra Pradesh Circle,Kurnool Region,Hindupur Division,Hallikera Devarahalli B.O,515311,BO,Delivery,ANANTAPUR,ANDHRA PRADESH,13.805102,77.043902,POINT (77.0439 13.8051)
20,Andhra Pradesh Circle,Kurnool Region,Hindupur Division,Tammadehalli B.O,515281,BO,Delivery,ANANTAPUR,ANDHRA PRADESH,14.128106,76.980106,POINT (76.98011 14.12811)


In [2]:
#Load Population Density Raster and Sample
import rasterio
raster_path = r"D:\Projects\intr\Land_Price_Prediction\Dataset\historical_residex_data\WorldPopOrg_India_Population_2020_dataset.tif"

#Open the raster and sample population density at postal_gdf coordinates
with rasterio.open(raster_path) as src:
    coords = [(x, y) for x, y in zip(postal_gdf.geometry.x, postal_gdf.geometry.y)]
    pop_density_vals = list(src.sample(coords))
    pop_density = [val[0] if val[0] != src.nodata else 0 for val in pop_density_vals]

#Add population density as a new column
postal_gdf['pop_density'] = pop_density

#Preview the result
print("\n GeoDataFrame with Population Density:")
print(postal_gdf[['Pincode', 'District', 'Latitude', 'Longitude', 'pop_density']].head())


 GeoDataFrame with Population Density:
    Pincode   District   Latitude  Longitude  pop_density
0    515631  ANANTAPUR  14.568900  77.856240     2.111855
3    515581  ANANTAPUR  14.248800  78.258800     1.333830
7    515571  ANANTAPUR  13.922676  78.201176     1.816693
17   515311  ANANTAPUR  13.805102  77.043902     3.579516
20   515281  ANANTAPUR  14.128106  76.980106     7.627969


In [3]:
# Define paths
airport_point_path = r"D:\Projects\intr\Land_Price_Prediction\Dataset\HOTOSM\data_hum_data_org\airports\hotosm_ind_airports_points_geojson\hotosm_ind_airports_points_geojson.geojson"
airport_line_path = r"D:\Projects\intr\Land_Price_Prediction\Dataset\HOTOSM\data_hum_data_org\airports\hotosm_ind_airports_lines_geojson\hotosm_ind_airports_lines_geojson.geojson"
airport_polygon_path = r"D:\Projects\intr\Land_Price_Prediction\Dataset\HOTOSM\data_hum_data_org\airports\hotosm_ind_airports_polygons_geojson\hotosm_ind_airports_polygons_geojson.geojson"

# Load each GeoJSON file
try:
    gdf_airport_points = gpd.read_file(airport_point_path)
    print("Airport Points loaded:", len(gdf_airport_points), "records")
except Exception as e:
    print(" Failed to load Airport Points:", e)

try:
    gdf_airport_lines = gpd.read_file(airport_line_path)
    print("Airport Lines loaded:", len(gdf_airport_lines), "records")
except Exception as e:
    print(" Failed to load Airport Lines:", e)

try:
    gdf_airport_polygons = gpd.read_file(airport_polygon_path)
    print("Airport Polygons loaded:", len(gdf_airport_polygons), "records")
except Exception as e:
    print("Failed to load Airport Polygons:", e)


Airport Points loaded: 2710 records
Airport Lines loaded: 7158 records
Airport Polygons loaded: 6126 records


In [4]:
# --- Load Buildings Polygons ---
buildings_path = r"D:\Projects\intr\Land_Price_Prediction\Dataset\HOTOSM\data_hum_data_org\buildings\hotosm_ind_buildings_polygons_geojson\hotosm_ind_buildings_polygons_geojson.geojson"
buildings_gdf = gpd.read_file(buildings_path)

# Drop null geometry
buildings_gdf = buildings_gdf[~buildings_gdf.geometry.is_empty & buildings_gdf.geometry.notnull()]

# Preview
print("\n Buildings Dataset Loaded:")
print(buildings_gdf.info())
print(buildings_gdf.head())



 Buildings Dataset Loaded:
<class 'geopandas.geodataframe.GeoDataFrame'>
RangeIndex: 15060217 entries, 0 to 15060216
Data columns (total 16 columns):
 #   Column              Dtype   
---  ------              -----   
 0   name                object  
 1   name:en             object  
 2   building            object  
 3   building:levels     object  
 4   building:materials  object  
 5   addr:full           object  
 6   addr:housenumber    object  
 7   addr:street         object  
 8   addr:city           object  
 9   office              object  
 10  source              object  
 11  name:hi             object  
 12  name:ta             object  
 13  osm_id              int32   
 14  osm_type            object  
 15  geometry            geometry
dtypes: geometry(1), int32(1), object(14)
memory usage: 1.7+ GB
None
   name name:en building building:levels building:materials addr:full  \
0  None    None      yes            None               None      None   
1  None    None      y

In [5]:
# --- Load Education Points ---
edu_points_path = r"D:\Projects\intr\Land_Price_Prediction\Dataset\HOTOSM\data_hum_data_org\education_facilities\hotosm_ind_education_facilities_points_geojson\hotosm_ind_education_facilities_points_geojson.geojson"
edu_points_gdf = gpd.read_file(edu_points_path)
edu_points_gdf = edu_points_gdf[~edu_points_gdf.geometry.is_empty & edu_points_gdf.geometry.notnull()]

print("\n Education Facilities (Points) Loaded:")
print(edu_points_gdf.info())
print(edu_points_gdf.head())

# --- Load Education Polygons ---
edu_polygons_path = r"D:\Projects\intr\Land_Price_Prediction\Dataset\HOTOSM\data_hum_data_org\education_facilities\hotosm_ind_education_facilities_polygons_geojson\hotosm_ind_education_facilities_polygons_geojson.geojson"
edu_polygons_gdf = gpd.read_file(edu_polygons_path)
edu_polygons_gdf = edu_polygons_gdf[~edu_polygons_gdf.geometry.is_empty & edu_polygons_gdf.geometry.notnull()]

print("\n Education Facilities (Polygons) Loaded:")
print(edu_polygons_gdf.info())
print(edu_polygons_gdf.head())



 Education Facilities (Points) Loaded:
<class 'geopandas.geodataframe.GeoDataFrame'>
RangeIndex: 19502 entries, 0 to 19501
Data columns (total 14 columns):
 #   Column            Non-Null Count  Dtype   
---  ------            --------------  -----   
 0   name              17945 non-null  object  
 1   name:en           774 non-null    object  
 2   amenity           18793 non-null  object  
 3   building          799 non-null    object  
 4   operator:type     990 non-null    object  
 5   capacity:persons  0 non-null      object  
 6   addr:full         465 non-null    object  
 7   addr:city         2385 non-null   object  
 8   source            610 non-null    object  
 9   name:hi           292 non-null    object  
 10  name:ta           144 non-null    object  
 11  osm_id            19502 non-null  int64   
 12  osm_type          19502 non-null  object  
 13  geometry          19502 non-null  geometry
dtypes: geometry(1), int64(1), object(12)
memory usage: 2.1+ MB
None
      

In [6]:
# --- Load Healthcare Facilities ---
try:
    # Corrected path (folder name is 'healthcare_facilites')
    health_points_path = r"D:\Projects\intr\Land_Price_Prediction\Dataset\HOTOSM\data_hum_data_org\healthcare_facilites\hotosm_ind_health_facilities_points_geojson\hotosm_ind_health_facilities_points_geojson.geojson"
    health_points_gdf = gpd.read_file(health_points_path)
    health_points_gdf = health_points_gdf[~health_points_gdf.geometry.is_empty & health_points_gdf.geometry.notnull()]
    print("\n Healthcare Facilities (Points) Loaded:")
    print(health_points_gdf.info())
    print(health_points_gdf.head())
except Exception as e:
    print(f"\n Error loading healthcare points data: {e}")

try:
    health_polygons_path = r"D:\Projects\intr\Land_Price_Prediction\Dataset\HOTOSM\data_hum_data_org\healthcare_facilites\hotosm_ind_health_facilities_polygons_geojson\hotosm_ind_health_facilities_polygons_geojson.geojson"
    health_polygons_gdf = gpd.read_file(health_polygons_path)
    health_polygons_gdf = health_polygons_gdf[~health_polygons_gdf.geometry.is_empty & health_polygons_gdf.geometry.notnull()]
    print("\n Healthcare Facilities (Polygons) Loaded:")
    print(health_polygons_gdf.info())
    print(health_polygons_gdf.head())
except Exception as e:
    print(f"\n Error loading healthcare polygons data: {e}")

# --- Load Population Raster Files (.tif) ---

population_tif_paths = [
    r"D:\Projects\intr\Land_Price_Prediction\Dataset\HOTOSM\data_hum_data_org\population\population_counts_(2015-30)",
    r"D:\Projects\intr\Land_Price_Prediction\Dataset\HOTOSM\data_hum_data_org\population\popluation_density"
]

print("\n Population Raster Files Summary:")

for folder in population_tif_paths:
    for root, dirs, files in os.walk(folder):
        for file in files:
            if file.endswith('.tif'):
                path = os.path.join(root, file)
                try:
                    with rasterio.open(path) as src:
                        print(f"✔️ {file} | CRS: {src.crs}, Shape: {src.shape}, Bounds: {src.bounds}")
                except Exception as e:
                    print(f" Error loading {file}: {e}")

# --- Load Population Density CSVs (ASCII XYZ format) ---

density_csv_paths = [
    r"D:\Projects\intr\Land_Price_Prediction\Dataset\HOTOSM\data_hum_data_org\population\popluation_density\ind_pd_2016_1km_ASCII_XYZ\ind_pd_2016_1km_ASCII_XYZ.csv",
    r"D:\Projects\intr\Land_Price_Prediction\Dataset\HOTOSM\data_hum_data_org\population\popluation_density\ind_pd_2016_1km_UNadj_ASCII_XYZ\ind_pd_2016_1km_UNadj_ASCII_XYZ.csv",
    r"D:\Projects\intr\Land_Price_Prediction\Dataset\HOTOSM\data_hum_data_org\population\popluation_density\ind_pd_2017_1km_ASCII_XYZ\ind_pd_2017_1km_ASCII_XYZ.csv",
    r"D:\Projects\intr\Land_Price_Prediction\Dataset\HOTOSM\data_hum_data_org\population\popluation_density\ind_pd_2018_1km_ASCII_XYZ\ind_pd_2018_1km_ASCII_XYZ.csv",
    r"D:\Projects\intr\Land_Price_Prediction\Dataset\HOTOSM\data_hum_data_org\population\popluation_density\ind_pd_2018_1km_UNadj_ASCII_XYZ\ind_pd_2018_1km_UNadj_ASCII_XYZ.csv",
    r"D:\Projects\intr\Land_Price_Prediction\Dataset\HOTOSM\data_hum_data_org\population\popluation_density\ind_pd_2019_1km_ASCII_XYZ\ind_pd_2019_1km_ASCII_XYZ.csv",
    r"D:\Projects\intr\Land_Price_Prediction\Dataset\HOTOSM\data_hum_data_org\population\popluation_density\ind_pd_2019_1km_UNadj_ASCII_XYZ\ind_pd_2019_1km_UNadj_ASCII_XYZ.csv",
    r"D:\Projects\intr\Land_Price_Prediction\Dataset\HOTOSM\data_hum_data_org\population\popluation_density\ind_pd_2020_1km_ASCII_XYZ\ind_pd_2020_1km_ASCII_XYZ.csv",
    r"D:\Projects\intr\Land_Price_Prediction\Dataset\HOTOSM\data_hum_data_org\population\popluation_density\ind_pd_2020_1km_UNadj_ASCII_XYZ\ind_pd_2020_1km_UNadj_ASCII_XYZ.csv"
]

print("\n Population Density CSVs Summary:")

for csv_path in density_csv_paths:
    try:
        df = pd.read_csv(csv_path)
        print(f" Loaded: {os.path.basename(csv_path)} | Shape: {df.shape} | Columns: {list(df.columns)}")
    except Exception as e:
        print(f" Error loading {csv_path}: {e}")



 Healthcare Facilities (Points) Loaded:
<class 'geopandas.geodataframe.GeoDataFrame'>
RangeIndex: 129968 entries, 0 to 129967
Data columns (total 16 columns):
 #   Column                 Non-Null Count   Dtype   
---  ------                 --------------   -----   
 0   name                   127696 non-null  object  
 1   name:en                1036 non-null    object  
 2   amenity                89014 non-null   object  
 3   building               49 non-null      object  
 4   healthcare             70881 non-null   object  
 5   healthcare:speciality  3516 non-null    object  
 6   operator:type          8120 non-null    object  
 7   capacity:persons       0 non-null       object  
 8   addr:full              100314 non-null  object  
 9   addr:city              6183 non-null    object  
 10  source                 109890 non-null  object  
 11  name:hi                2251 non-null    object  
 12  name:ta                165 non-null     object  
 13  osm_id                 12

In [7]:
def load_clean_dataset(path, layer_name=None, use_cols=None, dataset_name=""):
    try:
        print(f"\n Loading: {dataset_name}")
        gdf = gpd.read_file(path, layer=layer_name, engine="fiona")
        
        if use_cols:
            gdf = gdf[use_cols + ['geometry']] if 'geometry' in gdf.columns else gdf[use_cols]
        
        # Drop all rows with empty geometry
        gdf = gdf[gdf.geometry.notnull()]
        
        # Drop rows where all selected columns are null
        gdf = gdf.dropna(how='all', subset=gdf.columns.difference(['geometry']))

        print(f" {dataset_name} Loaded: {gdf.shape}")
        print(gdf.head(2))
        return gdf
    except Exception as e:
        print(f" Error loading {dataset_name}: {e}")
        return None


# === Railways ===
railway_lines = load_clean_dataset(
    r"D:\Projects\intr\Land_Price_Prediction\Dataset\HOTOSM\data_hum_data_org\railway\hotosm_ind_railways_lines_geojson\hotosm_ind_railways_lines_geojson.geojson",
    dataset_name="Railway Lines"
)

railway_points = load_clean_dataset(
    r"D:\Projects\intr\Land_Price_Prediction\Dataset\HOTOSM\data_hum_data_org\railway\hotosm_ind_railways_points_geojson\hotosm_ind_railways_points_geojson.geojson",
    dataset_name="Railway Points"
)

# === Roads ===
road_lines = load_clean_dataset(
    r"D:\Projects\intr\Land_Price_Prediction\Dataset\HOTOSM\data_hum_data_org\roads\road lines\hotosm_ind_roads_lines_geojson\hotosm_ind_roads_lines_geojson.geojson",
    dataset_name="Road Lines"
)

# === Seaports ===
seaport_lines = load_clean_dataset(
    r"D:\Projects\intr\Land_Price_Prediction\Dataset\HOTOSM\data_hum_data_org\seaport\hotosm_ind_sea_ports_lines_geojson\hotosm_ind_sea_ports_lines_geojson.geojson",
    dataset_name="Seaport Lines"
)

seaport_points = load_clean_dataset(
    r"D:\Projects\intr\Land_Price_Prediction\Dataset\HOTOSM\data_hum_data_org\seaport\hotosm_ind_sea_ports_points_geojson\hotosm_ind_sea_ports_points_geojson.geojson",
    dataset_name="Seaport Points"
)

seaport_polygons = load_clean_dataset(
    r"D:\Projects\intr\Land_Price_Prediction\Dataset\HOTOSM\data_hum_data_org\seaport\hotosm_ind_sea_ports_polygons_geojson\hotosm_ind_sea_ports_polygons_geojson.geojson",
    dataset_name="Seaport Polygons"
)

# === Waterways ===
waterways_lines = load_clean_dataset(
    r"D:\Projects\intr\Land_Price_Prediction\Dataset\HOTOSM\data_hum_data_org\Waterways\hotosm_ind_waterways_lines_geojson\hotosm_ind_waterways_lines_geojson.geojson",
    dataset_name="Waterways Lines"
)

waterways_points = load_clean_dataset(
    r"D:\Projects\intr\Land_Price_Prediction\Dataset\HOTOSM\data_hum_data_org\Waterways\hotosm_ind_waterways_points_geojson\hotosm_ind_waterways_points_geojson.geojson",
    dataset_name="Waterways Points"
)

waterways_polygons = load_clean_dataset(
    r"D:\Projects\intr\Land_Price_Prediction\Dataset\HOTOSM\data_hum_data_org\Waterways\hotosm_ind_waterways_polygons_geojson\hotosm_ind_waterways_polygons_geojson.geojson",
    dataset_name="Waterways Polygons"
)



 Loading: Railway Lines
 Railway Lines Loaded: (89129, 14)
   name name:en railway   ele operator:type layer addr:full addr:city source  \
0  None    None    rail  None          None  None      None      None   None   
1  None    None    rail  None          None  None      None      None   None   

  name:hi name:ta     osm_id   osm_type  \
0    None    None   26264238  ways_line   
1    None    None  205006856  ways_line   

                                            geometry  
0   LINESTRING (82.99213 25.3308, 82.99379 25.33171)  
1  LINESTRING (81.8368 25.44461, 81.83662 25.4446...  

 Loading: Railway Points
 Railway Points Loaded: (8654, 14)
      name name:en  railway   ele operator:type layer addr:full addr:city  \
0  Banisar    None  station  None          None  None      None      None   
1   Deswal    None  station  None          None  None      None      None   

  source name:hi name:ta      osm_id osm_type                   geometry  
0   None    None    None  1910223237

In [8]:
# === STEP 1: User Input ===
user_lat = 28.6139   # Example: New Delhi
user_lon = 77.2090
user_point = Point(user_lon, user_lat)

# === STEP 2: Buffer (e.g., 5km radius) ===
buffer_radius_meters = 5000  # 5 km
buffer = gpd.GeoSeries([user_point], crs='EPSG:4326').to_crs(epsg=3857).buffer(buffer_radius_meters).to_crs(epsg=4326)

# === STEP 3: Spatial Filter Function ===
def spatial_filter_within_buffer(gdf, buffer_geom):
    if gdf is None or gdf.empty:
        return gdf
    try:
        return gdf[gdf.geometry.within(buffer_geom.geometry.values[0])]
    except Exception as e:
        print(f"Spatial filtering error: {e}")
        return gdf

# === STEP 4: Apply to All Layers ===
railway_lines_nearby = spatial_filter_within_buffer(railway_lines, buffer)
railway_points_nearby = spatial_filter_within_buffer(railway_points, buffer)
road_lines_nearby     = spatial_filter_within_buffer(road_lines, buffer)
seaport_points_nearby = spatial_filter_within_buffer(seaport_points, buffer)
seaport_polys_nearby  = spatial_filter_within_buffer(seaport_polygons, buffer)
water_lines_nearby    = spatial_filter_within_buffer(waterways_lines, buffer)
water_points_nearby   = spatial_filter_within_buffer(waterways_points, buffer)
water_polys_nearby    = spatial_filter_within_buffer(waterways_polygons, buffer)

# === STEP 5: Generate Feature Table ===
features = {
    'num_railway_lines': len(railway_lines_nearby),
    'num_railway_points': len(railway_points_nearby),
    'num_road_lines': len(road_lines_nearby),
    'num_seaport_points': len(seaport_points_nearby),
    'num_seaport_polygons': len(seaport_polys_nearby),
    'num_water_lines': len(water_lines_nearby),
    'num_water_points': len(water_points_nearby),
    'num_water_polygons': len(water_polys_nearby),
}

features_df = pd.DataFrame([features])
print("\n Feature Table Based on User Location:")
print(features_df)



 Feature Table Based on User Location:
   num_railway_lines  num_railway_points  num_road_lines  num_seaport_points  \
0                219                  34            8374                   0   

   num_seaport_polygons  num_water_lines  num_water_points  num_water_polygons  
0                     0               28                 0                  55  


In [15]:
# from shapely.ops import nearest_points
import pandas as pd

# === Step 3: Display valid coordinates to user ===
print("\n Available Coordinates (choose from below):")
print(postal_gdf[['Pincode', 'District', 'Latitude', 'Longitude']].to_string(index=False))

# === Step 4: Ask for user input (latitude + longitude) ===
try:
    lat_str, lon_str = input("\n Enter Latitude and Longitude separated by space: ").strip().split()
    lat = float(lat_str)
    lon = float(lon_str)
    user_point = gpd.GeoDataFrame(geometry=gpd.points_from_xy([lon], [lat]), crs="EPSG:4326")
except Exception as e:
    print(f"\n Invalid coordinate input: {e}")
    user_point = None

# === Step 5: Nearest postal match ===
if user_point is not None:
    projected_crs = "EPSG:3857"
    postal_proj = postal_gdf.to_crs(projected_crs)
    user_proj = user_point.to_crs(projected_crs)

    postal_proj['distance'] = postal_proj.geometry.distance(user_proj.geometry[0])
    nearest = postal_proj.loc[postal_proj['distance'].idxmin()]

    if nearest['distance'] < 1000:
        print("\n Nearest Match Found:")
        original_row = postal_gdf.loc[nearest.name]
        print(original_row[['Pincode', 'District', 'Latitude', 'Longitude', 'pop_density']])

        # Dynamic base price: urban vs rural
        if original_row['pop_density'] > 5000:
            base_price = 3500
        elif original_row['pop_density'] > 1000:
            base_price = 2500
        else:
            base_price = 1500

        # Spatial feature extraction with counts and avg distances
        def extract_features_with_dist(point_gdf, radius_m=1000):
            crs_proj = "EPSG:3857"
            point_proj = point_gdf.to_crs(crs_proj)
            buffer_geom = point_proj.buffer(radius_m).iloc[0]

            def count_and_avg_dist(gdf):
                if gdf is None or gdf.empty:
                    return (0, None)
                gdf_proj = gdf.to_crs(crs_proj)
                nearby = gdf_proj[gdf_proj.intersects(buffer_geom)]
                count = nearby.shape[0]
                if count > 0:
                    dists = nearby.distance(point_proj.geometry.iloc[0])
                    return (count, dists.mean())
                return (0, None)

            categories = {
                "buildings": buildings_gdf,
                "education": pd.concat([edu_points_gdf, edu_polygons_gdf], ignore_index=True),
                "healthcare": pd.concat([health_points_gdf, health_polygons_gdf], ignore_index=True),
                "railway": pd.concat([railway_points, railway_lines], ignore_index=True),
                "seaport": pd.concat([seaport_points, seaport_polygons], ignore_index=True),
                "waterways": pd.concat([waterways_points, waterways_lines, waterways_polygons], ignore_index=True)
            }

            results = {}
            for cat, gdf_data in categories.items():
                count, avg_dist = count_and_avg_dist(gdf_data)
                results[cat] = {"count": count, "avg_distance_m": avg_dist}

            return results

        features = extract_features_with_dist(user_point)

        # Variety score: number of categories present
        variety_score = sum(1 for cat in features if features[cat]['count'] > 0)

        # Price calculation: base + pop density + variety + proximity
        proximity_bonus = 0
        for cat_data in features.values():
            if cat_data['avg_distance_m'] is not None:
                if cat_data['avg_distance_m'] < 500:
                    proximity_bonus += 0.05
                elif cat_data['avg_distance_m'] < 1000:
                    proximity_bonus += 0.02

        price = base_price * (1 + (original_row['pop_density'] / 20000) + (variety_score * 0.05) + proximity_bonus)

        # Print features
        print("\nSpatial Features (within 1 km):")
        for cat, data in features.items():
            dist_str = f"{data['avg_distance_m']:.1f} m" if data['avg_distance_m'] else "N/A"
            print(f"-> {cat.title()}: {data['count']} (Avg. Dist: {dist_str})")

        print(f"\nVariety Score: {variety_score}")
        print(f"Estimated Land Price: ₹{price:,.2f} per sq.ft")

    else:
        print("\n No nearby valid postal coordinate found (within 1 km).")
else:
    print("\n Skipping price estimation due to input issue.")



 Available Coordinates (choose from below):
 Pincode                  District    Latitude   Longitude
  515631                 ANANTAPUR   14.568900   77.856240
  515581                 ANANTAPUR   14.248800   78.258800
  515571                 ANANTAPUR   13.922676   78.201176
  515311                 ANANTAPUR   13.805102   77.043902
  515281                 ANANTAPUR   14.128106   76.980106
  515241                 ANANTAPUR   13.960000   77.680000
  534176             WEST GODAVARI   16.628600   80.584100
  813105                     BANKA   84.527000   24.239000
  813203                 BHAGALPUR   87.262885   25.298763
  853202                 BHAGALPUR   25.366000   84.258000
  813222                 BHAGALPUR   24.623000   84.714000
  812005                 BHAGALPUR   25.369000   86.369000
  853203                 BHAGALPUR   25.406700    6.805200
  812006                 BHAGALPUR   24.222000   86.365000
  853204                 MADHEPURA   25.215300   86.936200
  813209   


 Enter Latitude and Longitude separated by space:  11.36 75.82



 Nearest Match Found:
Pincode           673616
District       KOZHIKODE
Latitude           11.36
Longitude          75.82
pop_density     4.378806
Name: 54381, dtype: object

Spatial Features (within 1 km):
-> Buildings: 23 (Avg. Dist: 861.7 m)
-> Education: 14 (Avg. Dist: 915.2 m)
-> Healthcare: 2 (Avg. Dist: 617.2 m)
-> Railway: 0 (Avg. Dist: N/A)
-> Seaport: 0 (Avg. Dist: N/A)
-> Waterways: 16 (Avg. Dist: 391.1 m)

Variety Score: 4
Estimated Land Price: ₹1,965.33 per sq.ft
