In [1]:
# Run once to set up your Real Estate project folder structure
from pathlib import Path

# Base directory ‚Äî current working directory (Real_Estate_Price)
BASE = Path.cwd()

# Subfolders to create inside the project
for folder in ["data_raw", "data_clean", "images", "models", "notebooks", "cache"]:
    (BASE / folder).mkdir(parents=True, exist_ok=True)

print("‚úÖ Project folder structure created at:", BASE)


‚úÖ Project folder structure created at: C:\Users\91786\Desktop\Real_Estate_Price


In [7]:
# Install libraries (restart kernel after first install if needed)
!pip install pandas numpy geopandas osmnx requests vaderSentiment scikit-learn xgboost joblib tensorflow pillow praw psaw geemap earthengine-api matplotlib




In [9]:
import pandas as pd
from pathlib import Path
import numpy as np

# ‚úÖ Base folder is your current project directory
BASE = Path.cwd()

# Example: Bengaluru dataset from Kaggle (edit filename if different)
f_blr = BASE  / "Bengaluru_House_Data_kaggle.csv"

# Load dataset
df_blr = pd.read_csv(f_blr)
df_blr["city"] = "Bengaluru"

# (Later, repeat for other cities)
# f_mum = BASE / "Mumbai_House_Data.csv"
# df_mum = pd.read_csv(f_mum); df_mum["city"] = "Mumbai"
# ...

# Keep only common columns
keep_cols = [c for c in df_blr.columns if c.lower() in
             ["location", "area_type", "availability", "size", "total_sqft", "bath", "balcony", "price"]]
keep_cols += ["city"]
df_blr = df_blr[keep_cols].copy()

# üßπ Clean numeric sqft and price
def parse_sqft(x):
    s = str(x)
    try:
        if "-" in s:
            a, b = s.split("-")
            return (float(a) + float(b)) / 2
        if s.replace(".", "", 1).isdigit():
            return float(s)
        return np.nan
    except:
        return np.nan

df_blr["total_sqft"] = df_blr["total_sqft"].apply(parse_sqft)
df_blr["price"] = pd.to_numeric(df_blr["price"], errors="coerce")
df_blr = df_blr.dropna(subset=["location", "total_sqft", "price"])

# üè† Infer BHK if column "size" contains strings like "3 BHK"
def parse_bhk(x):
    s = str(x).split()[0]
    return pd.to_numeric(s, errors="coerce")

df_blr["bhk"] = df_blr["size"].apply(parse_bhk)

# üíæ Save cleaned dataset
df_blr.to_csv(BASE / "data_clean" / "blr_clean.csv", index=False)

print("‚úÖ Cleaned Bengaluru dataset saved at:", BASE / "data_clean" / "blr_clean.csv")
df_blr.head()


‚úÖ Cleaned Bengaluru dataset saved at: C:\Users\91786\Desktop\Real_Estate_Price\data_clean\blr_clean.csv


Unnamed: 0,area_type,availability,location,size,total_sqft,bath,balcony,price,city,bhk
0,Super built-up Area,19-Dec,Electronic City Phase II,2 BHK,1056.0,2.0,1.0,39.07,Bengaluru,2.0
1,Plot Area,Ready To Move,Chikka Tirupathi,4 Bedroom,2600.0,5.0,3.0,120.0,Bengaluru,4.0
2,Built-up Area,Ready To Move,Uttarahalli,3 BHK,1440.0,2.0,3.0,62.0,Bengaluru,3.0
3,Super built-up Area,Ready To Move,Lingadheeranahalli,3 BHK,1521.0,3.0,1.0,95.0,Bengaluru,3.0
4,Super built-up Area,Ready To Move,Kothanur,2 BHK,1200.0,2.0,1.0,51.0,Bengaluru,2.0


In [11]:
import pandas as pd
from pathlib import Path
import numpy as np

# ‚úÖ Base folder (your main project directory)
BASE = Path(r"C:\Users\91786\Desktop\Real_Estate_Price")

# üìÑ Mumbai dataset path
f_mum = BASE / "Mumbai_Data_kaggle.csv"

# üîπ Load dataset
df_mum = pd.read_csv(f_mum)
df_mum["city"] = "Mumbai"

# üîπ Rename columns for standardization
df_mum.rename(columns={
    "locality": "location",
    "area": "total_sqft",
    "type": "property_type",
    "price": "price_value",
    "status": "availability"
}, inplace=True)

# üîπ Clean numeric columns
def parse_sqft(x):
    try:
        s = str(x)
        if "-" in s:
            a, b = s.split("-")
            return (float(a) + float(b)) / 2
        if s.replace(".", "", 1).isdigit():
            return float(s)
        return np.nan
    except:
        return np.nan

df_mum["total_sqft"] = df_mum["total_sqft"].apply(parse_sqft)
df_mum["price_value"] = pd.to_numeric(df_mum["price_value"], errors="coerce")

# üîπ Drop rows with missing values in key columns
df_mum.dropna(subset=["location", "total_sqft", "price_value"], inplace=True)

# üíæ Save cleaned dataset
output_path = BASE / "data_clean" / "mumbai_clean.csv"
df_mum.to_csv(output_path, index=False)

print(f"‚úÖ Cleaned Mumbai dataset saved at: {output_path}")
df_mum.head()


‚úÖ Cleaned Mumbai dataset saved at: C:\Users\91786\Desktop\Real_Estate_Price\data_clean\mumbai_clean.csv


Unnamed: 0,bhk,property_type,location,total_sqft,price_value,price_unit,region,availability,age,city
0,3,Apartment,Lak And Hanware The Residency Tower,685.0,2.5,Cr,Andheri West,Ready to move,New,Mumbai
1,2,Apartment,Radheya Sai Enclave Building No 2,640.0,52.51,L,Naigaon East,Under Construction,New,Mumbai
2,2,Apartment,Romell Serene,610.0,1.73,Cr,Borivali West,Under Construction,New,Mumbai
3,2,Apartment,Soundlines Codename Urban Rainforest,876.0,59.98,L,Panvel,Under Construction,New,Mumbai
4,2,Apartment,Origin Oriana,659.0,94.11,L,Mira Road East,Under Construction,New,Mumbai


In [13]:
import pandas as pd
from pathlib import Path
import numpy as np

# Base folder (your project)
BASE = Path(r"C:\Users\91786\Desktop\Real_Estate_Price")

# File path
f_hyd = BASE / "Hyderabad_Data_kaggle.csv"

# Load dataset
df = pd.read_csv(f_hyd)

# Add city column
df["city"] = "Hyderabad"

# ‚úÖ Rename columns to standard format
df = df.rename(columns={
    "title": "property_type",
    "location": "location",
    "price(L)": "price_lakh",
    "rate_persqft": "rate_per_sqft",
    "area_insqft": "total_sqft",
    "building_status": "status"
})

# ‚úÖ Convert area and price to numeric
def parse_sqft(x):
    s = str(x)
    try:
        if "-" in s:
            a, b = s.split("-")
            return (float(a) + float(b)) / 2
        if s.replace(".", "", 1).isdigit():
            return float(s)
        return np.nan
    except:
        return np.nan

df["total_sqft"] = df["total_sqft"].apply(parse_sqft)
df["price_lakh"] = pd.to_numeric(df["price_lakh"], errors="coerce")
df["rate_per_sqft"] = pd.to_numeric(df["rate_per_sqft"], errors="coerce")

# ‚úÖ Drop missing values in key columns
df = df.dropna(subset=["location", "total_sqft", "price_lakh"])

# ‚úÖ Save cleaned file
out_path = BASE / "data_clean" / "hyderabad_clean.csv"
df.to_csv(out_path, index=False)

print("‚úÖ Cleaned Hyderabad dataset saved at:", out_path)
df.head()


‚úÖ Cleaned Hyderabad dataset saved at: C:\Users\91786\Desktop\Real_Estate_Price\data_clean\hyderabad_clean.csv


Unnamed: 0.1,Unnamed: 0,property_type,location,price_lakh,rate_per_sqft,total_sqft,status,city
0,0,3 BHK Apartment,Nizampet,108.0,6000,1805.0,Under Construction,Hyderabad
1,1,3 BHK Apartment,Bachupally,85.8,5500,1560.0,Under Construction,Hyderabad
2,2,2 BHK Apartment,Dundigal,55.64,5200,1070.0,Under Construction,Hyderabad
3,3,2 BHK Apartment,Pocharam,60.48,4999,1210.0,Under Construction,Hyderabad
4,4,3 BHK Apartment,Kollur,113.0,5999,1900.0,Under Construction,Hyderabad


In [15]:
import pandas as pd
from pathlib import Path
import numpy as np

# ‚úÖ Base folder (change path if needed)
BASE = Path(r"C:\Users\91786\Desktop\Real_Estate_Price")

# ‚úÖ File path for Delhi data
f_delhi = BASE / "Delhi_Data_kaggle.csv"

# ‚úÖ Load dataset
df = pd.read_csv(f_delhi)

# Add city column
df["city"] = "Delhi"

# ‚úÖ Rename columns to standard structure
df = df.rename(columns={
    "Address": "location",
    "area": "total_sqft",
    "price": "price_lakh",
    "Bedrooms": "bhk",
    "bathrooms": "bath",
    "Balcony": "balcony",
    "Status": "status",
    "type_of_building": "building_type",
    "Price_sqft": "rate_per_sqft"
})

# ‚úÖ Convert numeric columns
def parse_sqft(x):
    s = str(x)
    try:
        if "-" in s:
            a, b = s.split("-")
            return (float(a) + float(b)) / 2
        if s.replace(".", "", 1).isdigit():
            return float(s)
        return np.nan
    except:
        return np.nan

df["total_sqft"] = df["total_sqft"].apply(parse_sqft)
df["price_lakh"] = pd.to_numeric(df["price_lakh"], errors="coerce")
df["rate_per_sqft"] = pd.to_numeric(df["rate_per_sqft"], errors="coerce")

# ‚úÖ Drop rows with missing essential values
df = df.dropna(subset=["location", "total_sqft", "price_lakh"])

# ‚úÖ Save cleaned dataset
out_path = BASE / "data_clean" / "delhi_clean.csv"
out_path.parent.mkdir(exist_ok=True, parents=True)
df.to_csv(out_path, index=False)

print("‚úÖ Cleaned Delhi dataset saved at:", out_path)
df.head()


‚úÖ Cleaned Delhi dataset saved at: C:\Users\91786\Desktop\Real_Estate_Price\data_clean\delhi_clean.csv


Unnamed: 0.1,Unnamed: 0,price_lakh,location,total_sqft,latitude,longitude,bhk,Bathrooms,balcony,status,neworold,parking,Furnished_status,Lift,Landmarks,building_type,desc,rate_per_sqft,city
0,0,5600000.0,"Noida Extension, Noida, Delhi NCR",1350.0,28.60885,77.46056,3.0,3.0,,Under Construction,New Property,,,2.0,,Flat,\n\n\n Welcome ...,4148.148148,Delhi
1,1,8800000.0,"Sector 79, Gurgaon, Delhi NCR",1490.0,28.374236,76.952416,3.0,3.0,,Ready to Move,New Property,,Semi-Furnished,2.0,,Flat,\n\n\n Mapsko M...,5906.040268,Delhi
2,2,16500000.0,"Vaishali, Ghaziabad, Delhi NCR",2385.0,28.645769,77.38511,4.0,5.0,,Ready to Move,New Property,1.0,Unfurnished,,,Flat,\n\n\n This pro...,6918.238994,Delhi
3,3,3810000.0,"Link Road, F Block, Sector 50, Noida, Uttar Pr...",1050.0,28.566914,77.436434,2.0,2.0,3.0,,New Property,1.0,Unfurnished,2.0,near Gaur Mulberry Mansion,Flat,\n\n\n AIG Roya...,3628.571429,Delhi
4,4,6200000.0,"Jaypee Pavilion Court Sector 128, Noida, Secto...",1350.0,28.520732,77.356491,2.0,2.0,3.0,Ready to Move,Resale,1.0,,3.0,,Flat,\n\n\n The prop...,4592.592593,Delhi


In [26]:
import pandas as pd
import numpy as np
from pathlib import Path

# ‚úÖ File paths
BASE = Path(r"C:\Users\91786\Desktop\Real_Estate_Price")
input_path = BASE / "Kolkata_Data_kaggle.csv"
output_path = BASE / "data_clean" / "kolkata_clean.csv"

# ‚úÖ Read data
df = pd.read_csv(input_path)
print("Columns read from file:\n", list(df.columns), "\n")

# ‚úÖ Rename columns for consistency
df = df.rename(columns={
    "Flat_Price": "price",
    "EMI_Starts": "emi_starts",
    "BHK": "bhk",
    "HOUSE_TYPE": "house_type",
    "Purpose": "purpose",
    "Location": "location",
    "Area_Type": "area_type",
    "Total_Sq.ft": "total_sqft",
    "Price_per_sq.ft": "price_per_sqft",
    "Owner_name": "owner_name",
    "Owner_type": "owner_type"
})

# ‚úÖ Keep only relevant columns
cols = [
    "price", "emi_starts", "bhk", "house_type", "purpose", "location",
    "area_type", "total_sqft", "price_per_sqft", "owner_name", "owner_type"
]
df = df[cols].copy()
df["city"] = "Kolkata"

# --- Cleaning helper functions ---
def parse_price(price_str):
    """
    Converts values like '‚Çπ8.5 Cr', '‚Çπ45.0 L' to float in lakhs.
    """
    if not isinstance(price_str, str):
        return np.nan
    s = price_str.replace("‚Çπ", "").replace(",", "").strip().lower()
    try:
        if "cr" in s:
            return float(s.replace("cr", "").strip()) * 100
        if "l" in s:
            return float(s.replace("l", "").strip())
        return float(s)
    except:
        return np.nan

def parse_sqft(x):
    """
    Converts '2100 sq.ft' ‚Üí 2100.0
    """
    if not isinstance(x, str):
        return np.nan
    s = x.lower().replace("sq.ft", "").replace(",", "").strip()
    try:
        if "-" in s:
            a, b = s.split("-")
            return (float(a) + float(b)) / 2
        return float(s)
    except:
        return np.nan

# ‚úÖ Apply cleaning
rows_before = len(df)
df["price_lakh"] = df["price"].apply(parse_price)
df["total_sqft"] = df["total_sqft"].apply(parse_sqft)

# Drop invalid rows
df = df.dropna(subset=["price_lakh", "total_sqft", "location"])
rows_after = len(df)
removed = rows_before - rows_after

# ‚úÖ Save cleaned dataset
df.to_csv(output_path, index=False)

print(f"‚úÖ Rows before cleaning: {rows_before}")
print(f"‚úÖ Rows after cleaning: {rows_after}")
print(f"‚úÖ Removed {removed} rows\n")

print(f"‚úÖ Cleaned Kolkata dataset saved at: {output_path}\n")

# ‚úÖ Show tabular sample
print("üîπ Sample cleaned data:")
print(df.head(10).to_string(index=False))


Columns read from file:
 ['Flat_Price', 'EMI_Starts', 'BHK', 'css-11nfaq3', 'Unnamed: 4', 'HOUSE_TYPE', 'Unnamed: 6', 'Unnamed: 7', 'Purpose', 'Location', 'Area_Type', 'Total_Sq.ft', 'Price_per_sq.ft', 'Owner_name', 'Owner_type'] 

‚úÖ Rows before cleaning: 3968
‚úÖ Rows after cleaning: 3966
‚úÖ Removed 2 rows

‚úÖ Cleaned Kolkata dataset saved at: C:\Users\91786\Desktop\Real_Estate_Price\data_clean\kolkata_clean.csv

üîπ Sample cleaned data:
   price emi_starts   bhk        house_type     purpose               location     area_type  total_sqft price_per_sqft                         owner_name     owner_type    city  price_lakh
 ‚Çπ8.5 Cr ‚Çπ4.22 Lacs 6 BHK Independent House for sale in    Ballygunge, Kolkata Build Up Area      4200.0 ‚Çπ20.24 K/sq.ft                Abhijit Chakraborty Housing Expert Kolkata       850.0
 ‚Çπ45.0 L   ‚Çπ23.83 K 3 BHK Independent House for sale in   Barrackpore, Kolkata Build Up Area      1400.0  ‚Çπ3.21 K/sq.ft                    Virtual Reality Housi

In [30]:
import pandas as pd
import numpy as np
from pathlib import Path

BASE = Path(r"C:\Users\91786\Desktop\Real_Estate_Price")
input_path = BASE / "Pune_Data_kaggle.csv"
output_path = BASE / "data_clean" / "pune_clean.csv"

# Read file
df = pd.read_csv(input_path)
print("Columns read from file:\n", list(df.columns))

print(f"\n‚úÖ Rows before cleaning: {len(df)}")

# üßπ Clean price range like "6,205 - 7,395/sq. ft."
def parse_price_range(s):
    s = str(s).replace(",", "").strip()
    if "-" in s:
        parts = s.split("-")
        try:
            low = float(parts[0])
            high = float(parts[1].split("/")[0])
            return low, high, (low + high) / 2
        except:
            return np.nan, np.nan, np.nan
    else:
        try:
            val = float(s.split("/")[0])
            return val, val, val
        except:
            return np.nan, np.nan, np.nan

df[["min_price", "max_price", "avg_price"]] = df["buyrates_value_2"].apply(
    lambda x: pd.Series(parse_price_range(x))
)

# Keep only relevant columns
keep_cols = [
    "localityname_value", "min_price", "max_price", "avg_price",
    "buyratesqqa_value",
    "rentalrates_value_2", "rentalrates_value_3", "rentalrates_value_4",
    "rentalrates_value_5", "rentalrates_value_6"
]
df_clean = df[keep_cols].copy()

# Rename columns nicely
df_clean.columns = [
    "location", "min_price_sqft", "max_price_sqft", "avg_price_sqft",
    "price_trend", "rent_2bhk", "rent_3bhk", "rent_4bhk", "rent_5bhk", "rent_others"
]

# Drop rows where price info is missing
df_clean = df_clean.dropna(subset=["avg_price_sqft"])
df_clean["city"] = "Pune"

print(f"‚úÖ Rows after cleaning: {len(df_clean)}")
print(f"‚úÖ Removed {len(df) - len(df_clean)} rows\n")

# Save output
df_clean.to_csv(output_path, index=False)
print(f"‚úÖ Cleaned Pune dataset saved at: {output_path}\n")

print("üîπ Sample cleaned data:")
print(df_clean.head(10))


Columns read from file:
 ['localityname_value', 'buyrates_value_1', 'buyrates_value_2', 'buyratesqqa_value', 'buyratesqqa_image', 'buyratesqqa_image/_alt', 'buyrates_value_3', 'rentalrates_value_1', 'rentalrates_value_2', 'rentalrates_value_3', 'rentalrates_value_4', 'rentalrates_value_5', 'rentalrates_value_6']

‚úÖ Rows before cleaning: 276
‚úÖ Rows after cleaning: 206
‚úÖ Removed 70 rows

‚úÖ Cleaned Pune dataset saved at: C:\Users\91786\Desktop\Real_Estate_Price\data_clean\pune_clean.csv

üîπ Sample cleaned data:
           location  min_price_sqft  max_price_sqft  avg_price_sqft  \
0      Adarsh Nagar          6205.0          7395.0          6800.0   
1              Aher          3315.0          3868.0          3591.5   
2            Akurdi          4845.0          5695.0          5270.0   
3            Alandi          2550.0          2932.0          2741.0   
5    Ambedkar Nagar          4845.0          5482.0          5163.5   
6         Ambe Gaon          4165.0          5100.

In [34]:
import pandas as pd
from pathlib import Path

# Base path
BASE = Path(r"C:\Users\91786\Desktop\Real_Estate_Price\data_clean")

# List of all cleaned city CSVs
files = [
    BASE / "mumbai_clean.csv",
    BASE / "delhi_clean.csv",
    BASE / "blr_clean.csv",
    BASE / "hyderabad_clean.csv",
    BASE / "kolkata_clean.csv",
    BASE / "pune_clean.csv",
]

# To store dataframes
dfs = []

# Read each file and show its columns
for file in files:
    try:
        df = pd.read_csv(file)
        city = file.stem.split("_")[0].capitalize()
        print(f"\nüìÑ {city} Columns: {list(df.columns)}")
        df["city"] = city  # add city name if missing
        dfs.append(df)
    except Exception as e:
        print(f"‚ùå Error reading {file}: {e}")

# ‚úÖ Combine all datasets ‚Äî auto aligns columns
combined_df = pd.concat(dfs, axis=0, ignore_index=True)

# üîπ Optional: handle same-meaning columns with different names
rename_map = {
    "Flat_Price": "price_lakh",
    "min_price": "price_lakh",
    "max_price": "max_price_lakh",
    "buyrates_value_2": "price_range",
    "Total_Sq.ft": "total_sqft",
    "Price_per_sq.ft": "price_per_sqft",
    "localityname_value": "location",
    "Purpose": "purpose",
    "HOUSE_TYPE": "house_type",
    "Area_Type": "area_type",
    "Owner_type": "owner_type",
    "Owner_name": "owner_name",
}

# Rename wherever applicable
combined_df.rename(columns=rename_map, inplace=True)

# üîπ Drop columns with no useful data (optional)
combined_df.dropna(how='all', axis=1, inplace=True)

# üîπ Save final dataset
output_path = BASE / "India_Data.csv"
combined_df.to_csv(output_path, index=False)
print(f"\n‚úÖ Combined India dataset saved at: {output_path}")

# Show summary
print(f"\n‚úÖ Total rows combined: {len(combined_df)}")
print(f"‚úÖ Total columns: {len(combined_df.columns)}")
print(f"\nColumns in final dataset:\n{list(combined_df.columns)}")



üìÑ Mumbai Columns: ['bhk', 'property_type', 'location', 'total_sqft', 'price_value', 'price_unit', 'region', 'availability', 'age', 'city']

üìÑ Delhi Columns: ['Unnamed: 0', 'price_lakh', 'location', 'total_sqft', 'latitude', 'longitude', 'bhk', 'Bathrooms', 'balcony', 'status', 'neworold', 'parking', 'Furnished_status', 'Lift', 'Landmarks', 'building_type', 'desc', 'rate_per_sqft', 'city']

üìÑ Blr Columns: ['area_type', 'availability', 'location', 'size', 'total_sqft', 'bath', 'balcony', 'price', 'city', 'bhk']

üìÑ Hyderabad Columns: ['Unnamed: 0', 'property_type', 'location', 'price_lakh', 'rate_per_sqft', 'total_sqft', 'status', 'city']

üìÑ Kolkata Columns: ['price', 'emi_starts', 'bhk', 'house_type', 'purpose', 'location', 'area_type', 'total_sqft', 'price_per_sqft', 'owner_name', 'owner_type', 'city', 'price_lakh']

üìÑ Pune Columns: ['location', 'min_price_sqft', 'max_price_sqft', 'avg_price_sqft', 'price_trend', 'rent_2bhk', 'rent_3bhk', 'rent_4bhk', 'rent_5bhk', 're

In [35]:
import pandas as pd
from pathlib import Path

# Path to your merged file
path = Path(r"C:\Users\91786\Desktop\Real_Estate_Price\data_clean\India_Data.csv")

# Read the dataset
df = pd.read_csv(path)

# Print shape
print("‚úÖ Dataset shape:")
print(f"Rows (data points): {df.shape[0]}")
print(f"Columns (features): {df.shape[1]}\n")

# Print column names
print("üìã Column names:")
print(list(df.columns))

# Show some sample rows
print("\nüîπ Sample data preview:")
print(df.head(5))


  df = pd.read_csv(path)


‚úÖ Dataset shape:
Rows (data points): 104881
Columns (features): 44

üìã Column names:
['bhk', 'property_type', 'location', 'total_sqft', 'price_value', 'price_unit', 'region', 'availability', 'age', 'city', 'Unnamed: 0', 'price_lakh', 'latitude', 'longitude', 'Bathrooms', 'balcony', 'status', 'neworold', 'parking', 'Furnished_status', 'Lift', 'Landmarks', 'building_type', 'desc', 'rate_per_sqft', 'area_type', 'size', 'bath', 'price', 'emi_starts', 'house_type', 'purpose', 'price_per_sqft', 'owner_name', 'owner_type', 'min_price_sqft', 'max_price_sqft', 'avg_price_sqft', 'price_trend', 'rent_2bhk', 'rent_3bhk', 'rent_4bhk', 'rent_5bhk', 'rent_others']

üîπ Sample data preview:
  bhk property_type                              location  total_sqft  \
0   3     Apartment   Lak And Hanware The Residency Tower       685.0   
1   2     Apartment     Radheya Sai Enclave Building No 2       640.0   
2   2     Apartment                         Romell Serene       610.0   
3   2     Apartment

In [36]:
df = pd.read_csv(path, low_memory=False)

print("üèôÔ∏è Records per city:")
print(df['city'].value_counts())

(df.isnull().sum() / len(df) * 100).sort_values(ascending=True)


üèôÔ∏è Records per city:
city
Mumbai       76038
Blr          13273
Delhi         7738
Kolkata       3966
Hyderabad     3660
Pune           206
Name: count, dtype: int64


location             0.000000
city                 0.000000
total_sqft           0.196413
bhk                  3.701338
availability        14.845396
property_type       24.011022
price_unit          27.500691
price_value         27.500691
region              27.500691
age                 27.500691
balcony             82.995967
price               83.563276
area_type           83.563276
price_lakh          85.351017
size                87.359960
bath                87.414308
rate_per_sqft       89.132445
Unnamed: 0          89.132445
status              89.679732
latitude            92.622115
Bathrooms           92.622115
longitude           92.622115
building_type       92.622115
neworold            92.622115
desc                92.622115
Furnished_status    96.067925
house_type          96.218572
emi_starts          96.218572
purpose             96.218572
price_per_sqft      96.218572
owner_type          96.218572
owner_name          96.228106
Landmarks           97.369400
parking   