#### Creation of the Database of the Real Estate of Mumbai 

***Step 1: Import the required libraries***

In [5]:
import pandas as pd
import numpy as np
import sqlite3
from datetime import datetime, timedelta
import random

***Step 3: Seed the random generators and define the candidate values for each column***

In [6]:
# Single seed value shared by every random generator used in this notebook
SEED = 42

# Seed for reproducibility.
# Both generators must be seeded: the categorical/numeric columns are drawn
# with NumPy (`np.random.*`), while the posting dates are drawn with the
# stdlib `random` module, which `np.random.seed` does not affect.
np.random.seed(SEED)
random.seed(SEED)

# Number of properties
n = 10000

# Mumbai Localities
localities = ["Andheri", "Bandra", "Juhu", "Powai", "Borivali", "Dadar", "Malad", "Goregaon", "Kandivali", "Vikhroli"]

# Property Types
property_types = ["Apartment", "Villa", "Studio"]

# Furnishing
furnishing_status = ["Furnished", "Semi-Furnished", "Unfurnished"]

# Availability
availability_status = ["Ready to move", "Under construction"]

***Step 4: Generate the columns and create the database***

In [7]:
# Generate synthetic data
#
# Each column is sampled independently of the others. The order of the
# entries below is also the order in which random numbers are consumed, so
# reordering them changes the generated data even with the same seed.
data = {
    # Sequential primary key: 1..n
    "property_id": range(1, n + 1),
    # Uniform choice over the localities list
    "locality": np.random.choice(localities, n),
    # Weighted choices; each `p` list lines up with its category list
    "property_type": np.random.choice(property_types, n, p=[0.7, 0.2, 0.1]),
    "bhk": np.random.choice([1, 2, 3, 4], n, p=[0.3, 0.4, 0.2, 0.1]),
    # Normal draws (mean, std) clipped to [min, max], then rounded
    "size_sqft": np.round(np.random.normal(850, 300, n).clip(300, 2500), 2),
    "price_lakhs": np.round(np.random.normal(150, 50, n).clip(30, 500), 2),
    "rent_monthly": np.round(np.random.normal(45000, 15000, n).clip(10000, 200000), 0),
    "availability": np.random.choice(availability_status, n, p=[0.8, 0.2]),
    "possession_year": np.random.choice([2024, 2025, 2026, 2027], n, p=[0.5, 0.3, 0.15, 0.05]),
    "furnished_status": np.random.choice(furnishing_status, n, p=[0.4, 0.4, 0.2]),
    # Random day offset of 0..365 (inclusive) from 2024-01-01.
    # NOTE: this draws from the stdlib `random` generator, which
    # `np.random.seed` does not control; `random` must be seeded as well
    # for these dates to be reproducible.
    "posted_on": [datetime(2024, 1, 1) + timedelta(days=random.randint(0, 365)) for _ in range(n)]
}


# Create DataFrame
df = pd.DataFrame(data)

# Save to SQLite database.
# `if_exists="replace"` drops and recreates the table, so re-running this cell
# is safe. The try/finally guarantees the connection is closed even when the
# write fails, so the database file is not left open by the kernel.
conn = sqlite3.connect("mumbai_real_estate.db")
try:
    df.to_sql("housing_data", conn, if_exists="replace", index=False)
finally:
    conn.close()

# Report the actual number of rows written instead of a hard-coded figure
print(f"✅ Mumbai real estate housing database created with {len(df):,} records.")


✅ Mumbai real estate housing database created with 10,000 records.


***Optional: Preview the database***

In [8]:
# Re-open the database file and read a few rows back as a sanity check.
conn = sqlite3.connect("mumbai_real_estate.db")
try:
    # Read a sample from the housing_data table
    df = pd.read_sql("SELECT * FROM housing_data LIMIT 5;", conn)
finally:
    # Close the connection even if the query fails (e.g. the table is missing)
    conn.close()

# NOTE: `df` now refers to this 5-row sample, not the full generated frame.
print(df)

   property_id  locality property_type  bhk  size_sqft  price_lakhs  \
0            1     Malad        Studio    1    1292.29       136.95   
1            2     Powai     Apartment    2     894.23       112.35   
2            3  Goregaon     Apartment    1     915.87       190.00   
3            4  Borivali        Studio    3     889.52       126.44   
4            5     Malad     Apartment    2    1019.09       260.05   

   rent_monthly   availability  possession_year furnished_status  \
0       55665.0  Ready to move             2025      Unfurnished   
1       41791.0  Ready to move             2025        Furnished   
2       29550.0  Ready to move             2025   Semi-Furnished   
3       50435.0  Ready to move             2025        Furnished   
4       40941.0  Ready to move             2025      Unfurnished   

             posted_on  
0  2024-04-23 00:00:00  
1  2024-04-08 00:00:00  
2  2024-06-26 00:00:00  
3  2024-07-07 00:00:00  
4  2024-09-26 00:00:00  
