In [3]:
import os

import numpy as np
import pandas as pd
import plotly.colors as colors
import plotly.express as px
import plotly.graph_objects as go
import plotly.io as pio
from plotly.subplots import make_subplots
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
from sklearn.model_selection import train_test_split

# Apply the light "plotly_white" theme to every Plotly figure created after this
# point, so individual figures do not have to pass a template themselves.
PLOTLY_TEMPLATE = "plotly_white"
pio.templates.default = PLOTLY_TEMPLATE

In [4]:
# Location of the NY housing JSON export. The original machine-specific path is
# kept as the default so the existing setup keeps working unchanged; set the
# NYHOUSE_JSON environment variable to run the notebook against a copy of the
# file stored somewhere else.
NYHOUSE_JSON = os.environ.get(
    "NYHOUSE_JSON",
    'C:/Users/abu_s/Desktop/Data/json/NYhouse/nyhouse.json',
)

# Load the listings into the DataFrame that the rest of the notebook works on.
df = pd.read_json(NYHOUSE_JSON)

In [5]:
# Preview the first three rows to check the columns and value formats.
df.iloc[:3]

Unnamed: 0,BROKERTITLE,TYPE,PRICE,BEDS,BATH,PROPERTYSQFT,ADDRESS,STATE,MAIN_ADDRESS,ADMINISTRATIVE_AREA_LEVEL_2,LOCALITY,SUBLOCALITY,STREET_NAME,LONG_NAME,FORMATTED_ADDRESS,LATITUDE,LONGITUDE
0,Brokered by Douglas Elliman -111 Fifth Ave,Condo for sale,315000,2,2.0,1400.0,2 E 55th St Unit 803,"New York, NY 10022","2 E 55th St Unit 803New York, NY 10022",New York County,New York,Manhattan,East 55th Street,Regis Residence,"Regis Residence, 2 E 55th St #803, New York, N...",40.761255,-73.974483
1,Brokered by Serhant,Condo for sale,195000000,7,10.0,17545.0,Central Park Tower Penthouse-217 W 57th New Yo...,"New York, NY 10019",Central Park Tower Penthouse-217 W 57th New Yo...,United States,New York,New York County,New York,West 57th Street,"217 W 57th St, New York, NY 10019, USA",40.766393,-73.980991
2,Brokered by Sowae Corp,House for sale,260000,4,2.0,2015.0,620 Sinclair Ave,"Staten Island, NY 10312","620 Sinclair AveStaten Island, NY 10312",United States,New York,Richmond County,Staten Island,Sinclair Avenue,"620 Sinclair Ave, Staten Island, NY 10312, USA",40.541805,-74.196109


In [6]:
# Count the missing values in each column.
df.isna().sum()

BROKERTITLE                    0
TYPE                           0
PRICE                          0
BEDS                           0
BATH                           0
PROPERTYSQFT                   0
ADDRESS                        0
STATE                          0
MAIN_ADDRESS                   0
ADMINISTRATIVE_AREA_LEVEL_2    0
LOCALITY                       0
SUBLOCALITY                    0
STREET_NAME                    0
LONG_NAME                      0
FORMATTED_ADDRESS              0
LATITUDE                       0
LONGITUDE                      0
dtype: int64

In [9]:
import dash
import dash_core_components as dcc
import dash_html_components as html
import plotly.express as px
import pandas as pd

# No data is loaded in this cell: `df` must already exist in the kernel
# (an earlier cell reads it from a JSON file with pd.read_json).


# Sanity check: report how many rows `df` holds before any figure is built
print(f"Number of records: {len(df)}")

# Create the Dash application object; the layout and the server start below use it
app = dash.Dash(__name__)

# Build the ten Plotly Express figures shown on the dashboard.
# Every figure reads from the already-loaded DataFrame `df`.

# Visualization 1: Price Distribution (Histogram)
fig_price_dist = px.histogram(df, x="PRICE", title="Price Distribution", nbins=10)

# Visualization 2: Price vs. Square Footage (Scatter Plot)
fig_price_sqft = px.scatter(
    df,
    x="PROPERTYSQFT",
    y="PRICE",
    title="Price vs. Square Footage",
    labels={"PROPERTYSQFT": "Property Square Footage (sqft)", "PRICE": "Price ($)"},
)

# Visualization 3: Number of Bedrooms vs. Price (Box Plot)
fig_beds_price = px.box(
    df,
    x="BEDS",
    y="PRICE",
    title="Number of Bedrooms vs. Price",
    labels={"BEDS": "Number of Bedrooms", "PRICE": "Price ($)"},
)

# Visualization 4: Number of Bathrooms vs. Price (Box Plot)
fig_bath_price = px.box(
    df,
    x="BATH",
    y="PRICE",
    title="Number of Bathrooms vs. Price",
    labels={"BATH": "Number of Bathrooms", "PRICE": "Price ($)"},
)

# Visualization 5: Geographical Distribution of Properties (Map)
# Marker colour and marker size both encode PRICE.
fig_geo = px.scatter_mapbox(
    df,
    lat="LATITUDE",
    lon="LONGITUDE",
    color="PRICE",
    size="PRICE",
    color_continuous_scale="Viridis",
    size_max=15,
    title="Geographical Distribution of Properties",
    mapbox_style="carto-positron",
)

# Visualization 6: Price vs. Location (Heat Map)
# mapbox_style is set explicitly: when it is omitted the figure falls back to a
# Mapbox-hosted base map that needs an access token, and this notebook never
# configures one. "carto-positron" needs no token and matches the base map
# used for Visualization 5.
fig_price_loc = px.density_mapbox(
    df,
    lat="LATITUDE",
    lon="LONGITUDE",
    z="PRICE",
    radius=10,
    center=dict(lat=40.761255, lon=-73.9744834),
    zoom=10,
    color_continuous_scale="Viridis",
    title="Price vs. Location Heat Map",
    mapbox_style="carto-positron",
)

# Visualization 7: Property Types Distribution (Pie Chart, drawn as a donut via `hole`)
fig_type_dist = px.pie(df, names="TYPE", title="Property Type Distribution", hole=0.3)

# Visualization 8: Property Count by Administrative Area (Bar Chart)
fig_area_count = px.bar(df, x="ADMINISTRATIVE_AREA_LEVEL_2", title="Property Count by Administrative Area")

# Visualization 9: Price Trend Over Time (Line Chart)
# The dataset has no time column, so a constant placeholder YEAR is added purely
# to demonstrate the chart; replace it with a real date column to get an actual
# trend. NOTE: this adds the YEAR column to `df` itself, so it is visible to
# every cell that runs afterwards.
df["YEAR"] = 2023  # scalar is broadcast to every row
fig_price_year = px.line(df, x="YEAR", y="PRICE", title="Price Trend Over Time", markers=True)

# Visualization 10: Correlation Between Square Footage and Number of Bedrooms (Bubble Chart)
# Bubble size encodes PRICE and bubble colour encodes the property TYPE.
fig_sqft_beds = px.scatter(
    df,
    x="PROPERTYSQFT",
    y="BEDS",
    size="PRICE",
    color="TYPE",
    title="Square Footage vs. Number of Bedrooms",
    size_max=15,
    labels={
        "PROPERTYSQFT": "Property Square Footage (sqft)",
        "BEDS": "Number of Bedrooms",
        "PRICE": "Price ($)",
    },
)

# Dashboard layout: a page heading followed by one graph per figure.
# Each entry pairs the component id with the figure it displays; the order of
# this list is the top-to-bottom order on the page.
dashboard_graphs = [
    ("price-dist", fig_price_dist),      # 1: Price Distribution
    ("price-sqft", fig_price_sqft),      # 2: Price vs. Square Footage
    ("beds-price", fig_beds_price),      # 3: Number of Bedrooms vs. Price
    ("bath-price", fig_bath_price),      # 4: Number of Bathrooms vs. Price
    ("geo-distribution", fig_geo),       # 5: Geographical Distribution
    ("price-location", fig_price_loc),   # 6: Price vs. Location (Heat Map)
    ("type-dist", fig_type_dist),        # 7: Property Types Distribution
    ("area-count", fig_area_count),      # 8: Property Count by Administrative Area
    ("price-trend", fig_price_year),     # 9: Price Trend Over Time
    ("sqft-beds", fig_sqft_beds),        # 10: Square Footage vs. Number of Bedrooms
]

app.layout = html.Div(children=[
    html.H1(children="Real Estate Price Dashboard"),
    *(dcc.Graph(id=graph_id, figure=graph_figure)
      for graph_id, graph_figure in dashboard_graphs),
])

# Start the Dash server for this app with debug mode switched on.
# Port 8054 is used here; pick a different one (for example, 8052 or 8053) if it is taken.
app.run_server(debug=True, port=8054)  # edit `port` to change where the dashboard is served



Number of records: 4801
