In [1]:

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import plotly.express as px
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import r2_score, mean_squared_error
from dash import Dash, dcc, html, Input, Output, State


In [2]:

# Location of the pre-processed housing dataset.
# NOTE(review): this is an absolute, machine-specific Windows path, so the notebook
# only runs where this exact file exists. Point DATA_PATH at your own copy
# (ideally a path relative to the project) before running.
DATA_PATH = "C:\\Users\\Anmo2\\OneDrive\\Desktop\\team one\\task 1 house\\Housing_sorted.csv"

house = pd.read_csv(DATA_PATH)

# Quick look at the data: first rows, schema, summary statistics, missing values.
print(house.head())
# info() prints its report itself and returns None; wrapping it in print()
# would add a stray "None" line to the output.
house.info()
print(house.describe())
print(house.isnull().sum())

# Split features/target
# "price" is the target; "options" and "options_description" are excluded
# from the feature matrix as well.
X = house.drop(columns=["price", "options_description", "options"])
y = house["price"]

# Hold out 20% of the rows for evaluation; the fixed seed keeps the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)


      price      area  bedrooms  bathrooms  stories  mainroad  guestroom  \
0 -2.506190 -0.529529         3          1        2         1          0   
1 -2.506190 -0.684333         2          1        1         1          0   
2 -2.506190 -1.232991         3          1        1         0          0   
3 -2.479962 -1.717222         3          1        1         0          0   
4 -2.400708 -1.156444         2          1        1         1          0   

   basement  hotwaterheating  airconditioning  parking  prefarea  \
0         0                0                0        0         0   
1         0                0                0        0         0   
2         0                0                0        0         0   
3         0                0                0        0         0   
4         1                0                0        2         0   

   furnishingstatus  options options_description  
0                 0        1         half option  
1                 0        1    

In [3]:

# Baseline: ordinary least-squares regression trained on the training split.
model = LinearRegression().fit(X_train, y_train)  # fit() returns the fitted estimator
y_pred_lin = model.predict(X_test)

# Both metrics are computed on the held-out test split.
mse_lin = mean_squared_error(y_test, y_pred_lin)
rmse_lin = np.sqrt(mse_lin)
r2_lin = r2_score(y_test, y_pred_lin)

for metric_name, metric_value in (("R²", r2_lin), ("RMSE", rmse_lin)):
    print(f"Linear Regression {metric_name}:", metric_value)


Linear Regression R²: 0.7282377313351056
Linear Regression RMSE: 0.610207809546281


In [4]:

# Second model: a 100-tree random forest, seeded so repeated runs give the same forest.
rf = RandomForestRegressor(n_estimators=100, random_state=42).fit(X_train, y_train)
y_pred_rf = rf.predict(X_test)

# Score on the same held-out split used for the linear model so the numbers are comparable.
mse_rf = mean_squared_error(y_test, y_pred_rf)
rmse_rf = np.sqrt(mse_rf)
r2_rf = r2_score(y_test, y_pred_rf)

for metric_name, metric_value in (("R²", r2_rf), ("RMSE", rmse_rf)):
    print(f"Random Forest {metric_name}:", metric_value)


Random Forest R²: 0.6594650172163815
Random Forest RMSE: 0.6830681701617771


In [5]:

def _actual_vs_predicted_fig(actual, predicted, model_label, r2):
    """Scatter predicted against actual values with a dashed y = x reference line.

    Parameters
    ----------
    actual : pandas.Series
        True target values (x axis); its min/max define the reference line.
    predicted : array-like
        Model predictions aligned with ``actual`` (y axis).
    model_label : str
        Model name shown in the figure title.
    r2 : float
        R² score shown in the title, rounded to two decimals.

    Returns
    -------
    plotly.graph_objects.Figure
    """
    fig = px.scatter(x=actual, y=predicted,
                     labels={"x": "Actual Prices", "y": "Predicted Prices"},
                     title=f"Actual vs Predicted Prices ({model_label}, R²={r2:.2f})")
    # Points on this diagonal are perfect predictions.
    low, high = actual.min(), actual.max()
    fig.add_shape(type="line",
                  x0=low, y0=low,
                  x1=high, y1=high,
                  line=dict(color="red", dash="dash"))
    return fig


def _ranked_bar_fig(scores, value_label, title):
    """Horizontal bar chart of a per-feature score Series (index = feature names).

    Axis labels are set explicitly; without them plotly shows the bare
    placeholders "x" and "y" when raw arrays are passed.
    """
    return px.bar(x=scores.values, y=scores.index,
                  orientation="h",
                  labels={"x": value_label, "y": "Feature"},
                  title=title)


# Model fit: predicted vs. actual on the test split, one figure per model.
fig1 = _actual_vs_predicted_fig(y_test, y_pred_lin, "Linear Regression", r2_lin)
fig2 = _actual_vs_predicted_fig(y_test, y_pred_rf, "Random Forest", r2_rf)

# Target distribution over the full dataset.
fig3 = px.histogram(house, x="price", nbins=30, title="Distribution of House Prices")

# Pairwise correlations between the numeric columns.
corr = house.corr(numeric_only=True)
fig4 = px.imshow(corr, text_auto=True, aspect="auto",
                 color_continuous_scale="RdBu", title="Correlation Heatmap")

# Linear-model coefficients, ordered by absolute magnitude (largest first).
coef_series = pd.Series(model.coef_, index=X_train.columns).sort_values(key=abs, ascending=False)
fig5 = _ranked_bar_fig(coef_series, "Coefficient", "Feature Importance (Linear Regression)")

# Random-forest feature importances, largest first.
feat_imp = pd.Series(rf.feature_importances_, index=X_train.columns).sort_values(ascending=False)
fig6 = _ranked_bar_fig(feat_imp, "Importance", "Feature Importance (Random Forest)")

# Price distribution per category.
fig7 = px.box(house, x="furnishingstatus", y="price", title="Price vs Furnishing Status")
fig8 = px.box(house, x="bedrooms", y="price", title="Price vs Number of Bedrooms")


In [6]:

app = Dash(__name__)

# (label text, component id, initial value) for each numeric input of the
# prediction form, in the order they appear on the page.
_prediction_fields = [
    ("Area (sqft)", "area", 2000),
    ("Bedrooms", "bedrooms", 3),
    ("Bathrooms", "bathrooms", 2),
    ("Stories", "stories", 1),
    ("Parking", "parking", 1),
    ("Mainroad (yes=1/no=0)", "mainroad", 1),
    ("Guestroom (yes=1/no=0)", "guestroom", 0),
    ("Basement (yes=1/no=0)", "basement", 0),
    ("Hotwaterheating (yes=1/no=0)", "hotwaterheating", 0),
    ("Airconditioning (yes=1/no=0)", "airconditioning", 1),
    ("Prefarea (yes=1/no=0)", "prefarea", 1),
    ("Furnishing Status (0=Unfurnished,1=Semi,2=Furnished)", "furnishingstatus", 1),
]

# Visualization panel: section heading followed by one graph per prepared figure.
_graph_children = [html.H2("📊 Data Visualizations", style={"color": "lightblue"})]
for _figure in (fig1, fig2, fig3, fig4, fig5, fig6, fig7, fig8):
    _graph_children.append(dcc.Graph(figure=_figure))

# Prediction form: a label + numeric input per field, then the button and the
# element that receives the prediction text.
_form_children = []
for _label_text, _component_id, _initial_value in _prediction_fields:
    _form_children.append(html.Label(_label_text))
    _form_children.append(dcc.Input(id=_component_id, type="number", value=_initial_value))
_form_children.extend([
    html.Br(), html.Br(),
    html.Button("Predict Price", id="predict-btn", n_clicks=0),
    html.H3(id="prediction-output", style={"color": "yellow"}),
])

_form_style = {
    "display": "grid", "gridTemplateColumns": "1fr 1fr",
    "gap": "10px", "backgroundColor": "#111",
    "padding": "20px", "borderRadius": "15px", "color": "white"
}

app.layout = html.Div(
    [
        html.H1("🏠 Housing Dashboard", style={"textAlign": "center", "color": "white"}),
        html.Div(_graph_children, style={"padding": "20px"}),
        html.H2("🔮 House Price Prediction", style={"color": "lightgreen"}),
        html.Div(_form_children, style=_form_style),
    ],
    style={"backgroundColor": "#222", "padding": "20px"},
)

@app.callback(
    Output("prediction-output", "children"),
    Input("predict-btn", "n_clicks"),
    State("area", "value"), State("bedrooms", "value"), State("bathrooms", "value"),
    State("stories", "value"), State("parking", "value"), State("mainroad", "value"),
    State("guestroom", "value"), State("basement", "value"), State("hotwaterheating", "value"),
    State("airconditioning", "value"), State("prefarea", "value"), State("furnishingstatus", "value")
)
def predict_price(n_clicks, area, bedrooms, bathrooms, stories, parking,
                  mainroad, guestroom, basement, hotwaterheating, airconditioning,
                  prefarea, furnishingstatus):
    """Predict a price with the linear model from the current form values.

    Triggered by the "Predict Price" button; the form fields are read as State,
    so editing them alone does not re-run the prediction.

    Returns
    -------
    str
        An empty string before the first click, a prompt naming the empty
        fields if any input is blank, otherwise the formatted prediction.

    NOTE(review): the ``head()`` output earlier in this notebook shows ``area``
    and ``price`` as standardized-looking values (e.g. area around -0.53), so the
    model appears to expect scaled inputs and to return a scaled price, while the
    form asks for raw square feet. Confirm, and apply the training-time scaling
    here (and invert it on the result) if so.
    """
    # Covers both "not clicked yet" (0) and a missing click count (None).
    if not n_clicks:
        return ""

    # Same column order as the training features.
    cols = ["area", "bedrooms", "bathrooms", "stories",
            "mainroad", "guestroom", "basement",
            "hotwaterheating", "airconditioning",
            "parking", "prefarea", "furnishingstatus"]
    values = [area, bedrooms, bathrooms, stories,
              mainroad, guestroom, basement,
              hotwaterheating, airconditioning,
              parking, prefarea, furnishingstatus]

    # A numeric input left blank reaches the callback as None; passing that to
    # the model would raise inside predict(), so ask the user to complete the form.
    missing = [name for name, value in zip(cols, values) if value is None]
    if missing:
        return "Please enter a value for: " + ", ".join(missing)

    user_data = pd.DataFrame([values], columns=cols)
    predicted_price = model.predict(user_data)[0]
    return f"Predicted house price: {predicted_price:,.2f}"





# Start the Dash development server when this code runs as the main module.
if __name__ == "__main__":
    server_options = {"debug": True}  # debug mode turns on Dash's dev tools
    app.run(**server_options)
