<a href="https://colab.research.google.com/github/TanishqSoni2003/House-Price-Prediction-System/blob/main/House_Price_Prediction_System.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [8]:
!pip install pandas scikit-learn joblib dash plotly
!pip install dash

import ast
import json

import dash
import joblib
import numpy as np
import pandas as pd
import plotly.express as px
from dash import Dash, dcc, html, Input, Output, State
from sklearn.compose import ColumnTransformer
from sklearn.ensemble import RandomForestRegressor
from sklearn.impute import SimpleImputer
from sklearn.model_selection import train_test_split
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler, OneHotEncoder



In [9]:
# Read the house price table from the CSV next to the notebook and echo it
data = pd.read_csv(filepath_or_buffer='house_prices.csv')
print(data)

        Id  MSSubClass MSZoning  LotArea LotConfig BldgType  OverallCond  \
0        0          60       RL     8450    Inside     1Fam            5   
1        1          20       RL     9600       FR2     1Fam            8   
2        2          60       RL    11250    Inside     1Fam            5   
3        3          70       RL     9550    Corner     1Fam            5   
4        4          60       RL    14260       FR2     1Fam            5   
...    ...         ...      ...      ...       ...      ...          ...   
2914  2914         160       RM     1936    Inside    Twnhs            7   
2915  2915         160       RM     1894    Inside   TwnhsE            5   
2916  2916          20       RL    20000    Inside     1Fam            7   
2917  2917          85       RL    10441    Inside     1Fam            5   
2918  2918          60       RL     9627    Inside     1Fam            5   

      YearBuilt  YearRemodAdd Exterior1st  BsmtFinSF2  TotalBsmtSF  SalePrice  
0      

In [10]:
# Separate features and target variable.
# 'Id' is only a row identifier (it mirrors the index in the printed frame), so it
# carries no predictive signal and is kept out of the feature table when present.
X = data.drop(columns=['SalePrice']).drop(columns=['Id'], errors='ignore')
y = data['SalePrice']

# Identify numerical and categorical columns.
# Any numeric dtype (not just int64/float64) counts as numerical; every remaining
# column is treated as categorical, so no feature is left unassigned merely because
# its dtype is not exactly `object`.
numerical_cols = X.select_dtypes(include='number').columns
categorical_cols = X.select_dtypes(exclude='number').columns

In [11]:
# Numeric columns: fill gaps with the column mean, then standardise
numeric_steps = [
    ('imputer', SimpleImputer(strategy='mean')),
    ('scaler', StandardScaler()),
]
numerical_transformer = Pipeline(numeric_steps)

# Categorical columns: fill gaps with the most frequent label, then one-hot encode
# (labels not seen during fitting are ignored rather than raising)
categorical_steps = [
    ('imputer', SimpleImputer(strategy='most_frequent')),
    ('onehot', OneHotEncoder(handle_unknown='ignore')),
]
categorical_transformer = Pipeline(categorical_steps)

# Route each group of columns through its own pipeline
preprocessor = ColumnTransformer([
    ('num', numerical_transformer, numerical_cols),
    ('cat', categorical_transformer, categorical_cols),
])

In [12]:
# Keep only rows whose sale price is known. Rows without a target cannot supervise
# training or score the model; replacing their target with the mean would train the
# forest on fabricated labels, and leaving them in y_test would put NaNs in any metric.
labeled_rows = y.notna()
X_labeled = X.loc[labeled_rows]
y_labeled = y.loc[labeled_rows]

# Split the raw rows first so the preprocessing statistics never see the hold-out set
X_train_raw, X_test_raw, y_train, y_test = train_test_split(
    X_labeled, y_labeled, test_size=0.2, random_state=42
)

# Fit the imputers / scaler / encoder on the training rows only, then reuse them as-is
X_train = preprocessor.fit_transform(X_train_raw)
X_test = preprocessor.transform(X_test_raw)

# Whole feature table in model space, kept under its original name for any later use
X_preprocessed = preprocessor.transform(X)

# Train the model
model = RandomForestRegressor(n_estimators=100, random_state=42)
model.fit(X_train, y_train)

# Save the model.
# NOTE: only the estimator is persisted; it expects rows already transformed by `preprocessor`.
joblib.dump(model, 'house_price_model.pkl')

# Load the model back from the file that was just written
loaded_model = joblib.load('house_price_model.pkl')

In [13]:
# Instantiate the Dash application that hosts the dashboard
app = Dash(__name__)

In [14]:
# Page structure: heading, feature text box, trigger button, result line, price histogram
app.layout = html.Div(children=[
    html.H1(children="House Price Prediction Dashboard"),
    dcc.Input(
        id='input-features',
        type='text',
        placeholder='Enter features as JSON',
    ),
    html.Button(children='Predict', id='predict-button', n_clicks=0),
    html.Div(id='prediction-output'),
    dcc.Graph(id='price-distribution'),
])

def _parse_features(raw_text):
    """Turn the text-box content into a ``(1, n)`` float array.

    Strict JSON is tried first (the input placeholder asks for JSON); Python
    literal syntax such as ``(1, 2, 3)`` is accepted as a fallback. Neither
    parser executes code.

    Raises:
        ValueError, SyntaxError, TypeError: if the text is not a literal
            sequence of numbers.
    """
    try:
        parsed = json.loads(raw_text)
    except ValueError:
        parsed = ast.literal_eval(raw_text)
    return np.asarray(parsed, dtype=float).reshape(1, -1)


# Callback to update the prediction.
# The text box is read as State, so the prediction is recomputed only when the button
# is clicked, not on every keystroke while the user is still typing.
@app.callback(
    Output('prediction-output', 'children'),
    Input('predict-button', 'n_clicks'),
    State('input-features', 'value')
)
def update_prediction(n_clicks, value):
    """Predict a price for the feature vector typed into the text box.

    Args:
        n_clicks: Number of times the Predict button has been clicked.
        value: Current text of the feature input (a JSON / literal list of numbers).

    Returns:
        The text to display: the formatted prediction, a hint when nothing has
        been submitted yet, or a message describing why the input was rejected.
    """
    if not (n_clicks and value):
        return 'Enter features and click Predict'

    # SECURITY: this text comes from whoever can reach the dashboard. It used to be
    # passed to eval(), which would execute arbitrary Python on the server; it is now
    # parsed strictly as data.
    try:
        features = _parse_features(value)
    except (ValueError, SyntaxError, TypeError):
        return 'Could not read the features: enter a list of numbers, e.g. [1, 2, 3]'

    # The model was fitted on a fixed number of columns; report a mismatch here rather
    # than letting predict() raise inside the callback.
    expected = getattr(loaded_model, 'n_features_in_', None)
    if expected is not None and features.shape[1] != expected:
        return f'Expected {expected} feature values, got {features.shape[1]}'

    prediction = loaded_model.predict(features)
    return f'Predicted House Price: ${prediction[0]:,.2f}'

# Redraw the sale price histogram whenever the Predict button fires
@app.callback(
    Output(component_id='price-distribution', component_property='figure'),
    Input(component_id='predict-button', component_property='n_clicks')
)
def update_graph(n_clicks):
    """Build the histogram of `SalePrice` from the loaded `data` frame.

    `n_clicks` is not used in the figure; the button click only triggers the redraw.
    """
    histogram_options = dict(x='SalePrice', nbins=50, title='Distribution of House Prices')
    return px.histogram(data, **histogram_options)

if __name__ == '__main__':
    # Start the development server. `app.run` replaces `app.run_server`, which newer
    # Dash releases no longer provide; the install cell does not pin a Dash version.
    app.run(debug=True)

<IPython.core.display.Javascript object>