In [1]:
import pandas as pd

In [2]:
# Load the real-estate transactions; the path is relative to the working directory.
data_set = pd.read_csv("Real_Estate.csv")

In [3]:
# Inspect column names, non-null counts and dtypes before any cleaning.
data_set.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 414 entries, 0 to 413
Data columns (total 7 columns):
 #   Column                               Non-Null Count  Dtype  
---  ------                               --------------  -----  
 0   Transaction date                     414 non-null    object 
 1   House age                            414 non-null    float64
 2   Distance to the nearest MRT station  414 non-null    float64
 3   Number of convenience stores         414 non-null    int64  
 4   Latitude                             414 non-null    float64
 5   Longitude                            414 non-null    float64
 6   House price of unit area             414 non-null    float64
dtypes: float64(5), int64(1), object(1)
memory usage: 22.8+ KB


In [4]:
# Preview the first rows of the raw data.
data_set.head()

Unnamed: 0,Transaction date,House age,Distance to the nearest MRT station,Number of convenience stores,Latitude,Longitude,House price of unit area
0,2012-09-02 16:42:30.519336,13.3,4082.015,8,25.007059,121.561694,6.488673
1,2012-09-04 22:52:29.919544,35.5,274.0144,2,25.012148,121.54699,24.970725
2,2012-09-05 01:10:52.349449,1.1,1978.671,10,25.00385,121.528336,26.694267
3,2012-09-05 13:26:01.189083,22.2,1055.067,5,24.962887,121.482178,38.091638
4,2012-09-06 08:29:47.910523,8.5,967.4,6,25.011037,121.479946,21.65471


In [20]:
# Count missing values per column.
data_set.isnull().sum()

Transaction date                       0
House age                              0
Distance to the nearest MRT station    0
Number of convenience stores           0
Latitude                               0
Longitude                              0
House price of unit area               0
dtype: int64

In [21]:
# Count rows that are exact duplicates of an earlier row.
data_set.duplicated().sum()

0

In [22]:
# Parse the timestamp strings (loaded as object dtype) into pandas datetimes.
data_set['Transaction date'] = pd.to_datetime(data_set['Transaction date'])

In [23]:
# Encode the timestamp as float seconds since the Unix epoch.
# Subtracting the epoch and dividing by a one-second Timedelta gives the same
# result whatever resolution the datetime column is stored in; casting to int64
# and dividing by 1e9 is only correct for nanosecond-resolution columns.
# This also fails loudly (TypeError) if the cell is re-run on the already-converted
# float column, instead of silently rescaling the values a second time.
data_set['Transaction date'] = (
    (data_set['Transaction date'] - pd.Timestamp("1970-01-01")) / pd.Timedelta("1s")
)

In [24]:
# Name of the response column and the four predictor columns
target = 'House price of unit area'
features = [
    'Distance to the nearest MRT station',
    'Number of convenience stores',
    'Latitude',
    'Longitude',
]

# Design matrix and response vector
X = data_set.loc[:, features]
y = data_set.loc[:, target]

In [25]:
# Confirm that 'Transaction date' is now numeric (float64).
data_set.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 414 entries, 0 to 413
Data columns (total 7 columns):
 #   Column                               Non-Null Count  Dtype  
---  ------                               --------------  -----  
 0   Transaction date                     414 non-null    float64
 1   House age                            414 non-null    float64
 2   Distance to the nearest MRT station  414 non-null    float64
 3   Number of convenience stores         414 non-null    int64  
 4   Latitude                             414 non-null    float64
 5   Longitude                            414 non-null    float64
 6   House price of unit area             414 non-null    float64
dtypes: float64(6), int64(1)
memory usage: 22.8 KB


In [11]:
# Build the design matrix from the selected feature columns only.
# Dropping just the target would also keep 'Transaction date' and 'House age',
# which the prediction inputs constructed later in this notebook do not supply,
# so a top-to-bottom run would fit the model on columns it is never given again.
X = data_set[features]

In [26]:
# Check which columns make up the design matrix passed to the train/test split.
X.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 414 entries, 0 to 413
Data columns (total 4 columns):
 #   Column                               Non-Null Count  Dtype  
---  ------                               --------------  -----  
 0   Distance to the nearest MRT station  414 non-null    float64
 1   Number of convenience stores         414 non-null    int64  
 2   Latitude                             414 non-null    float64
 3   Longitude                            414 non-null    float64
dtypes: float64(3), int64(1)
memory usage: 13.1 KB


In [13]:
# Response vector for the regression.
y = data_set['House price of unit area']


In [27]:
from sklearn.model_selection import train_test_split
# Hold out 20% of the rows for evaluation; the fixed random_state keeps the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [28]:
from sklearn.linear_model import LinearRegression

# Linear regression estimator with default settings
model = LinearRegression()

In [29]:
# The target passed to fit (y_train) should have shape (n_samples,) or (n_samples, 1) for regression


In [30]:
# Fit the regression on the training split.
model.fit(X_train, y_train)

In [31]:
pred = model.predict(X_test)  # predict() takes only the features; y_test is kept for scoring the result


In [32]:
from sklearn.metrics import mean_absolute_error

# Mean absolute error on the held-out split
mae = mean_absolute_error(y_true=y_test, y_pred=pred)
print("MAE", mae)


MAE 9.518038948836525


In [33]:
from sklearn.metrics import mean_squared_error

# Mean squared error on the held-out split
mse = mean_squared_error(y_true=y_test, y_pred=pred)
print("MSE", mse)

MSE 124.57938927549698


In [34]:
from sklearn.metrics import r2_score

# Coefficient of determination on the held-out split
r2 = r2_score(y_true=y_test, y_pred=pred)
print(r2)

0.5496730640943475


In [44]:
import numpy as np
from datetime import datetime

# Sample property used to try out the fitted model
distance_to_mrt = 274.0144      # metres to the nearest MRT station
num_convenience_stores = 2      # convenience stores nearby
latitude = 25.007059
longitude = 121.561694
house_age = 12                  # years; not one of the columns listed in `features`

# Current wall-clock time and the seconds elapsed since 1970-01-01.
# Neither value is placed in the test row below.
time_date = datetime.now()
time_date_num = (time_date - datetime(1970, 1, 1)).total_seconds()

# One-row frame whose columns follow the order of `features`
sample_values = [distance_to_mrt, num_convenience_stores, latitude, longitude]
test_data = pd.DataFrame(data=[sample_values], columns=features)

# Show the test row
print(test_data)


   Distance to the nearest MRT station  Number of convenience stores  \
0                             274.0144                             2   

    Latitude   Longitude  
0  25.007059  121.561694  


In [46]:
# Predict the unit-area price for the single sample row.
model.predict(test_data)

array([32.00816436])

In [50]:
import dash
from dash import html, dcc, Input, Output, State
import pandas as pd

# Create the Dash application object
app = dash.Dash(__name__)


def _number_input(component_id, placeholder):
    """Return a numeric dcc.Input carrying the spacing shared by all form fields."""
    return dcc.Input(id=component_id, type='number', placeholder=placeholder,
                     style={'margin': '10px', 'padding': '10px'})


# Form controls: four numeric fields followed by the submit button
_form_controls = [
    _number_input('distance_to_mrt', 'Distance to MRT Station (meters)'),
    _number_input('num_convenience_stores', 'Number of Convenience Stores'),
    _number_input('latitude', 'Latitude'),
    _number_input('longitude', 'Longitude'),
    html.Button('Predict Price', id='predict_button', n_clicks=0,
                style={'margin': '10px', 'padding': '10px', 'background-color': '#007BFF', 'color': 'white'}),
]

# Page layout: a centred, bordered card holding the title, the form and the result line
_card_style = {'width': '50%', 'margin': '0 auto', 'border': '2px solid #007BFF',
               'padding': '20px', 'border-radius': '10px'}
_output_style = {'text-align': 'center', 'font-size': '20px', 'margin-top': '20px'}

app.layout = html.Div([
    html.Div([
        html.H1("Real Estate Price Prediction", style={'text-align': 'center'}),
        html.Div(_form_controls, style={'text-align': 'center'}),
        html.Div(id='prediction_output', style=_output_style),
    ], style=_card_style)
])

# Define callback to update output
@app.callback(
    Output('prediction_output', 'children'),
    [Input('predict_button', 'n_clicks')],
    [State('distance_to_mrt', 'value'),
     State('num_convenience_stores', 'value'),
     State('latitude', 'value'),
     State('longitude', 'value')]
)
def update_output(n_clicks, distance_to_mrt, num_convenience_stores, latitude, longitude):
    """Return the text shown under the form after the predict button is pressed.

    Args:
        n_clicks: Number of times the button has been clicked (0 or None before
            the first click).
        distance_to_mrt: Value of the 'distance_to_mrt' input, or None if empty.
        num_convenience_stores: Value of the 'num_convenience_stores' input, or None if empty.
        latitude: Value of the 'latitude' input, or None if empty.
        longitude: Value of the 'longitude' input, or None if empty.

    Returns:
        An empty string before the first click, a prompt when any field is
        empty, otherwise the model's prediction formatted to two decimals.
    """
    # Nothing to show until the button has been pressed; `not n_clicks` also
    # covers None, which a `> 0` comparison would reject with a TypeError.
    if not n_clicks:
        return ''

    input_values = [distance_to_mrt, num_convenience_stores, latitude, longitude]
    if any(value is None for value in input_values):
        return 'Please enter all values to get a prediction'

    # Single-row frame for the model; named so it does not shadow the
    # module-level `features` list.
    input_row = pd.DataFrame(
        [input_values],
        columns=['Distance to the nearest MRT station', 'Number of convenience stores', 'Latitude', 'Longitude'],
    )
    prediction = model.predict(input_row)[0]
    return f'Predicted House Price of Unit Area: {prediction:.2f}'

# Run the app
if __name__ == '__main__':
    # `app.run` is the supported entry point; `run_server` is the legacy alias
    # and is no longer available in current Dash releases.
    app.run(debug=True, port=8051)