In [2]:
import pandas as pd
# from sklearn.preprocessing import LabelEncoder
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LinearRegression, Lasso, Ridge
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score

In [3]:
df = pd.read_csv('Housing.csv')
df.head()

Unnamed: 0,price,area,bedrooms,bathrooms,stories,mainroad,guestroom,basement,hotwaterheating,airconditioning,parking,prefarea,furnishingstatus
0,13300000,7420,4,2,3,yes,no,no,no,yes,2,yes,furnished
1,12250000,8960,4,4,4,yes,no,no,no,yes,3,no,furnished
2,12250000,9960,3,2,2,yes,no,yes,no,no,2,yes,semi-furnished
3,12215000,7500,4,2,2,yes,no,yes,no,yes,3,yes,furnished
4,11410000,7420,4,1,2,yes,yes,yes,no,yes,2,no,furnished


In [4]:
df.isna().sum()

price               0
area                0
bedrooms            0
bathrooms           0
stories             0
mainroad            0
guestroom           0
basement            0
hotwaterheating     0
airconditioning     0
parking             0
prefarea            0
furnishingstatus    0
dtype: int64

In [5]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 545 entries, 0 to 544
Data columns (total 13 columns):
 #   Column            Non-Null Count  Dtype 
---  ------            --------------  ----- 
 0   price             545 non-null    int64 
 1   area              545 non-null    int64 
 2   bedrooms          545 non-null    int64 
 3   bathrooms         545 non-null    int64 
 4   stories           545 non-null    int64 
 5   mainroad          545 non-null    object
 6   guestroom         545 non-null    object
 7   basement          545 non-null    object
 8   hotwaterheating   545 non-null    object
 9   airconditioning   545 non-null    object
 10  parking           545 non-null    int64 
 11  prefarea          545 non-null    object
 12  furnishingstatus  545 non-null    object
dtypes: int64(6), object(7)
memory usage: 55.5+ KB


In [6]:
df.describe(include = 'all')

Unnamed: 0,price,area,bedrooms,bathrooms,stories,mainroad,guestroom,basement,hotwaterheating,airconditioning,parking,prefarea,furnishingstatus
count,545.0,545.0,545.0,545.0,545.0,545,545,545,545,545,545.0,545,545
unique,,,,,,2,2,2,2,2,,2,3
top,,,,,,yes,no,no,no,no,,no,semi-furnished
freq,,,,,,468,448,354,520,373,,417,227
mean,4766729.0,5150.541284,2.965138,1.286239,1.805505,,,,,,0.693578,,
std,1870440.0,2170.141023,0.738064,0.50247,0.867492,,,,,,0.861586,,
min,1750000.0,1650.0,1.0,1.0,1.0,,,,,,0.0,,
25%,3430000.0,3600.0,2.0,1.0,1.0,,,,,,0.0,,
50%,4340000.0,4600.0,3.0,1.0,2.0,,,,,,0.0,,
75%,5740000.0,6360.0,3.0,2.0,2.0,,,,,,1.0,,


In [7]:
lstOutliers = ['price', 'area']
df_clean = df.copy()

def removeOutliers(df,lst):
    for _ in lst:
    
        Q1 = df[_].quantile(0.25)
        Q3 = df[_].quantile(0.75)

        IQR = Q3 - Q1 

        upperBound = Q3 + (1.5*IQR)
        lowerBound = Q1 - (1.5*IQR)

        df = df[(df[_] < upperBound) & (df[_] > lowerBound)]
    return df

    

In [8]:
df_clean = removeOutliers(df_clean,lstOutliers) 

In [9]:
df_clean.info()

<class 'pandas.core.frame.DataFrame'>
Index: 517 entries, 15 to 544
Data columns (total 13 columns):
 #   Column            Non-Null Count  Dtype 
---  ------            --------------  ----- 
 0   price             517 non-null    int64 
 1   area              517 non-null    int64 
 2   bedrooms          517 non-null    int64 
 3   bathrooms         517 non-null    int64 
 4   stories           517 non-null    int64 
 5   mainroad          517 non-null    object
 6   guestroom         517 non-null    object
 7   basement          517 non-null    object
 8   hotwaterheating   517 non-null    object
 9   airconditioning   517 non-null    object
 10  parking           517 non-null    int64 
 11  prefarea          517 non-null    object
 12  furnishingstatus  517 non-null    object
dtypes: int64(6), object(7)
memory usage: 56.5+ KB


In [10]:
df_clean.head()

Unnamed: 0,price,area,bedrooms,bathrooms,stories,mainroad,guestroom,basement,hotwaterheating,airconditioning,parking,prefarea,furnishingstatus
15,9100000,6000,4,1,2,yes,no,yes,no,no,2,no,semi-furnished
16,9100000,6600,4,2,2,yes,yes,yes,no,yes,1,yes,unfurnished
17,8960000,8500,3,2,4,yes,no,no,no,yes,2,no,furnished
18,8890000,4600,3,2,2,yes,yes,no,no,yes,2,no,furnished
19,8855000,6420,3,2,2,yes,no,no,no,yes,1,yes,semi-furnished


In [11]:
# furnishingstatusEncoded = LabelEncoder()

# furnishingstatusEncoded.fit_transform(df_clean['furnishingstatus'])

In [12]:
df_clean['furnishingstatus'] = df_clean['furnishingstatus'].map({'unfurnished': 0, 'semi-furnished': 1, 'furnished': 2})

In [13]:
boolValues = ['mainroad', 'guestroom', 'basement', 'hotwaterheating', 'airconditioning', 'prefarea']

df_clean[boolValues] = df_clean[boolValues].replace({'yes': 1, 'no': 0})

  df_clean[boolValues] = df_clean[boolValues].replace({'yes': 1, 'no': 0})


In [14]:
df_clean.head()

Unnamed: 0,price,area,bedrooms,bathrooms,stories,mainroad,guestroom,basement,hotwaterheating,airconditioning,parking,prefarea,furnishingstatus
15,9100000,6000,4,1,2,1,0,1,0,0,2,0,1
16,9100000,6600,4,2,2,1,1,1,0,1,1,1,0
17,8960000,8500,3,2,4,1,0,0,0,1,2,0,2
18,8890000,4600,3,2,2,1,1,0,0,1,2,0,2
19,8855000,6420,3,2,2,1,0,0,0,1,1,1,1


In [15]:
x = df_clean.drop('price', axis=1)
y = df_clean['price']

In [16]:
x

Unnamed: 0,area,bedrooms,bathrooms,stories,mainroad,guestroom,basement,hotwaterheating,airconditioning,parking,prefarea,furnishingstatus
15,6000,4,1,2,1,0,1,0,0,2,0,1
16,6600,4,2,2,1,1,1,0,1,1,1,0
17,8500,3,2,4,1,0,0,0,1,2,0,2
18,4600,3,2,2,1,1,0,0,1,2,0,2
19,6420,3,2,2,1,0,0,0,1,1,1,1
...,...,...,...,...,...,...,...,...,...,...,...,...
540,3000,2,1,1,1,0,1,0,0,2,0,0
541,2400,3,1,1,0,0,0,0,0,0,0,1
542,3620,2,1,1,1,0,0,0,0,0,0,0
543,2910,3,1,1,0,0,0,0,0,0,0,2


In [17]:
y

15     9100000
16     9100000
17     8960000
18     8890000
19     8855000
        ...   
540    1820000
541    1767150
542    1750000
543    1750000
544    1750000
Name: price, Length: 517, dtype: int64

In [18]:
scaler = StandardScaler()

toBeScaled = ['area', 'bedrooms', 'bathrooms', 'stories', 'parking']

x[toBeScaled] = scaler.fit_transform(x[toBeScaled])
x

Unnamed: 0,area,bedrooms,bathrooms,stories,mainroad,guestroom,basement,hotwaterheating,airconditioning,parking,prefarea,furnishingstatus
15,0.624429,1.447384,-0.561652,0.237201,1,0,1,0,0,1.597777,0,1
16,0.961991,1.447384,1.589268,0.237201,1,1,1,0,1,0.410922,1,0
17,2.030937,0.079381,1.589268,2.551031,1,0,0,0,1,1.597777,0,2
18,-0.163216,0.079381,1.589268,0.237201,1,1,0,0,1,1.597777,0,2
19,0.860722,0.079381,1.589268,0.237201,1,0,0,0,1,0.410922,1,1
...,...,...,...,...,...,...,...,...,...,...,...,...
540,-1.063381,-1.288621,-0.561652,-0.919714,1,0,1,0,0,1.597777,0,0
541,-1.400943,0.079381,-0.561652,-0.919714,0,0,0,0,0,-0.775932,0,1
542,-0.714567,-1.288621,-0.561652,-0.919714,1,0,0,0,0,-0.775932,0,0
543,-1.114015,0.079381,-0.561652,-0.919714,0,0,0,0,0,-0.775932,0,2


In [19]:
x_train, x_tmp, y_train, y_tmp = train_test_split(x, y, train_size=0.7, random_state=42)

x_vald, x_test, y_vald, y_test = train_test_split(x_tmp, y_tmp, train_size=0.5, random_state=42)

In [20]:
model = Lasso()

model.fit(x_train, y_train)

In [21]:
y_vald_pred = model.predict(x_vald)
y_test_pred = model.predict(x_test)

In [22]:
mseVald = mean_squared_error(y_vald, y_vald_pred)
mseTest = mean_squared_error(y_test, y_test_pred)
maeVald = mean_absolute_error(y_vald, y_vald_pred)
maeTest = mean_absolute_error(y_test, y_test_pred)
r2Vald = r2_score(y_vald, y_vald_pred)
r2Test = r2_score(y_test, y_test_pred)
print(f'MSE VALDIATION: {mseVald}\nMSE TEST: {mseTest}\nMAE VALDIATION: {maeVald}\nMAE TEST: {maeTest}\nR2 VALDIATION: {r2Vald}\nR2 TEST: {r2Test}\n')



MSE VALDIATION: 703675743361.7639
MSE TEST: 1045725549291.0002
MAE VALDIATION: 648846.8785194434
MAE TEST: 758137.9052954771
R2 VALDIATION: 0.7719203374752013
R2 TEST: 0.629025395382856



In [23]:
import pandas as pd
import dash
from dash import dcc
from dash import html
import plotly.express as px
import plotly.graph_objects as go 

actual_vs_predicted_df = pd.DataFrame({
    'Actual Price': y_test,
    'Predicted Price': y_test_pred
})
app = dash.Dash(__name__)

app.layout = html.Div(style={'fontFamily': 'Arial, sans-serif', 'padding': '20px'}, children=[

    html.H1(
        "Housing Price Analysis Dashboard",
        style={'textAlign': 'center', 'color': '#333', 'marginBottom': '30px'}
    ),

    
    html.Div(style={'display': 'flex', 'justifyContent': 'space-around', 'marginBottom': '30px'}, children=[

        html.Div(style={'width': '45%', 'padding': '10px', 'border': '1px solid #ddd', 'borderRadius': '5px'}, children=[
            html.Label("Select Furnishing Status:", style={'fontWeight': 'bold', 'marginBottom': '10px', 'display': 'block'}),
            dcc.Dropdown(
                id='furnishing-status-dropdown',
                options=[{'label': status.capitalize(), 'value': status}
                         for status in df['furnishingstatus'].unique()],
                value='furnished',
                clearable=False,
                style={'width': '100%'}
            ),
        ]),

        html.Div(style={'width': '45%', 'padding': '10px', 'border': '1px solid #ddd', 'borderRadius': '5px'}, children=[
            html.Label("Select Number of Stories:", style={'fontWeight': 'bold', 'marginBottom': '10px', 'display': 'block'}),
            dcc.Slider(
                id='stories-slider',
                min=df['stories'].min(),
                max=df['stories'].max(),
                step=1,
                value=df['stories'].max(),
                marks={str(story): str(story) for story in df['stories'].unique()},
                tooltip={"placement": "bottom", "always_visible": True}
            ),
        ]),
    ]),

    
    html.Div(style={'border': '1px solid #ddd', 'borderRadius': '5px', 'padding': '20px', 'marginBottom': '30px'}, children=[
        html.H2("Price vs. Area Scatter Plot", style={'textAlign': 'center', 'color': '#555', 'marginBottom': '20px'}),
        dcc.Graph(
            id='price-area-scatter-plot',
            figure={}
        )
    ]),

    
    html.Div(style={'border': '1px solid #ddd', 'borderRadius': '5px', 'padding': '20px'}, children=[
        html.H2("Actual vs. Predicted Prices", style={'textAlign': 'center', 'color': '#555', 'marginBottom': '20px'}),
        html.P(f"Model Performance: MAE = ${maeTest:,.2f}$, R² = ${r2Test:.2f}$",
               style={'textAlign': 'center', 'fontSize': '1.1em', 'color': '#666'}),
        dcc.Graph(
            id='actual-vs-predicted-plot',
            figure={} 
        )
    ])
])


@app.callback(
    dash.dependencies.Output('price-area-scatter-plot', 'figure'),
    [
        dash.dependencies.Input('furnishing-status-dropdown', 'value'),
        dash.dependencies.Input('stories-slider', 'value')
    ]
)
def update_scatter_plot(selected_furnishing_status, selected_stories):
    filtered_df = df[df['furnishingstatus'] == selected_furnishing_status]
    filtered_df = filtered_df[filtered_df['stories'] == selected_stories]

    fig = px.scatter(
        filtered_df,
        x="area",
        y="price",
        color="bedrooms",
        size="price",
        hover_name="price",
        title=f"Housing Prices by Area (Furnishing: {selected_furnishing_status.capitalize()}, Stories: {selected_stories})",
        labels={"area": "Area (sq ft)", "price": "Price ($)"},
        template="plotly_white"
    )

    fig.update_layout(
        xaxis_title="Area (sq ft)",
        yaxis_title="Price ($)",
        margin={"l": 40, "r": 40, "t": 60, "b": 40}
    )
    return fig

@app.callback(
    dash.dependencies.Output('actual-vs-predicted-plot', 'figure'),
    [dash.dependencies.Input('furnishing-status-dropdown', 'value')]
)
def update_actual_vs_predicted_plot(value): 
    fig = go.Figure()

    fig.add_trace(go.Scatter(
        x=actual_vs_predicted_df['Actual Price'],
        y=actual_vs_predicted_df['Predicted Price'],
        mode='markers',
        name='Predictions',
        marker=dict(color='blue', opacity=0.6)
    ))


    max_val = max(actual_vs_predicted_df['Actual Price'].max(), actual_vs_predicted_df['Predicted Price'].max())
    min_val = min(actual_vs_predicted_df['Actual Price'].min(), actual_vs_predicted_df['Predicted Price'].min())

    fig.add_trace(go.Scatter(
        x=[min_val, max_val],
        y=[min_val, max_val],
        mode='lines',
        name='Perfect Prediction',
        line=dict(color='red', dash='dash')
    ))

    fig.update_layout(
        title='Actual vs. Predicted Housing Prices',
        xaxis_title='Actual Price ($)',
        yaxis_title='Predicted Price ($)',
        hovermode='closest',
        template="plotly_white",
        margin={"l": 40, "r": 40, "t": 60, "b": 40}
    )

    return fig


if __name__ == '__main__':
    app.run(debug=True)

In [4]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import random

num_of_rooms =[1,2,3,4,5,6,7]
prices = [155,197,244,300,356,407,488]

def simple_trick(b,m,num_of_rooms,prices,eta_1,eta_2):
    eta_1 = random.random()*0.1
    eta_2 = random.random()*0.1
    y = b +m*num_of_rooms
    if prices > y and num_of_rooms > 0:
        m += eta_1
        b += eta_2
    if prices > y and num_of_rooms < 0:
        m -= eta_1
        b += eta_2
    if prices < y and num_of_rooms > 0:
        m -= eta_1
        b -= eta_2
    if prices < y and num_of_rooms < 0:
        m += eta_1
        b -= eta_2
    return b,m
def square_trick(b,m,num_of_rooms,prices,eta_1):
    y = b + m*num_of_rooms
    b_n = b +m*eta_1*(prices-y)
    m_n = m +eta_1*(prices-y)*num_of_rooms
    return b_n,m_n

def absolute_trick(b,m,num_of_rooms,prices,eta_1):
    eta_1 = random.random()*0.1
    y = b +m*num_of_rooms
    if prices > y:
        b_n = b +eta_1
        m_n = m +eta_1*num_of_rooms
    if prices < y :
        b_n = b +eta_1
        m_n = m +eta_1*num_of_rooms
    return b_n,m_n

def linear_reg(num_of_rooms,prices,epoch =1000,learning_rate = 0.01):
    b = random.random()
    m = random.random()
    for i in range(epoch):
        e = random.randint(0,len(num_of_rooms)-1)
        x = num_of_rooms[e]
        y = prices[e]
        b,m = square_trick(b,m,x,y,eta_1=learning_rate)       
    return b,m
        

result = linear_reg(num_of_rooms, prices)
print(result)







(97.86729874318628, 56.05407893121717)
