## Housing Prices Prediction using sklearn.linear_model LinearRegression

In [19]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn import metrics
import numpy as np

In [20]:
df = pd.read_csv('Housing.csv')

print(df.head())

      price  area  bedrooms  bathrooms  stories mainroad guestroom basement  \
0  13300000  7420         4          2        3      yes        no       no   
1  12250000  8960         4          4        4      yes        no       no   
2  12250000  9960         3          2        2      yes        no      yes   
3  12215000  7500         4          2        2      yes        no      yes   
4  11410000  7420         4          1        2      yes       yes      yes   

  hotwaterheating airconditioning  parking prefarea furnishingstatus  
0              no             yes        2      yes        furnished  
1              no             yes        3       no        furnished  
2              no              no        2      yes   semi-furnished  
3              no             yes        3      yes        furnished  
4              no             yes        2       no        furnished  


In [21]:
binary_cols = ['mainroad', 'guestroom', 'basement', 'hotwaterheating', 'airconditioning', 'prefarea']

df[binary_cols] = df[binary_cols].apply(lambda x: x.map({'yes': 1, 'no': 0}))



status = pd.get_dummies(df['furnishingstatus'], drop_first=True)

df = pd.concat([df, status], axis=1)
df.drop(['furnishingstatus'], axis=1, inplace=True)

print(df.head())

      price  area  bedrooms  bathrooms  stories  mainroad  guestroom  \
0  13300000  7420         4          2        3         1          0   
1  12250000  8960         4          4        4         1          0   
2  12250000  9960         3          2        2         1          0   
3  12215000  7500         4          2        2         1          0   
4  11410000  7420         4          1        2         1          1   

   basement  hotwaterheating  airconditioning  parking  prefarea  \
0         0                0                1        2         1   
1         0                0                1        3         0   
2         1                0                0        2         1   
3         1                0                1        3         1   
4         1                0                1        2         0   

   semi-furnished  unfurnished  
0           False        False  
1           False        False  
2            True        False  
3           False        F

In [22]:
X = df.drop('price', axis=1)
y = df['price']

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.1, random_state=42)

model = LinearRegression()
model.fit(X_train, y_train)

0,1,2
,fit_intercept,True
,copy_X,True
,tol,1e-06
,n_jobs,
,positive,False


In [23]:
y_pred = model.predict(X_test)

mae = metrics.mean_absolute_error(y_test, y_pred)
mse = metrics.mean_squared_error(y_test, y_pred)
r2 = metrics.r2_score(y_test, y_pred)

print("\n--- Model Evaluation ---")
print(f'Mean Absolute Error (MAE): {mae:,.0f}')
print(f'Mean Squared Error (MSE): {mse:,.0f}')
print(f'R-squared (R²): {r2*100:.4f}%') ## What precent can our model guess 


--- Model Evaluation ---
Mean Absolute Error (MAE): 875,813
Mean Squared Error (MSE): 1,451,838,962,598
R-squared (R²): 68.7528%


#### Housing Prices Old

In [1]:
import pandas as pd
import tensorflow as tf
import numpy as np
import matplotlib.pyplot as plt

In [2]:
housesdata = pd.read_csv("Housing.csv")
print(housesdata)

        price  area  bedrooms  bathrooms  stories mainroad guestroom basement  \
0    13300000  7420         4          2        3      yes        no       no   
1    12250000  8960         4          4        4      yes        no       no   
2    12250000  9960         3          2        2      yes        no      yes   
3    12215000  7500         4          2        2      yes        no      yes   
4    11410000  7420         4          1        2      yes       yes      yes   
..        ...   ...       ...        ...      ...      ...       ...      ...   
540   1820000  3000         2          1        1      yes        no      yes   
541   1767150  2400         3          1        1       no        no       no   
542   1750000  3620         2          1        1      yes        no       no   
543   1750000  2910         3          1        1       no        no       no   
544   1750000  3850         3          1        2      yes        no       no   

    hotwaterheating aircond

In [4]:
pricesdata = tf.constant(housesdata['price'].values.reshape(-1,1), dtype=tf.float32)

area=tf.constant(housesdata['area'].values.reshape(-1,1), dtype=tf.float32)
beadrooms=tf.constant(housesdata['bedrooms'].values.reshape(-1,1), dtype=tf.float32)
bathrooms=tf.constant(housesdata['bathrooms'].values.reshape(-1,1), dtype=tf.float32)
stories=tf.constant(housesdata['stories'].values.reshape(-1,1), dtype=tf.float32)
parking=tf.constant(housesdata['parking'].values.reshape(-1,1), dtype=tf.float32)



guestroom = housesdata['guestroom'].map({'yes': 2, 'no': 1})
guestroom=tf.constant(guestroom.values.reshape(-1,1), dtype=tf.float32)


basement = housesdata['basement'].map({'yes': 2, 'no': 1})
basement=tf.constant(basement.values.reshape(-1,1), dtype=tf.float32)

hotwaterheating = housesdata['hotwaterheating'].map({'yes': 2, 'no': 1})
hotwaterheating=tf.constant(hotwaterheating.values.reshape(-1,1), dtype=tf.float32)


airconditioning = housesdata['airconditioning'].map({'yes': 2, 'no': 1})
airconditioning=tf.constant(airconditioning.values.reshape(-1,1), dtype=tf.float32)

prefarea = housesdata['prefarea'].map({'yes': 2, 'no': 1})
prefarea=tf.constant(prefarea.values.reshape(-1,1), dtype=tf.float32)

furnishingstatus = housesdata['furnishingstatus'].map({'furnished': 3, 'unfurnished': 2, 'semi-furnished': 1})
furnishingstatus=tf.constant(furnishingstatus.values.reshape(-1,1), dtype=tf.float32)


In [16]:
learning_rate = 0.2

def loss_fn(y_true, y_pred):
  return tf.reduce_mean(tf.square(y_true - y_pred))


In [None]:
def calval(w, k, x):
    return w *( x ** k)

def housing_model(*values):
    sum = 0
    for value in values:
        sum += calval(*value)
    
    return sum

In [17]:
warea = tf.Variable(tf.random.normal(shape=[]), name="warea")
karea = tf.Variable(tf.random.normal(shape=[]), name="karea")

wbedrooms = tf.Variable(tf.random.normal(shape=[]), name="wbedrooms")
kbedrooms = tf.Variable(tf.random.normal(shape=[]), name="kbedrooms")

wbathrooms = tf.Variable(tf.random.normal(shape=[]), name="wbathrooms")
kbathrooms = tf.Variable(tf.random.normal(shape=[]), name="kbathrooms")

wstories = tf.Variable(tf.random.normal(shape=[]), name="wstories")
kstories = tf.Variable(tf.random.normal(shape=[]), name="kstories")

wparking = tf.Variable(tf.random.normal(shape=[]), name="wparking")
kparking = tf.Variable(tf.random.normal(shape=[]), name="kparking")

wguestroom = tf.Variable(tf.random.normal(shape=[]), name="wguestroom")
kguestroom = tf.Variable(tf.random.normal(shape=[]), name="kguestroom")

wbasement = tf.Variable(tf.random.normal(shape=[]), name="wbasement")
kbasement = tf.Variable(tf.random.normal(shape=[]), name="kbasement")

whotwaterheating = tf.Variable(tf.random.normal(shape=[]), name="whotwaterheating")
khotwaterheating = tf.Variable(tf.random.normal(shape=[]), name="khotwaterheating")

wairconditioning = tf.Variable(tf.random.normal(shape=[]), name="wairconditioning")
kairconditioning = tf.Variable(tf.random.normal(shape=[]), name="kairconditioning")

wprefarea = tf.Variable(tf.random.normal(shape=[]), name="wprefarea")
kprefarea = tf.Variable(tf.random.normal(shape=[]), name="kprefarea")

wfurnishingstatus = tf.Variable(tf.random.normal(shape=[]), name="wfurnishingstatus")
kfurnishingstatus = tf.Variable(tf.random.normal(shape=[]), name="kfurnishingstatus")


optimizer = tf.keras.optimizers.Adam(learning_rate)

for epoch in range(2000):
    with tf.GradientTape() as tape:
        areadata = [warea, karea, area]
        bedroomsdata = [wbedrooms, kbedrooms, beadrooms]
        bathroomsdata = [wbathrooms, kbathrooms, bathrooms]
        storiesdata = [wstories, kstories, stories]
        parkingdata = [wparking, kparking, parking]
        guestroomdata = [wguestroom, kguestroom, guestroom]
        basementdata = [wbasement, kbasement, basement]
        hotwaterheatingdata = [whotwaterheating, khotwaterheating, hotwaterheating]
        airconditioningdata = [wairconditioning, kairconditioning, airconditioning]
        prefareadata = [wprefarea, kprefarea, prefarea]
        furnishingstatusdata = [wfurnishingstatus, kfurnishingstatus, furnishingstatus]


        y_pred = housing_model(areadata, bedroomsdata, bathroomsdata, 
                               storiesdata, parkingdata, guestroomdata, basementdata, prefareadata, airconditioningdata, hotwaterheatingdata, furnishingstatusdata)



        current_loss = loss_fn(pricesdata, y_pred)

    gradients = tape.gradient(current_loss, [warea, karea])

    optimizer.apply_gradients(zip(gradients, [warea, karea]))

    if (epoch + 1) % 100 == 0:
        print(f"Epoch {epoch+1:3d}, Loss: {current_loss:.4f}, a: {warea.numpy():.4f}, b: {karea.numpy():.4f}") 


#Needs too many epochs

Epoch 100, Loss: 25128765227008.0000, a: 20.7475, b: 0.7901
Epoch 200, Loss: 24113296965632.0000, a: 40.4450, b: 0.7901
Epoch 300, Loss: 23133241212928.0000, a: 59.9178, b: 0.7901
Epoch 400, Loss: 22187557781504.0000, a: 79.1703, b: 0.7901
Epoch 500, Loss: 21275252621312.0000, a: 98.2069, b: 0.7901
Epoch 600, Loss: 20395363139584.0000, a: 117.0316, b: 0.7901
Epoch 700, Loss: 19546979172352.0000, a: 135.6481, b: 0.7901
Epoch 800, Loss: 18729219915776.0000, a: 154.0600, b: 0.7901
Epoch 900, Loss: 17941229731840.0000, a: 172.2705, b: 0.7901
Epoch 1000, Loss: 17182203314176.0000, a: 190.2827, b: 0.7901
Epoch 1100, Loss: 16451360522240.0000, a: 208.0992, b: 0.7901
Epoch 1200, Loss: 15747948478464.0000, a: 225.7227, b: 0.7901
Epoch 1300, Loss: 15071244713984.0000, a: 243.1552, b: 0.7901
Epoch 1400, Loss: 14420554022912.0000, a: 260.3988, b: 0.7901
Epoch 1500, Loss: 13795206365184.0000, a: 277.4550, b: 0.7901
Epoch 1600, Loss: 13194553720832.0000, a: 294.3253, b: 0.7901
Epoch 1700, Loss: 1261

In [18]:
print(f"area: {warea.numpy():.4f} : {karea.numpy():.4f}")
print(f"bedrooms: {wbedrooms.numpy():.4f}  :{kbedrooms.numpy():.4f}")
print(f"bathrooms: {wbathrooms.numpy():.4f} : {kbathrooms.numpy():.4f}")
print(f"stories: {wstories.numpy():.4f} : {kstories.numpy():.4f}")
print(f"parking: {wparking.numpy():.4f} : {kparking.numpy():.4f}")
print(f"guestrooms: {wguestroom.numpy():.4f} : {kguestroom.numpy():.4f}")
print(f"basement: {wbasement.numpy():.4f} : {kbasement.numpy():.4f}")
print(f"hotwater: {whotwaterheating.numpy():.4f} : {khotwaterheating.numpy():.4f}")
print(f"prefarea: {wprefarea.numpy():.4f} : {kprefarea.numpy():.4f}")
print(f"furnished: {wfurnishingstatus.numpy():.4f} : {wfurnishingstatus.numpy():.4f}")

area: 359.9637 : 0.7901
bedrooms: 0.5434  :1.7439
bathrooms: 0.5319 : -0.6703
stories: -0.1482 : -0.5069
parking: -1.0358 : 0.4627
guestrooms: -0.2686 : -0.9843
basement: 0.3881 : -0.6318
hotwater: 0.8743 : -0.7590
prefarea: -1.9666 : -1.0984
furnished: -2.3674 : -2.3674
