In [1]:
import pandas as pd
import numpy as np

In [2]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [3]:
df = pd.read_csv('/content/drive/MyDrive/ML Thoery/House Price Prediction/Housing.csv')

In [4]:
df.iloc[0:3, :]

Unnamed: 0,area,bedroom,bathroom,floor,mainroad,basement,hotwaterheating,airconditioning,parking,furnishined,price
0,7420,4,2,3,yes,no,no,yes,2,fully,13300000
1,8960,4,4,4,yes,no,no,yes,3,fully,12250000
2,9960,3,2,2,yes,yes,no,no,2,partially,12250000


# Preprocessing of Data

In [5]:
df[['mainroad','basement','hotwaterheating','airconditioning']] = df[['mainroad','basement','hotwaterheating','airconditioning']].replace({'yes':1, 'no':0})
df = pd.get_dummies(df, columns = ['furnishined'])#one hot encoding

In [6]:
df.isnull().sum()

area                     0
bedroom                  0
bathroom                 0
floor                    0
mainroad                 0
basement                 0
hotwaterheating          0
airconditioning          0
parking                  0
price                    0
furnishined_fully        0
furnishined_no           0
furnishined_partially    0
dtype: int64

In [7]:
df.head(1)

Unnamed: 0,area,bedroom,bathroom,floor,mainroad,basement,hotwaterheating,airconditioning,parking,price,furnishined_fully,furnishined_no,furnishined_partially
0,7420,4,2,3,1,0,0,1,2,13300000,1,0,0


In [8]:
X = df[['area', 'bedroom', 'floor', 'mainroad', 'basement', 'hotwaterheating', 'airconditioning', 'parking', 'furnishined_fully', 'furnishined_no', 'furnishined_partially']]
Y = df[['price']]
print(type(X),type(Y))

<class 'pandas.core.frame.DataFrame'> <class 'pandas.core.frame.DataFrame'>


In [9]:
from sklearn.model_selection import train_test_split
X_train,X_test,Y_train,Y_test = train_test_split(X,Y,test_size=0.3, random_state=0)

In [10]:
from sklearn.linear_model import LinearRegression
reg = LinearRegression()
reg.fit(X_train,Y_train)

In [None]:
y_predict = reg.predict(X_test)
print(y_predict)

In [12]:
#Check evalution of model
from sklearn.metrics import r2_score
r2 = r2_score(Y_test , y_predict)
print('Goodness of model is:- ',r2)

Goodness of model is:-  0.6492789232953736


In [13]:
def predict_price(area,bedroom,floor,mainroad,basement,hotwaterheating,airconditioning,parking,furnishined_fully,furnishined_no,furnishined_partially):
    inputs = {
        'area':[area],
        'bedroom':[bedroom],
        'floor':[floor],
        'mainroad':[mainroad],
        'basement':[basement],
        'hotwaterheating':[hotwaterheating],
        'airconditioning':[airconditioning],
        'parking':[parking],
        'furnishined_fully':[furnishined_fully],
        'furnishined_no':[furnishined_no],
        'furnishined_partially':[furnishined_partially]
    }

    df2 = pd.DataFrame(inputs)
    y_p = reg.predict(df2)
    return y_p

In [None]:
area = input('Enter Area(sqft): ')
bedroom = input('Bedrooms: ')
floor = input('Floors: ')
mainroad = input('Mainroad {1 for yes, 0 for no}: ')
basement = input('Basement {1 for yes, 0 for no}: ')
hotwaterheating = input('HotwaterHeating {1 for yes, 0 for no}: ')
airconditioning = input('Airconditioning {1 for yes, 0 for no}: ')
parking = input('Parking: ')
furnishined_fully = input('Fully furnished  {1 for yes, 0 for no}: ')
furnishined_no = input('Not Furnished {1 for yes, 0 for no}: ')
furnishined_partially = input('Partially furnished {1 for yes, 0 for no}: ')

predicted_price = predict_price(area, bedroom, floor, mainroad, basement, hotwaterheating, airconditioning, parking, furnishined_fully, furnishined_no, furnishined_partially)
predicted_price = float(predicted_price)
print(f"\nPredicted House Price: ${predicted_price:.2f}")


Enter Area(sqft): 2000
Bedrooms: 4
Floors: 2
Mainroad {1 for yes, 0 for no}: 0
Basement {1 for yes, 0 for no}: 1
HotwaterHeating {1 for yes, 0 for no}: 1
Airconditioning {1 for yes, 0 for no}: 1
Parking: 2
Fully furnished  {1 for yes, 0 for no}: 0
Not Furnished {1 for yes, 0 for no}: 0
Partially furnished {1 for yes, 0 for no}: 1

Predicted House Price: $6602543.32


# Manual Implementation

 The [1] in X.shape[1] indicates the number of columns (features) in the feature matrix X.

In [None]:
# Initialize model parameters
w = np.zeros(X.shape[1]) # weights
b = 0  # Bias
m = Y.shape[0]
alpha = 0.01 # alpha
print(m)

545


In [14]:
X = df[['area', 'bedroom', 'floor', 'mainroad', 'basement', 'hotwaterheating', 'airconditioning', 'parking', 'furnishined_fully', 'furnishined_no', 'furnishined_partially']]
Y = df[['price']]

In [15]:
# Cost Function
def mean_squared_error(X, Y, w, b):
  cost=0.0
  for i in range(m):
    f_wb_i = np.dot(X[i],w) + b
    cost = cost + (f_wb_i - Y[i])**2
  cost = cost / (2*m)
  return cost

In [16]:
# Derivative of J(w,b)
def derivatives(X,Y,w,b,m,sum_w=0.0,sum_b=0.0):
  for i in range(m):
    y_hat = np.dot(X[i],w) + b
    sum_w = sum_w + ((y_hat - Y[i]) * X[i])
    sum_b = sum_b + (y_hat - Y[i])

  dj_dw = sum_w / m
  dj_db = sum_b / m

  return dj_dw,dj_db

In [22]:
def gradient_descent(X, Y, w, b, m, alpha, epochs=50):
    for epoch in range(epochs):
        dj_dw, dj_db = derivatives(X, Y, w, b, m)
        w = w - alpha * dj_dw
        b = b - alpha * dj_db
        print(w)
    return w, b


In [None]:
print(X_train.head(2))

In [23]:
X_train,X_test,Y_train,Y_test = train_test_split(X,Y,test_size=0.2, random_state=0)

# Initialize model parameters
w = np.zeros(X.shape[1]) # weights
b = 0  # Bias
m = Y.shape[0]
alpha = 0.01 # alpha
#print(m)

w_final, b_final = gradient_descent(X_train, Y_train, w, b, m , alpha)

for i in range(len(X_train)):

    prediction = np.dot(X_train.iloc[i], w_final) + b_final
    target_value = Y_train.iloc[i]
    print(f"prediction: {prediction:0.2f}, target value: {target_value}")


KeyError: ignored