In [22]:
import pandas as pd
import numpy as np

In [23]:
data = pd.read_csv('Housing.csv')
data.head(3)

Unnamed: 0,price,area,bedrooms,bathrooms,stories,mainroad,guestroom,basement,hotwaterheating,airconditioning,parking,prefarea,furnishingstatus
0,13300000,7420,4,2,3,yes,no,no,no,yes,2,yes,furnished
1,12250000,8960,4,4,4,yes,no,no,no,yes,3,no,furnished
2,12250000,9960,3,2,2,yes,no,yes,no,no,2,yes,semi-furnished


##### Check for any Duplicated or Null Data

In [24]:
data.isna().any().sum(),data.duplicated().sum()

(0, 0)

In [25]:
data['price']= data['price']/100000

##### Map Some values

In [26]:
data['mainroad']=data['mainroad'].map({'yes':1,'no':0})
data['guestroom']=data['guestroom'].map({'yes':1,'no':0})
data['basement']=data['basement'].map({'yes':1,'no':0})
data['hotwaterheating']=data['hotwaterheating'].map({'yes':1,'no':0})
data['airconditioning']=data['airconditioning'].map({'yes':1,'no':0})
data['prefarea']=data['prefarea'].map({'yes':1,'no':0})
data['furnishingstatus']=data['furnishingstatus'].map({'furnished':2,'semi-furnished':1,'unfurnished':0})

In [27]:
data.head(3)

Unnamed: 0,price,area,bedrooms,bathrooms,stories,mainroad,guestroom,basement,hotwaterheating,airconditioning,parking,prefarea,furnishingstatus
0,133.0,7420,4,2,3,1,0,0,0,1,2,1,2
1,122.5,8960,4,4,4,1,0,0,0,1,3,0,2
2,122.5,9960,3,2,2,1,0,1,0,0,2,1,1


##### Taking a part of the data-set to train

In [28]:
len(data)
import random
X_train = data.iloc[:,1]
random_values = random.choices(X_train,k= int(len(data) * 0.8))
train_data = data[data['area'].isin(random_values)]
X_train = train_data.iloc[:,1:]


In [29]:
X_train = pd.DataFrame(X_train).to_numpy()
y_train = train_data.iloc[:,0]
y_train = pd.DataFrame(y_train).to_numpy()
b_init = 0
w_init = np.zeros(X_train.shape[1])

##### function to predict target after computing the weights

In [30]:
def predict_single_loop(x,w,b):
    return np.dot(x,w) + b

##### function to compute the cost of each iteration

In [31]:
def compute_cost(x,y,w,b):
    m = x.shape[0]
    cost = 0.0
    for i in range(m):
        f = np.dot(x[i],w) +b
        cost += (f - y[i])**2
    cost = cost / (2*m)
    return cost
compute_cost(X_train,y_train,w_init,b_init)

array([1332.06779616])

##### function to compute the derivative of cost function

In [32]:
def compute_gradient(x,y,w,b):
    m,n = x.shape
    dj_db = 0
    dj_dw = np.zeros((n,))
    for i in range(m):
        err = np.dot(x[i],w) + b - y[i]
        for j in range(n):
            dj_dw[j] += err * x[i,j]
        dj_db += err
    dj_dw /= m
    dj_db /= m
    return dj_db,dj_dw

In [33]:
tmp_dj_db, tmp_dj_dw = compute_gradient(X_train, y_train, w_init, b_init)
print(tmp_dj_db)
print(tmp_dj_dw)

[-47.95678194]
[-2.67814597e+05 -1.47199533e+02 -6.70932366e+01 -9.37716650e+01
 -4.31160449e+01 -1.03840971e+01 -1.91144244e+01 -2.77582393e+00
 -1.89000664e+01 -3.85457196e+01 -1.42436648e+01 -4.86786542e+01]


##### function to compute the gradient descent 

In [34]:
def gradient_descent(x,y,w_in,b_in,cost_func,grad_func,iters,alpha):
    J_hist = []
    w = w_in
    b = b_in
    for i in range(iters):
        d_b,d_w = grad_func(x,y,w,b)
        w = w - alpha * d_w
        b = b - alpha * d_b
        J_hist.append(cost_func(x,y,w,b))
    return w, b, J_hist

In [35]:
w,b,j=gradient_descent(X_train,y_train,w_init,b_init,compute_cost,compute_gradient,800,5.0e-8)
print(b)
print(w)
print(j[-20:])

[0.00014349]
[8.67415029e-03 5.38446684e-04 3.20100959e-04 4.89915093e-04
 1.31092491e-04 5.85833622e-05 9.85378402e-05 2.36442747e-05
 1.24011278e-04 1.44422456e-04 7.91024998e-05 2.13345694e-04]
[array([170.43610343]), array([170.43607964]), array([170.43605585]), array([170.43603206]), array([170.43600826]), array([170.43598447]), array([170.43596068]), array([170.43593689]), array([170.43591309]), array([170.4358893]), array([170.43586551]), array([170.43584172]), array([170.43581793]), array([170.43579413]), array([170.43577034]), array([170.43574655]), array([170.43572276]), array([170.43569896]), array([170.43567517]), array([170.43565138])]


In [36]:
pred =predict_single_loop(X_train,w,b)
train_data['pred'] = pred


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  train_data['pred'] = pred


In [37]:
train_data

Unnamed: 0,price,area,bedrooms,bathrooms,stories,mainroad,guestroom,basement,hotwaterheating,airconditioning,parking,prefarea,furnishingstatus,pred
0,133.0000,7420,4,2,3,1,0,0,0,1,2,1,2,64.367652
1,122.5000,8960,4,4,4,1,0,0,0,1,3,0,2,77.727039
3,122.1500,7500,4,2,2,1,0,1,0,1,3,1,2,65.061337
4,114.1000,7420,4,1,2,1,1,1,0,1,2,0,2,64.366920
5,108.5000,7500,3,3,1,1,0,1,0,1,2,1,1,65.060271
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
538,18.9000,3649,2,1,1,1,0,0,0,0,0,0,0,31.654136
540,18.2000,3000,2,1,1,1,0,1,0,0,2,0,0,26.025000
541,17.6715,2400,3,1,1,0,0,0,0,0,0,0,1,20.820743
543,17.5000,2910,3,1,1,0,0,0,0,0,0,0,2,25.244773


In [38]:
train_data['area'],train_data['pred'] = train_data['pred'],train_data['area']
train_data.rename(columns={'area':'predictions','pred':'area'},inplace=True)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  train_data['area'],train_data['pred'] = train_data['pred'],train_data['area']
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  train_data.rename(columns={'area':'predictions','pred':'area'},inplace=True)


In [39]:
train_data['score'] = train_data['predictions'] / train_data['price']
train_data['score'] = train_data['score'].apply(lambda x:x if x<1 else (1/x))

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  train_data['score'] = train_data['predictions'] / train_data['price']
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  train_data['score'] = train_data['score'].apply(lambda x:x if x<1 else (1/x))


In [40]:
train_data['bedrooms'],train_data['score'] = train_data['score'],train_data['bedrooms']
train_data.rename(columns={'bedrooms':'score','score':'bedrooms'},inplace=True)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  train_data['bedrooms'],train_data['score'] = train_data['score'],train_data['bedrooms']
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  train_data.rename(columns={'bedrooms':'score','score':'bedrooms'},inplace=True)


In [41]:
train_data

Unnamed: 0,price,predictions,score,bathrooms,stories,mainroad,guestroom,basement,hotwaterheating,airconditioning,parking,prefarea,furnishingstatus,area,bedrooms
0,133.0000,64.367652,0.483967,2,3,1,0,0,0,1,2,1,2,7420,4
1,122.5000,77.727039,0.634506,4,4,1,0,0,0,1,3,0,2,8960,4
3,122.1500,65.061337,0.532635,2,2,1,0,1,0,1,3,1,2,7500,4
4,114.1000,64.366920,0.564127,1,2,1,1,1,0,1,2,0,2,7420,4
5,108.5000,65.060271,0.599634,3,1,1,0,1,0,1,2,1,1,7500,3
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
538,18.9000,31.654136,0.597078,1,1,1,0,0,0,0,0,0,0,3649,2
540,18.2000,26.025000,0.699328,1,1,1,0,1,0,0,2,0,0,3000,2
541,17.6715,20.820743,0.848745,1,1,0,0,0,0,0,0,0,1,2400,3
543,17.5000,25.244773,0.693213,1,1,0,0,0,0,0,0,0,2,2910,3


In [42]:
n=data.sample()
p=predict_single_loop(n.iloc[:,1:],w,b)
print(n.iloc[0,0])
print(p[0])


33.6
35.7401506113913
