In [2]:
import pandas as pd
import numpy as np

In [3]:
data = pd.read_csv('Housing.csv')
data.head(3)

Unnamed: 0,price,area,bedrooms,bathrooms,stories,mainroad,guestroom,basement,hotwaterheating,airconditioning,parking,prefarea,furnishingstatus
0,13300000,7420,4,2,3,yes,no,no,no,yes,2,yes,furnished
1,12250000,8960,4,4,4,yes,no,no,no,yes,3,no,furnished
2,12250000,9960,3,2,2,yes,no,yes,no,no,2,yes,semi-furnished


##### Check for any Duplicated or Null Data

In [4]:
data.isna().any().sum(),data.duplicated().sum()

(0, 0)

In [5]:
data['price']= data['price']/100000

##### Map Some values

In [6]:
data['mainroad']=data['mainroad'].map({'yes':1,'no':0})
data['guestroom']=data['guestroom'].map({'yes':1,'no':0})
data['basement']=data['basement'].map({'yes':1,'no':0})
data['hotwaterheating']=data['hotwaterheating'].map({'yes':1,'no':0})
data['airconditioning']=data['airconditioning'].map({'yes':1,'no':0})
data['prefarea']=data['prefarea'].map({'yes':1,'no':0})
data['furnishingstatus']=data['furnishingstatus'].map({'furnished':2,'semi-furnished':1,'unfurnished':0})

In [7]:
data.head(3)

Unnamed: 0,price,area,bedrooms,bathrooms,stories,mainroad,guestroom,basement,hotwaterheating,airconditioning,parking,prefarea,furnishingstatus
0,133.0,7420,4,2,3,1,0,0,0,1,2,1,2
1,122.5,8960,4,4,4,1,0,0,0,1,3,0,2
2,122.5,9960,3,2,2,1,0,1,0,0,2,1,1


##### Taking a part of the data-set to train

In [8]:
len(data)
import random
X_train = data.iloc[:,1]
random_values = random.choices(X_train,k= int(len(data) * 0.8))
train_data = data[data['area'].isin(random_values)]
X_train = train_data.iloc[:,1:]


In [9]:
def rescale(X):
    m,n = X.shape
    for i in range(n):
        col = X.iloc[:,i]
        min_,max_,mu_ = col.min(),col.max(),col.mean()
        variance = max_ - min_
        for j in range(m):
            X.iloc[j,i] = (X.iloc[j,i] - mu_) / variance
    return X

In [10]:
rescale(X_train)

Unnamed: 0,area,bedrooms,bathrooms,stories,mainroad,guestroom,basement,hotwaterheating,airconditioning,parking,prefarea,furnishingstatus
0,0.159952,0.204955,0.230480,0.389640,0.135135,-0.18018,-0.358108,-0.051802,0.675676,0.430180,0.756757,0.528153
1,0.270346,0.204955,0.897147,0.722973,0.135135,-0.18018,-0.358108,-0.051802,0.675676,0.763514,-0.243243,0.528153
2,0.342031,0.004955,0.230480,0.056306,0.135135,-0.18018,0.641892,-0.051802,-0.324324,0.430180,0.756757,0.028153
3,0.165686,0.204955,0.230480,0.056306,0.135135,-0.18018,0.641892,-0.051802,0.675676,0.763514,0.756757,0.528153
4,0.159952,0.204955,-0.102853,0.056306,0.135135,0.81982,0.641892,-0.051802,0.675676,0.430180,-0.243243,0.528153
...,...,...,...,...,...,...,...,...,...,...,...,...
539,-0.157611,-0.195045,-0.102853,-0.277027,-0.864865,-0.18018,-0.358108,-0.051802,-0.324324,0.096847,-0.243243,-0.471847
540,-0.156894,-0.195045,-0.102853,-0.277027,0.135135,-0.18018,0.641892,-0.051802,-0.324324,0.430180,-0.243243,-0.471847
541,-0.199905,0.004955,-0.102853,-0.277027,-0.864865,-0.18018,-0.358108,-0.051802,-0.324324,-0.236486,-0.243243,0.028153
542,-0.112450,-0.195045,-0.102853,-0.277027,0.135135,-0.18018,-0.358108,-0.051802,-0.324324,-0.236486,-0.243243,-0.471847


In [11]:
rescale(train_data.iloc[:,1:])

Unnamed: 0,area,bedrooms,bathrooms,stories,mainroad,guestroom,basement,hotwaterheating,airconditioning,parking,prefarea,furnishingstatus
0,0.159952,0.204955,0.230480,0.389640,0.135135,-0.18018,-0.358108,-0.051802,0.675676,0.430180,0.756757,0.528153
1,0.270346,0.204955,0.897147,0.722973,0.135135,-0.18018,-0.358108,-0.051802,0.675676,0.763514,-0.243243,0.528153
2,0.342031,0.004955,0.230480,0.056306,0.135135,-0.18018,0.641892,-0.051802,-0.324324,0.430180,0.756757,0.028153
3,0.165686,0.204955,0.230480,0.056306,0.135135,-0.18018,0.641892,-0.051802,0.675676,0.763514,0.756757,0.528153
4,0.159952,0.204955,-0.102853,0.056306,0.135135,0.81982,0.641892,-0.051802,0.675676,0.430180,-0.243243,0.528153
...,...,...,...,...,...,...,...,...,...,...,...,...
539,-0.157611,-0.195045,-0.102853,-0.277027,-0.864865,-0.18018,-0.358108,-0.051802,-0.324324,0.096847,-0.243243,-0.471847
540,-0.156894,-0.195045,-0.102853,-0.277027,0.135135,-0.18018,0.641892,-0.051802,-0.324324,0.430180,-0.243243,-0.471847
541,-0.199905,0.004955,-0.102853,-0.277027,-0.864865,-0.18018,-0.358108,-0.051802,-0.324324,-0.236486,-0.243243,0.028153
542,-0.112450,-0.195045,-0.102853,-0.277027,0.135135,-0.18018,-0.358108,-0.051802,-0.324324,-0.236486,-0.243243,-0.471847


In [12]:
X_train = pd.DataFrame(X_train).to_numpy()
y_train = train_data.iloc[:,0]
y_train = pd.DataFrame(y_train).to_numpy()
b_init = 0
w_init = np.zeros(X_train.shape[1])

##### function to predict target after computing the weights

In [13]:
def predict_single_loop(x,w,b):
    return np.dot(x,w) + b

##### function to compute the cost of each iteration

In [14]:
def compute_cost(x,y,w,b):
    m = x.shape[0]
    cost = 0.0
    for i in range(m):
        f = np.dot(x[i],w) +b
        cost += (f - y[i])**2
    cost = cost / (2*m)
    return cost
compute_cost(X_train,y_train,w_init,b_init)

array([1368.35138077])

##### function to compute the derivative of cost function

In [15]:
def compute_gradient(x,y,w,b):
    m,n = x.shape
    dj_db = 0
    dj_dw = np.zeros((n,))
    for i in range(m):
        err = np.dot(x[i],w) + b - y[i]
        for j in range(n):
            dj_dw[j] += err * x[i,j]
        dj_db += err
    dj_dw /= m
    dj_db /= m
    return dj_db,dj_dw

In [16]:
tmp_dj_db, tmp_dj_dw = compute_gradient(X_train, y_train, w_init, b_init)
print(tmp_dj_db)
print(tmp_dj_dw)

[-48.61553919]
[-1.6077565  -1.06684051 -1.77908073 -2.4828673  -1.99681498 -1.65704249
 -1.65237448 -0.38047883 -4.16102873 -2.36701498 -2.77788349 -2.2649582 ]


##### function to compute the gradient descent 

In [17]:
def gradient_descent(x,y,w_in,b_in,cost_func,grad_func,iters,alpha):
    J_hist = []
    w = w_in
    b = b_in
    for i in range(iters):
        d_b,d_w = grad_func(x,y,w,b)
        w = w - alpha * d_w
        b = b - alpha * d_b
        J_hist.append(cost_func(x,y,w,b))
    return w, b, J_hist

In [18]:
w,b,j=gradient_descent(X_train,y_train,w_init,b_init,compute_cost,compute_gradient,800,5.0e-8)
print(b)
print(w)
print(j[-20:])

[0.00194458]
[6.43100309e-05 4.26734422e-05 7.11629974e-05 9.93142719e-05
 7.98721735e-05 6.62811989e-05 6.60944403e-05 1.52191937e-05
 1.66440093e-04 9.46802128e-05 1.11114621e-04 9.05977833e-05]
[array([1368.25680939]), array([1368.2566883]), array([1368.25656722]), array([1368.25644613]), array([1368.25632505]), array([1368.25620396]), array([1368.25608288]), array([1368.25596179]), array([1368.2558407]), array([1368.25571962]), array([1368.25559853]), array([1368.25547745]), array([1368.25535636]), array([1368.25523528]), array([1368.25511419]), array([1368.25499311]), array([1368.25487202]), array([1368.25475094]), array([1368.25462985]), array([1368.25450877])]


In [19]:
pred =predict_single_loop(X_train,w,b)
train_data.loc[:,'pred'] = pred


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  train_data.loc[:,'pred'] = pred


In [20]:
train_data['pred'] *= 100000
train_data

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  train_data['pred'] *= 100000


Unnamed: 0,price,area,bedrooms,bathrooms,stories,mainroad,guestroom,basement,hotwaterheating,airconditioning,parking,prefarea,furnishingstatus,pred
0,133.0000,7420,4,2,3,1,0,0,0,1,2,1,2,227.823278
1,122.5000,8960,4,4,4,1,0,0,0,1,3,0,2,228.632444
2,122.5000,9960,3,2,2,1,0,1,0,0,2,1,1,210.265828
3,122.1500,7500,4,2,2,1,0,1,0,1,3,1,2,234.315133
4,114.1000,7420,4,1,2,1,1,1,0,1,2,0,2,224.266804
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
539,18.5500,2990,2,1,1,0,0,0,0,0,1,0,0,167.122568
540,18.2000,3000,2,1,1,1,0,1,0,0,2,0,0,184.879846
541,17.6715,2400,3,1,1,0,0,0,0,0,0,0,1,169.077926
542,17.5000,3620,2,1,1,1,0,0,0,0,0,0,0,172.244210


In [21]:
train_data['area'],train_data['pred'] = train_data['pred'],train_data['area']
train_data.rename(columns={'area':'predictions','pred':'area'},inplace=True)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  train_data['area'],train_data['pred'] = train_data['pred'],train_data['area']
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  train_data.rename(columns={'area':'predictions','pred':'area'},inplace=True)


In [22]:
train_data['score'] = train_data['predictions'] / train_data['price']
train_data['score'] = train_data['score'].apply(lambda x:x if x<1 else (1/x))

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  train_data['score'] = train_data['predictions'] / train_data['price']
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  train_data['score'] = train_data['score'].apply(lambda x:x if x<1 else (1/x))


In [23]:
train_data['bedrooms'],train_data['score'] = train_data['score'],train_data['bedrooms']
train_data.rename(columns={'bedrooms':'score','score':'bedrooms'},inplace=True)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  train_data['bedrooms'],train_data['score'] = train_data['score'],train_data['bedrooms']
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  train_data.rename(columns={'bedrooms':'score','score':'bedrooms'},inplace=True)


In [24]:
train_data

Unnamed: 0,price,predictions,score,bathrooms,stories,mainroad,guestroom,basement,hotwaterheating,airconditioning,parking,prefarea,furnishingstatus,area,bedrooms
0,133.0000,227.823278,0.583786,2,3,1,0,0,0,1,2,1,2,7420,4
1,122.5000,228.632444,0.535794,4,4,1,0,0,0,1,3,0,2,8960,4
2,122.5000,210.265828,0.582596,2,2,1,0,1,0,0,2,1,1,9960,3
3,122.1500,234.315133,0.521306,2,2,1,0,1,0,1,3,1,2,7500,4
4,114.1000,224.266804,0.508769,1,2,1,1,1,0,1,2,0,2,7420,4
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
539,18.5500,167.122568,0.110996,1,1,0,0,0,0,0,1,0,0,2990,2
540,18.2000,184.879846,0.098442,1,1,1,0,1,0,0,2,0,0,3000,2
541,17.6715,169.077926,0.104517,1,1,0,0,0,0,0,0,0,1,2400,3
542,17.5000,172.244210,0.101600,1,1,1,0,0,0,0,0,0,0,3620,2


In [25]:
n=data.sample()
p=predict_single_loop(n.iloc[:,1:],w,b) *100
print(f'Sample:  {n.iloc[0,0]}')
print(f'Predict: {p[0]}')
score = min(p[0]/n.iloc[0,0],n.iloc[0,0]/p[0])*100
print('Score:   {a:.2f}%'.format(a=score))


Sample:  68.95
Predict: 49.77707138992953
Score:   72.19%
