In [1]:
import pandas as pd
import numpy as np

In [2]:
data = pd.read_csv('Housing.csv')
data.head(3)

Unnamed: 0,price,area,bedrooms,bathrooms,stories,mainroad,guestroom,basement,hotwaterheating,airconditioning,parking,prefarea,furnishingstatus
0,13300000,7420,4,2,3,yes,no,no,no,yes,2,yes,furnished
1,12250000,8960,4,4,4,yes,no,no,no,yes,3,no,furnished
2,12250000,9960,3,2,2,yes,no,yes,no,no,2,yes,semi-furnished


##### Check for any Duplicated or Null Data

In [3]:
data.isna().any().sum(),data.duplicated().sum()

(0, 0)

In [4]:
data['price']= data['price']/100000

##### Map Some values

In [5]:
var = ['mainroad' , 'guestroom' , 'basement' , 'hotwaterheating' ,'prefarea' , 'airconditioning']

for i  in var: 
    data[i] = data[i].map({'yes':1,'no':0})
    
data['furnishingstatus']=data['furnishingstatus'].map({'furnished':2,'semi-furnished':1,'unfurnished':0})

In [6]:
data.head(3)

Unnamed: 0,price,area,bedrooms,bathrooms,stories,mainroad,guestroom,basement,hotwaterheating,airconditioning,parking,prefarea,furnishingstatus
0,133.0,7420,4,2,3,1,0,0,0,1,2,1,2
1,122.5,8960,4,4,4,1,0,0,0,1,3,0,2
2,122.5,9960,3,2,2,1,0,1,0,0,2,1,1


##### Taking a part of the data-set to train

In [7]:
len(data)
import random
X_train = data.iloc[:,1]
random_values = random.choices(X_train,k= int(len(data) * 0.8))
train_data = data[data['area'].isin(random_values)]
X_train = train_data.iloc[:,1:]


In [8]:
def rescale(X):
    m,n = X.shape
    for i in range(n):
        col = X.iloc[:,i]
        min_,max_,mu_ = col.min(),col.max(),col.mean()
        variance = max_ - min_
        for j in range(m):
            X.iloc[j,i] = (X.iloc[j,i] - mu_) / variance
    return X

In [9]:
rescale(X_train)

Unnamed: 0,area,bedrooms,bathrooms,stories,mainroad,guestroom,basement,hotwaterheating,airconditioning,parking,prefarea,furnishingstatus
0,0.210405,0.206075,0.350467,0.384735,0.123832,-0.170561,-0.336449,-0.044393,0.682243,0.429128,0.766355,0.535047
3,0.217489,0.206075,0.350467,0.051402,0.123832,-0.170561,0.663551,-0.044393,0.682243,0.762461,0.766355,0.535047
4,0.210405,0.206075,-0.149533,0.051402,0.123832,0.829439,0.663551,-0.044393,0.682243,0.429128,-0.233645,0.535047
5,0.217489,0.006075,0.850467,-0.281931,0.123832,-0.170561,0.663551,-0.044393,0.682243,0.429128,0.766355,0.035047
6,0.313115,0.206075,0.850467,0.718069,0.123832,-0.170561,-0.336449,-0.044393,0.682243,0.429128,0.766355,0.035047
...,...,...,...,...,...,...,...,...,...,...,...,...
540,-0.180953,-0.193925,-0.149533,-0.281931,0.123832,-0.170561,0.663551,-0.044393,-0.317757,0.429128,-0.233645,-0.464953
541,-0.234078,0.006075,-0.149533,-0.281931,-0.876168,-0.170561,-0.336449,-0.044393,-0.317757,-0.237539,-0.233645,0.035047
542,-0.126057,-0.193925,-0.149533,-0.281931,0.123832,-0.170561,-0.336449,-0.044393,-0.317757,-0.237539,-0.233645,-0.464953
543,-0.188922,0.006075,-0.149533,-0.281931,-0.876168,-0.170561,-0.336449,-0.044393,-0.317757,-0.237539,-0.233645,0.535047


In [10]:
rescale(train_data.iloc[:,1:])

Unnamed: 0,area,bedrooms,bathrooms,stories,mainroad,guestroom,basement,hotwaterheating,airconditioning,parking,prefarea,furnishingstatus
0,0.210405,0.206075,0.350467,0.384735,0.123832,-0.170561,-0.336449,-0.044393,0.682243,0.429128,0.766355,0.535047
3,0.217489,0.206075,0.350467,0.051402,0.123832,-0.170561,0.663551,-0.044393,0.682243,0.762461,0.766355,0.535047
4,0.210405,0.206075,-0.149533,0.051402,0.123832,0.829439,0.663551,-0.044393,0.682243,0.429128,-0.233645,0.535047
5,0.217489,0.006075,0.850467,-0.281931,0.123832,-0.170561,0.663551,-0.044393,0.682243,0.429128,0.766355,0.035047
6,0.313115,0.206075,0.850467,0.718069,0.123832,-0.170561,-0.336449,-0.044393,0.682243,0.429128,0.766355,0.035047
...,...,...,...,...,...,...,...,...,...,...,...,...
540,-0.180953,-0.193925,-0.149533,-0.281931,0.123832,-0.170561,0.663551,-0.044393,-0.317757,0.429128,-0.233645,-0.464953
541,-0.234078,0.006075,-0.149533,-0.281931,-0.876168,-0.170561,-0.336449,-0.044393,-0.317757,-0.237539,-0.233645,0.035047
542,-0.126057,-0.193925,-0.149533,-0.281931,0.123832,-0.170561,-0.336449,-0.044393,-0.317757,-0.237539,-0.233645,-0.464953
543,-0.188922,0.006075,-0.149533,-0.281931,-0.876168,-0.170561,-0.336449,-0.044393,-0.317757,-0.237539,-0.233645,0.535047


In [11]:
X_train = pd.DataFrame(X_train).to_numpy()
y_train = train_data.iloc[:,0]
y_train = pd.DataFrame(y_train).to_numpy()
b_init = 0
w_init = np.zeros(X_train.shape[1])

##### function to predict target after computing the weights

In [12]:
def predict_single_loop(x,w,b):
    return np.dot(x,w) + b

##### function to compute the cost of each iteration

In [13]:
def compute_cost(x,y,w,b):
    m = x.shape[0]
    cost = 0.0
    for i in range(m):
        f = np.dot(x[i],w) +b
        cost += (f - y[i])**2
    cost = cost / (2*m)
    return cost
compute_cost(X_train,y_train,w_init,b_init)

array([1328.21955236])

##### function to compute the derivative of cost function

In [14]:
def compute_gradient(x,y,w,b):
    m,n = x.shape
    dj_db = 0
    dj_dw = np.zeros((n,))
    for i in range(m):
        err = np.dot(x[i],w) + b - y[i]
        for j in range(n):
            dj_dw[j] += err * x[i,j]
        dj_db += err
    dj_dw /= m
    dj_db /= m
    return dj_db,dj_dw

In [15]:
tmp_dj_db, tmp_dj_dw = compute_gradient(X_train, y_train, w_init, b_init)
print(tmp_dj_db)
print(tmp_dj_dw)

[-47.91490514]
[-1.7620529  -1.0432284  -2.43426582 -2.53146226 -1.81811559 -1.98814702
 -1.60610668 -0.53097384 -4.31196799 -2.05896241 -2.79136212 -2.41490836]


##### function to compute the gradient descent 

In [16]:
def gradient_descent(x,y,w_in,b_in,cost_func,grad_func,iters,alpha):
    J_hist = []
    w = w_in
    b = b_in
    for i in range(iters):
        d_b,d_w = grad_func(x,y,w,b)
        w = w - alpha * d_w
        b = b - alpha * d_b
        J_hist.append(cost_func(x,y,w,b))
    return w, b, J_hist

In [17]:
w,b,j=gradient_descent(X_train,y_train,w_init,b_init,compute_cost,compute_gradient,800,5.0e-8)
print(b)
print(w)
print(j[-20:])

[0.00191656]
[7.04818332e-05 4.17289489e-05 9.73702254e-05 1.01258021e-04
 7.27242118e-05 7.95253124e-05 6.42436962e-05 2.12389626e-05
 1.72477584e-04 8.23581368e-05 1.11653751e-04 9.65956928e-05]
[array([1328.12743925]), array([1328.12732131]), array([1328.12720337]), array([1328.12708543]), array([1328.1269675]), array([1328.12684956]), array([1328.12673162]), array([1328.12661368]), array([1328.12649574]), array([1328.12637781]), array([1328.12625987]), array([1328.12614193]), array([1328.12602399]), array([1328.12590605]), array([1328.12578812]), array([1328.12567018]), array([1328.12555224]), array([1328.1254343]), array([1328.12531637]), array([1328.12519843])]


In [18]:
pred =predict_single_loop(X_train,w,b)
train_data.loc[:,'pred'] = pred


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  train_data.loc[:,'pred'] = pred


In [19]:
train_data['pred'] *= 100000
train_data

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  train_data['pred'] *= 100000


Unnamed: 0,price,area,bedrooms,bathrooms,stories,mainroad,guestroom,basement,hotwaterheating,airconditioning,parking,prefarea,furnishingstatus,pred
0,133.0000,7420,4,2,3,1,0,0,0,1,2,1,2,227.621710
3,122.1500,7500,4,2,2,1,0,1,0,1,3,1,2,233.466009
4,114.1000,7420,4,1,2,1,1,1,0,1,2,0,2,222.589457
5,108.5000,7500,3,3,1,1,0,1,0,1,2,1,1,226.549618
6,101.5000,8580,4,3,4,1,0,0,0,1,2,1,1,231.759619
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
540,18.2000,3000,2,1,1,1,0,1,0,0,2,0,0,179.926808
541,17.6715,2400,3,1,1,0,0,0,0,0,0,0,1,166.029400
542,17.5000,3620,2,1,1,1,0,0,0,0,0,0,0,168.398816
543,17.5000,2910,3,1,1,0,0,0,0,0,0,0,2,171.177457


In [20]:
train_data['area'],train_data['pred'] = train_data['pred'],train_data['area']
train_data.rename(columns={'area':'predictions','pred':'area'},inplace=True)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  train_data['area'],train_data['pred'] = train_data['pred'],train_data['area']
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  train_data.rename(columns={'area':'predictions','pred':'area'},inplace=True)


In [21]:
train_data['score'] = train_data['predictions'] / train_data['price']
train_data['score'] = train_data['score'].apply(lambda x:x if x<1 else (1/x))

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  train_data['score'] = train_data['predictions'] / train_data['price']
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  train_data['score'] = train_data['score'].apply(lambda x:x if x<1 else (1/x))


In [22]:
train_data['bedrooms'],train_data['score'] = train_data['score'],train_data['bedrooms']
train_data.rename(columns={'bedrooms':'score','score':'bedrooms'},inplace=True)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  train_data['bedrooms'],train_data['score'] = train_data['score'],train_data['bedrooms']
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  train_data.rename(columns={'bedrooms':'score','score':'bedrooms'},inplace=True)


In [23]:
train_data

Unnamed: 0,price,predictions,score,bathrooms,stories,mainroad,guestroom,basement,hotwaterheating,airconditioning,parking,prefarea,furnishingstatus,area,bedrooms
0,133.0000,227.621710,0.584303,2,3,1,0,0,0,1,2,1,2,7420,4
3,122.1500,233.466009,0.523203,2,2,1,0,1,0,1,3,1,2,7500,4
4,114.1000,222.589457,0.512603,1,2,1,1,1,0,1,2,0,2,7420,4
5,108.5000,226.549618,0.478924,3,1,1,0,1,0,1,2,1,1,7500,3
6,101.5000,231.759619,0.437954,3,4,1,0,0,0,1,2,1,1,8580,4
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
540,18.2000,179.926808,0.101152,1,1,1,0,1,0,0,2,0,0,3000,2
541,17.6715,166.029400,0.106436,1,1,0,0,0,0,0,0,0,1,2400,3
542,17.5000,168.398816,0.103920,1,1,1,0,0,0,0,0,0,0,3620,2
543,17.5000,171.177457,0.102233,1,1,0,0,0,0,0,0,0,2,2910,3


In [24]:
n=data.sample()
p=predict_single_loop(n.iloc[:,1:],w,b) *100
print(f'Sample:  {n.iloc[0,0]}')
print(f'Predict: {p[0]}')
score = min(p[0]/n.iloc[0,0],n.iloc[0,0]/p[0])*100
print('Score:   {a:.2f}%'.format(a=score))


Sample:  52.29
Predict: 50.21936201630177
Score:   96.04%
