In [3]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import StandardScaler
from sklearn.datasets import fetch_california_housing

In [4]:
# Load the home-prices CSV (path is relative to the notebook's working
# directory) and display the whole frame as the cell output.
df = pd.read_csv('../homeprices copy.csv')
df

Unnamed: 0,area,bedrooms,age,price
0,2600,3.0,20,550000
1,3000,4.0,15,565000
2,3200,,18,610000
3,3600,3.0,30,595000
4,4000,5.0,8,760000
5,4100,6.0,8,810000


In [5]:
# Report the number of missing values per column. The count has to be printed
# explicitly: only the last expression of a cell is displayed, so a bare
# `df.isna().sum()` followed by another statement shows nothing.
print(df.isna().sum())

# Drop every row that contains a missing value. Rebinding `df` (instead of
# `inplace=True`) keeps the step explicit and chainable.
df = df.dropna()

In [6]:
# Target is the 'price' column; every remaining column is a feature.
y = df['price']
X = df.drop(columns='price')

In [7]:
# Standardise the features, then append a column of ones as the last column
# of X so that the matching (last) weight plays the role of the intercept.
scaler = StandardScaler()
X = scaler.fit_transform(X)
ones_col = np.ones((len(X), 1))
X = np.concatenate((X, ones_col), axis=1)

In [6]:
# Closed-form least squares (normal equation): solve (X^T X) w = X^T y.
# np.linalg.solve is used instead of multiplying by an explicit inverse:
# it gives the same solution while being cheaper and numerically more stable.
w = np.linalg.solve(np.dot(X.T, X), np.dot(X.T, y))

In [7]:
# Predictions of the closed-form weights on the training rows.
y_pred = X.dot(w)
y_pred

array([519866.710013  , 604304.29128739, 598930.42912874, 760000.        ,
       796898.56957087])

In [8]:
# R^2 = 1 - (residual sum of squares) / (total sum of squares about the mean).
r_squared = 1 - (((y - y_pred) ** 2).sum() / ((y - y.mean()) ** 2).sum())
r_squared

0.9543026098066568

In [9]:
def prepare_data(a, b):
    """Append a bias column of ones to ``a`` and return it together with ``b``.

    The column of ones has ``len(b)`` rows and is stacked as the last column
    of ``a``. ``b`` is returned unchanged.
    """
    intercept_col = np.ones((len(b), 1))
    return np.hstack((a, intercept_col)), b

In [32]:
def init_weights(a, b):
    """Return an all-zero weight column vector with one row per column of ``a``.

    ``b`` is accepted for symmetry with the other helpers but is not used.
    """
    n_weights = a.shape[1]
    return np.zeros((n_weights, 1))

In [77]:
def training(X, y, w, alpha=0.01, n_iters=1000, log_every=100):
    """Fit linear-regression weights with batch gradient descent.

    Minimises the cost ``sum((y - X @ w) ** 2) / (2 * n)``.

    Parameters
    ----------
    X : array-like, shape (n_samples, n_features)
        Design matrix. No bias column is added here; include one in ``X`` if
        an intercept is wanted.
    y : array-like with n_samples values
        Targets, either 1-D or an (n_samples, 1) column.
    w : array-like with n_features values
        Initial weights, either 1-D or an (n_features, 1) column.
    alpha : float, default 0.01
        Learning rate (step size).
    n_iters : int, default 1000
        Number of gradient steps.
    log_every : int or None, default 100
        Print the updated weights and the pre-update cost every ``log_every``
        iterations. ``None`` or ``0`` disables logging.

    Returns
    -------
    numpy.ndarray, shape (n_features, 1)
        The weights after ``n_iters`` updates.

    Raises
    ------
    ValueError
        If ``X`` has no rows or ``y`` does not have one value per row of ``X``.
    """
    X = np.asarray(X, dtype=float)
    # Force column vectors. A 1-D ``y`` would broadcast against the (n, 1)
    # predictions into an (n, n) matrix and silently corrupt the gradient.
    y = np.asarray(y, dtype=float).reshape(-1, 1)
    w = np.asarray(w, dtype=float).reshape(-1, 1)

    n = X.shape[0]
    if n == 0:
        raise ValueError("X must contain at least one sample")
    if y.shape[0] != n:
        raise ValueError(
            "X and y have different numbers of samples: %d != %d" % (n, y.shape[0])
        )

    for itr in range(n_iters):
        # With ``w`` as a column, X @ w is already an (n, 1) column of predictions.
        y_pred = np.dot(X, w)
        residual = y_pred - y
        cost = np.sum(np.square(residual)) / (2 * n)
        grad_cost = np.dot(X.T, residual) / n
        w = w - (alpha * grad_cost)
        if log_every and itr % log_every == 0:
            print("weights: ", w, " cost: ", cost)
    return w

In [34]:
def predict(a, w):
    """Return the linear predictions ``a . w`` as a column vector.

    The dot product is reshaped to ``(len(result), 1)`` so the output is a
    column whether ``w`` is passed as 1-D or as a column.
    """
    raw = np.dot(a, w)
    n_rows = len(raw)
    return raw.reshape((n_rows, 1))

In [62]:
# Turn the targets into an (n, 1) column so they line up with predict()'s
# column output, then start gradient descent from all-zero weights.
y = np.array(y).reshape(-1, 1)
w = init_weights(X, y)

In [78]:
# Run gradient descent starting from `w`; training() prints the weights and
# cost periodically while it runs.
new_w = training(X, y, w)

weights:  [[ 965.63339156]
 [ 992.97680798]
 [-787.55732335]
 [6560.        ]]  cost:  220945000000.0
weights:  [[ 42245.96609494]
 [ 37135.94281877]
 [-25912.23865717]
 [418283.95628351]]  cost:  29223053939.127697
weights:  [[ 50989.17095407]
 [ 39535.07272273]
 [-24350.0714385 ]
 [568988.23996024]]  cost:  4156983800.314865
weights:  [[ 54693.95416068]
 [ 39172.51896437]
 [-22153.38314859]
 [624150.88175434]]  cost:  792635960.9990139
weights:  [[ 56732.33876728]
 [ 38699.40068683]
 [-20876.07988332]
 [644342.19268105]]  cost:  338266319.33855194
weights:  [[ 57931.48615206]
 [ 38296.62132489]
 [-20228.85738839]
 [651732.86549293]]  cost:  276137389.31115377
weights:  [[ 58663.41500719]
 [ 37949.46872025]
 [-19944.75424791]
 [654438.09076585]]  cost:  267334450.97719565
weights:  [[ 59130.02947024]
 [ 37639.10585952]
 [-19863.78965438]
 [655428.29070617]]  cost:  265929121.5114452
weights:  [[ 59444.79137867]
 [ 37353.80703331]
 [-19894.821082  ]
 [655790.73590865]]  cost:  26560102

In [80]:
# Predict with the gradient-descent weights and print the targets followed by
# the predictions for comparison.
y_pred = predict(X, new_w)
print(y, y_pred)

[[550000]
 [565000]
 [595000]
 [760000]
 [810000]] [[519810.17055296]
 [604971.28260002]
 [598944.9564057 ]
 [757098.04629986]
 [799033.94244996]]


In [58]:
def r_sqr_score(y, y_pred):
    """Return the coefficient of determination (R^2) of ``y_pred`` against ``y``.

    Both inputs are flattened to 1-D first, so a column vector of targets can
    be scored against a flat vector of predictions (or vice versa) without
    NumPy broadcasting the difference into an (n, n) matrix.

    Parameters
    ----------
    y : array-like
        True target values.
    y_pred : array-like
        Predicted values; must hold the same number of elements as ``y``.

    Returns
    -------
    float
        ``1 - SS_res / SS_tot``. When every target is identical
        (``SS_tot == 0``) the ratio is undefined; 1.0 is returned for a
        perfect fit and 0.0 otherwise.

    Raises
    ------
    ValueError
        If the inputs are empty or differ in length.
    """
    y_true = np.asarray(y, dtype=float).ravel()
    y_hat = np.asarray(y_pred, dtype=float).ravel()
    if y_true.size == 0:
        raise ValueError("y must contain at least one value")
    if y_true.shape != y_hat.shape:
        raise ValueError(
            "y and y_pred have different lengths: %d != %d"
            % (y_true.size, y_hat.size)
        )

    ss_res = np.sum((y_true - y_hat) ** 2)             # residual sum of squares
    ss_tot = np.sum((y_true - np.mean(y_true)) ** 2)   # total sum of squares
    if ss_tot == 0:
        # Constant targets: avoid dividing by zero.
        return 1.0 if ss_res == 0 else 0.0
    return 1 - (ss_res / ss_tot)