In [3]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import StandardScaler
from sklearn.datasets import fetch_california_housing

In [4]:
# Read the home-price table from the CSV one directory above the notebook,
# then show it as the cell's output.
df = pd.read_csv(filepath_or_buffer='../homeprices copy.csv')
df

Unnamed: 0,area,bedrooms,age,price
0,2600,3.0,20,550000
1,3000,4.0,15,565000
2,3200,,18,610000
3,3600,3.0,30,595000
4,4000,5.0,8,760000
5,4100,6.0,8,810000


In [5]:
# Report the number of missing values per column. Only the last expression of a
# cell is displayed automatically, so the counts must be printed explicitly;
# otherwise the result is computed and silently thrown away.
print(df.isna().sum())
# Remove every row that contains at least one missing value (modifies df in place).
df.dropna(inplace = True)

In [6]:
# Split the frame: X holds every column except 'price', y is the 'price' column.
X, y = df.drop(columns = 'price'), df['price']

In [7]:
scaler = StandardScaler()
# Standardize the feature columns taken directly from df (everything except
# 'price') instead of feeding X back into itself. With `X = f(X)` a second run of
# this cell would rescale the already augmented matrix, bias column included, and
# append another column of ones; deriving from df makes the cell idempotent.
X = scaler.fit_transform(df.drop('price', axis = 1))
# Append a constant column of ones so the last weight plays the role of the intercept.
ones_col = np.ones((X.shape[0], 1))
X = np.hstack((X, ones_col))

In [6]:
# Closed-form ordinary least squares: find w minimizing ||X w - y||^2.
# lstsq solves the problem directly instead of forming inv(X.T @ X), which is
# numerically fragile and raises LinAlgError when X.T @ X is singular.
# y is converted to a plain float array first so the result is a plain ndarray
# regardless of whether y is a pandas Series or a numpy array.
w = np.linalg.lstsq(X, np.asarray(y, dtype=float), rcond=None)[0]

In [7]:
# Predictions of the closed-form fit: design matrix times weight vector.
y_pred = X.dot(w)
y_pred

array([519866.710013  , 604304.29128739, 598930.42912874, 760000.        ,
       796898.56957087])

In [8]:
# Coefficient of determination: 1 - (residual sum of squares / total sum of squares).
r_squared = 1 - (
    np.square(y - y_pred).sum()
    / np.square(y - np.mean(y)).sum()
)
r_squared

0.9543026098066568

In [9]:
def prepare_data(a, b):
    """Append a bias column of ones to the feature matrix.

    Parameters
    ----------
    a : 2-D array-like
        Feature matrix; a column of ones is stacked on its right-hand side.
    b : sized object
        Targets. Only its length is used (it sets the number of rows in the
        ones column); it is returned unchanged.

    Returns
    -------
    tuple
        ``(a_with_bias, b)`` where ``a_with_bias`` has one more column than ``a``.
    """
    bias_col = np.ones((len(b), 1))
    return np.hstack((a, bias_col)), b

In [32]:
def init_weights(a, b):
    """Create a zero-initialized weight column vector.

    Parameters
    ----------
    a : array with a 2-D ``shape``
        Design matrix; its number of columns sets the number of weights.
    b : any
        Unused; accepted so the call signature mirrors the other helpers.

    Returns
    -------
    numpy.ndarray
        Array of zeros with shape ``(a.shape[1], 1)``.
    """
    n_weights = a.shape[1]
    return np.zeros((n_weights, 1))

In [74]:
def training(X, y, w, alpha=0.001, n_iters=100000, log_every=100):
    """Fit linear-regression weights with batch gradient descent.

    Minimizes the cost ``sum((y - X w)^2) / (2 n)`` by repeatedly stepping
    against its gradient ``X.T (X w - y) / n``.

    Parameters
    ----------
    X : array-like, shape (n, d)
        Design matrix (include a column of ones if an intercept is wanted).
    y : array-like with n elements
        Targets. Accepted as 1-D or as an (n, 1) column.
    w : array-like with d elements
        Initial weights. Accepted as 1-D or as a (d, 1) column.
    alpha : float, default 0.001
        Learning rate (step size).
    n_iters : int, default 100000
        Number of gradient steps.
    log_every : int, default 100
        Print weights and cost every ``log_every`` iterations; 0 or None
        disables logging.

    Returns
    -------
    numpy.ndarray, shape (d, 1)
        The weights after the final update.
    """
    X = np.asarray(X, dtype=float)
    # Force column vectors. predict() returns an (n, 1) array, so a 1-D y would
    # broadcast `y_pred - y` to (n, n) and silently produce wrong-shaped weights;
    # a 1-D w would likewise broadcast against the (d, 1) gradient.
    y = np.asarray(y, dtype=float).reshape(-1, 1)
    w = np.asarray(w, dtype=float).reshape(-1, 1)

    n = X.shape[0]
    for itr in range(n_iters):
        y_pred = predict(X, w)
        residual = y_pred - y
        cost = np.sum(np.square(residual)) / (2 * n)
        grad_cost = np.dot(X.T, residual) / n
        w = w - (alpha * grad_cost)
        # The logged cost was computed before this step's update,
        # while the logged weights are the ones after it.
        if log_every and itr % log_every == 0:
            print("weights: ", w, " cost: ", cost)
    return w

In [34]:
def predict(a, w):
    """Compute linear-model predictions as a column vector.

    Parameters
    ----------
    a : array-like
        Design matrix.
    w : array-like
        Weights, multiplied onto ``a`` with ``np.dot``.

    Returns
    -------
    numpy.ndarray
        ``np.dot(a, w)`` reshaped to ``(number_of_rows, 1)``.
    """
    scores = np.dot(a, w)
    n_rows = len(scores)
    return scores.reshape((n_rows, 1))

In [62]:
# Turn the targets into an (n, 1) column so they line up with predict()'s output.
y = np.array(y)
y = y.reshape((y.shape[0], 1))
# Start gradient descent from all-zero weights, one per column of X.
w = init_weights(a = X, b = y)

In [75]:
# Run gradient descent from the initial weights; the fitted weights land in new_w.
new_w = training(X = X, y = y, w = w)

weights:  [[ 96.56333916]
 [ 99.2976808 ]
 [-78.75573233]
 [656.        ]]  cost:  220945000000.0
weights:  [[ 8788.56922235]
 [ 8902.64882951]
 [-6983.50044667]
 [63049.89514191]]  cost:  179900125526.14056
weights:  [[ 15840.84442139]
 [ 15801.17142279]
 [-12252.45439444]
 [119503.40149415]]  cost:  146694475374.83337
weights:  [[ 21591.26232867]
 [ 21202.02990784]
 [-16243.58748734]
 [170582.09071869]]  cost:  119755117543.6125
weights:  [[ 26306.32854691]
 [ 25425.54812796]
 [-19238.06069553]
 [216797.68761392]]  cost:  97852047225.77261
weights:  [[ 30196.35547553]
 [ 28723.72049729]
 [-21456.62430902]
 [258613.19675889]]  cost:  80014026156.98532
weights:  [[ 33427.40482884]
 [ 31294.77785294]
 [-23072.51979783]
 [296447.54106082]]  cost:  65468052138.28223
weights:  [[ 36130.68680024]
 [ 33294.64882721]
 [-24221.63051559]
 [330679.75867641]]  cost:  53594986753.79182
weights:  [[ 38409.95781259]
 [ 34845.97838086]
 [-25010.4676593 ]
 [361652.80035328]]  cost:  43896437897.443985

In [73]:
# Predictions from the gradient-descent weights, printed next to the true targets.
y_pred = predict(a = X, w = new_w)
print(y, y_pred)

[[550000]
 [565000]
 [595000]
 [760000]
 [810000]] [[519810.03998719]
 [604971.0632615 ]
 [598941.65246636]
 [757096.4821563 ]
 [799032.59355385]]


In [58]:
def r_sqr_score(y, y_pred):
    """Return the coefficient of determination (R^2) of predictions vs. targets.

    ``R^2 = 1 - SS_res / SS_tot`` where ``SS_res`` is the residual sum of
    squares and ``SS_tot`` is the total sum of squares around the mean of ``y``.

    Parameters
    ----------
    y : array-like
        True target values, 1-D or an (n, 1) column.
    y_pred : array-like
        Predicted values, 1-D or an (n, 1) column.

    Returns
    -------
    float
        The R^2 score. If ``y`` is constant, ``SS_tot`` is zero and the result
        is ``nan`` or ``-inf``.
    """
    # Flatten both inputs before subtracting. Mixing an (n, 1) column with an
    # (n,) vector would otherwise broadcast the difference to (n, n) and return
    # a silently wrong score.
    y = np.asarray(y, dtype=float).ravel()
    y_pred = np.asarray(y_pred, dtype=float).ravel()

    ss_res = np.sum((y - y_pred) ** 2)        # residual sum of squares
    ss_tot = np.sum((y - np.mean(y)) ** 2)    # total sum of squares
    return 1 - (ss_res / ss_tot)