In [None]:
from openml import datasets
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

In [None]:
# OpenML dataset ID that is passed to datasets.get_dataset() further down
# (presumably the house-prices dataset, going by the variable name — confirm on OpenML).
house_prices_dataset_id = 42165

In [None]:
import warnings
# Silence every warning for the rest of the kernel session.
# NOTE(review): this blanket filter also hides deprecation and numerical
# warnings raised by the libraries used below; consider narrowing it to a
# specific category or module.
warnings.filterwarnings('ignore')

In [None]:
# Look the dataset up on OpenML using the ID defined earlier in the notebook.
house_prices_dataset = datasets.get_dataset(house_prices_dataset_id)
# get_data() is unpacked into four values; only the features (X) and the
# target (y) are kept, the other two are discarded. The target column is the
# attribute the dataset itself declares as its default target.
X, y, _, _ = house_prices_dataset.get_data(target=house_prices_dataset.default_target_attribute)

In [None]:
# Glue the target onto the right-hand side of the feature table so features
# and label live in a single frame; the target column is named after the
# dataset's default target attribute.
df = pd.concat(
    [
        pd.DataFrame(X),
        pd.DataFrame(y, columns=[house_prices_dataset.default_target_attribute]),
    ],
    axis="columns",
)

In [None]:
# Preview the first rows of the combined feature/target frame
df.head()

In [None]:
# Print the frame summary (columns, non-null counts, dtypes) to spot missing values
df.info()

# Linear Regression Class

In [None]:
class LinearRegression:
    """Linear model ``y = X.dot(w) + b`` trained with batch gradient descent.

    The loss being minimised is the mean squared error over the training
    rows; every epoch performs one full-batch update of ``w`` and ``b``.

    Parameters
    ----------
    epochs : int
        Number of gradient-descent updates performed by ``fit``.
    alpha : float
        Learning rate, i.e. the step size multiplied into each gradient.

    Attributes set by ``fit``
    -------------------------
    m, n : int
        Number of observations and number of features in the training data.
    w : numpy.ndarray of shape (n,)
        Coefficients (initialised to zeros).
    b : float
        Intercept (initialised to 0).
    X, y :
        The training data kept for the update steps; ``y`` is stored as a
        flat float array.
    """

    def __init__(self,epochs,alpha):
        self.epochs = epochs
        self.alpha = alpha     # alpha is the learning rate

    # fit method
    def fit(self,X,y):
        """Learn ``w`` and ``b`` from the training data and return ``self``.

        Parameters
        ----------
        X : 2-D array-like of shape (m, n)
            Feature matrix. Objects that already provide ``.dot`` (NumPy
            arrays, DataFrames, ...) are used as they are; plain sequences
            are converted to a float array.
        y : array-like with m values
            Targets. A column vector of shape (m, 1) is accepted and
            flattened to shape (m,).

        Raises
        ------
        ValueError
            If ``X`` is not two-dimensional, has no rows, or the number of
            targets differs from the number of rows in ``X``.
        """
        if not hasattr(X, "dot"):
            X = np.asarray(X, dtype=float)
        if len(X.shape) != 2:
            raise ValueError(
                "X must be 2-dimensional (observations x features); "
                "got shape %s" % (tuple(X.shape),)
            )
        # Flatten the target. Without this a column vector of shape (m, 1)
        # would broadcast against the (m,) predictions into an (m, m) matrix
        # and silently produce wrongly shaped weights.
        y = np.asarray(y, dtype=float).reshape(-1)

        # m = number of observations
        # n = number of features
        self.m,self.n = X.shape
        if self.m == 0:
            raise ValueError("cannot fit on an empty training set")
        if y.shape[0] != self.m:
            raise ValueError(
                "X has %d rows but y has %d values" % (self.m, y.shape[0])
            )

        # y = wx + b
        # b = intercept
        # w = coefficients
        self.w = np.zeros(self.n)
        self.b = 0
        self.X = X
        self.y = y
        # gradient descent
        for _ in range(self.epochs):
            self.update_weights()
        return self

    # one gradient-descent step towards the minimum of the MSE loss
    def update_weights(self):
        """Apply a single full-batch gradient step to ``w`` and ``b``.

        Uses the gradients of the mean squared error:
        ``dw = -2/m * X.T.dot(y - y_hat)`` and ``db = -2/m * sum(y - y_hat)``.
        Returns ``self``.
        """
        y_hat = self.predict(self.X)
        residual = self.y - y_hat
        # calculate gradients
        dw = -(2*(self.X.T).dot(residual))/self.m
        db = -(2*np.sum(residual))/self.m
        # update weights
        self.w = self.w - self.alpha*dw
        self.b = self.b - self.alpha*db
        return self

    # predict method
    def predict(self,X):
        """Return ``X.dot(w) + b`` for the given feature matrix.

        Plain sequences (objects without ``.dot``) are converted to a float
        array first. Requires ``fit`` to have been called, because it reads
        ``self.w`` and ``self.b``.
        """
        if not hasattr(X, "dot"):
            X = np.asarray(X, dtype=float)
        return X.dot(self.w)+self.b

# Simple Linear Regression Using Only the Neighborhood Feature

In [None]:
# Isolate the single predictor used in this section
neighbor = df['Neighborhood']
# List the distinct values that occur in the column
neighbor.unique()

In [None]:
# NOTE: this rebinds X and y, replacing the full feature table / target loaded earlier.
# Feature: the raw Neighborhood values
X = neighbor.values
# Target: the last column of df, which is where the target was appended when df was built
y = df.iloc[:,-1].values

In [None]:
# Inspect the raw feature values and their shape before reshaping
print(X)
print(X.shape)

In [None]:
# Turn the feature vector into a single-column 2-D array so the encoder in the
# following cell can address it as column 0. Using -1 lets NumPy infer the
# number of rows instead of hard-coding the dataset size.
X = X.reshape(-1, 1)
print(X.shape)
print(X)

In [None]:
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import OneHotEncoder

# One-hot encode column 0; any remaining columns are passed through unchanged.
ct = ColumnTransformer(
    transformers=[
        ('encoder', OneHotEncoder(), [0]),
    ],
    remainder='passthrough',
)
# The transformed result is converted to a dense array with .toarray()
X = ct.fit_transform(X).toarray()

In [None]:
print(X)

In [58]:
from sklearn.model_selection import train_test_split
# Hold out 25% of the rows for evaluation; random_state=0 pins the shuffle so
# the same split is produced on every run.
X_train, X_test, y_train,y_test = train_test_split(X,y,test_size=0.25,random_state=0)

In [101]:
# Train the gradient-descent model defined above (1500 updates, learning rate 0.02).
# fit() returns the estimator itself, so construction and training are chained.
model = LinearRegression(epochs=1500, alpha=0.02).fit(X_train, y_train)
# Predict on the held-out rows
y_pred = model.predict(X_test)

In [102]:
# Show predictions (left column) side by side with the true targets (right column)
print(np.column_stack((y_pred, y_test)))

[[213618.41261649 200624.        ]
 [146983.8467319  133000.        ]
 [131684.73258978 110000.        ]
 [204542.87379724 192000.        ]
 [143437.46076827  88000.        ]
 [129757.41123064  85000.        ]
 [227829.18789402 282922.        ]
 [114781.37102395 141000.        ]
 [309287.58298465 745000.        ]
 [227829.18789402 148800.        ]
 [227829.18789402 208900.        ]
 [192384.80424958 136905.        ]
 [191410.91684583 225000.        ]
 [146983.8467319  123000.        ]
 [146983.8467319  119200.        ]
 [146983.8467319  145000.        ]
 [309287.58298465 190000.        ]
 [203254.43995142 123600.        ]
 [138417.02318618 149350.        ]
 [129757.41123064 155000.        ]
 [146983.8467319  166000.        ]
 [203254.43995142 144500.        ]
 [114781.37102395 110000.        ]
 [191410.91684583 174000.        ]
 [192384.80424958 185000.        ]
 [129757.41123064 168000.        ]
 [227829.18789402 177500.        ]
 [141371.48230571  84500.        ]
 [126124.35591522 32

In [103]:
from sklearn.metrics import mean_squared_error
# Mean squared error of the predictions on the test split
error = mean_squared_error(y_test,y_pred)
# Square root puts the error back into the same units as the target
rmse = np.sqrt(error)
print(rmse)

58188.05528258306
