In [9]:
import numpy as np
import pandas as pd 


def linear_regression_hypothesis(X, theta):
    return np.dot(X, theta)

In [10]:

def mean_squared_error(theta, X, y):
    m = len(y) 
    h = linear_regression_hypothesis(X, theta)
    squared_errors = (h - y) ** 2  
    mse = np.sum(squared_errors) / (2 * m) 
    return mse

In [11]:
def gradient_descent_step(theta, X, y, learning_rate):
    m = len(y) 
    h = np.dot(X, theta)  
    gradient = np.dot(X.T, (h - y)) / m  
    new_theta = theta - learning_rate * gradient  
    return new_theta

In [12]:
def normalize_data(data):
    return (data - data.min()) / (data.max() - data.min())


def denormalize_data(normalized_data, original_min, original_max):
    return normalized_data * (original_max - original_min) + original_min

In [13]:
df = pd.read_csv('Housing.csv')
df.head()

Unnamed: 0,price,area,bedrooms,bathrooms,stories,mainroad,guestroom,basement,hotwaterheating,airconditioning,parking,prefarea,furnishingstatus
0,13300000,7420,4,2,3,yes,no,no,no,yes,2,yes,furnished
1,12250000,8960,4,4,4,yes,no,no,no,yes,3,no,furnished
2,12250000,9960,3,2,2,yes,no,yes,no,no,2,yes,semi-furnished
3,12215000,7500,4,2,2,yes,no,yes,no,yes,3,yes,furnished
4,11410000,7420,4,1,2,yes,yes,yes,no,yes,2,no,furnished


In [14]:
from sklearn.linear_model import LinearRegression
features = ['area', 'bathrooms', 'bedrooms',]
normalized = normalize_data(df[features])
target = normalize_data(df['price'])

model = LinearRegression()
model.fit(normalized, target)

print(f'Intercept from scikit-learn: {model.intercept_}')
print(f'Coefficients from scikit-learn: {model.coef_}')

Intercept from scikit-learn: 0.042827399769954144
Coefficients from scikit-learn: [0.47714269 0.36001286 0.17611257]


In [15]:
X = np.hstack((np.ones((normalized.shape[0], 1)), normalized))

w = np.linalg.inv(X.T.dot(X)).dot(X.T).dot(target)

print(f'Intercept from normal equation: {w[0]}')
print(f'Coefficients from normal equation: {w[1:]}')

Intercept from normal equation: 0.042827399769950786
Coefficients from normal equation: [0.47714269 0.36001286 0.17611257]


In [16]:
original_target = denormalize_data(normalized, df['price'].min(), df['price'].max())
print(original_target)

Unnamed: 0,area,bathrooms,bedrooms
0,6.330309e+06,5600000.0,8680000.0
1,7.552784e+06,13300000.0,8680000.0
2,8.346598e+06,5600000.0,6370000.0
3,6.393814e+06,5600000.0,8680000.0
4,6.330309e+06,1750000.0,8680000.0
...,...,...,...
540,2.821649e+06,1750000.0,4060000.0
541,2.345361e+06,1750000.0,6370000.0
542,3.313814e+06,1750000.0,4060000.0
543,2.750206e+06,1750000.0,6370000.0
