In [2]:
import pandas as pd
import numpy as np
from sklearn.linear_model import LinearRegression

In [3]:
# Load the dataset
data = pd.read_csv('Housing.csv')

# Display the first few rows of the dataset
data.head()


Unnamed: 0,price,area,bedrooms,bathrooms,stories,mainroad,guestroom,basement,hotwaterheating,airconditioning,parking,prefarea,furnishingstatus
0,13300000,7420,4,2,3,yes,no,no,no,yes,2,yes,furnished
1,12250000,8960,4,4,4,yes,no,no,no,yes,3,no,furnished
2,12250000,9960,3,2,2,yes,no,yes,no,no,2,yes,semi-furnished
3,12215000,7500,4,2,2,yes,no,yes,no,yes,3,yes,furnished
4,11410000,7420,4,1,2,yes,yes,yes,no,yes,2,no,furnished


In [4]:
# Select the relevant columns and normalize them
X = data[['area', 'bedrooms', 'bathrooms']].values
y = data['price'].values

# Normalize the features
X = (X - X.mean(axis=0)) / X.std(axis=0)
y = (y - y.mean()) / y.std()

# Add a column of ones for the intercept term
X = np.hstack((np.ones((X.shape[0], 1)), X))


In [5]:
# Initialize the parameters
w = np.zeros(X.shape[1])

# Hypothesis function
def hypothesis(X, w):
    return np.dot(X, w)

# Loss function
def compute_loss(X, y, w):
    m = len(y)
    predictions = hypothesis(X, w)
    loss = (1/(2*m)) * np.sum((predictions - y)**2)
    return loss

# Gradient descent step
def gradient_descent_step(X, y, w, learning_rate):
    m = len(y)
    predictions = hypothesis(X, w)
    gradient = (1/m) * np.dot(X.T, (predictions - y))
    w = w - learning_rate * gradient
    return w

# Train the model
def train_model(X, y, learning_rate, num_iterations):
    w = np.zeros(X.shape[1])
    for i in range(num_iterations):
        w = gradient_descent_step(X, y, w, learning_rate)
    return w


In [6]:
# Parameters for training
learning_rate = 0.01
num_iterations = 1000

# Train the model
w_optimal = train_model(X, y, learning_rate, num_iterations)
print("Optimal parameters (Gradient Descent):", w_optimal)


Optimal parameters (Gradient Descent): [3.02901435e-16 4.39444124e-01 1.60696001e-01 3.72187885e-01]


In [7]:
# Analytical solution
def analytical_solution(X, y):
    return np.linalg.inv(X.T @ X) @ X.T @ y

w_analytical = analytical_solution(X, y)
print("Optimal parameters (Analytical Solution):", w_analytical)


Optimal parameters (Analytical Solution): [3.15434935e-16 4.39452085e-01 1.60528660e-01 3.72344423e-01]


In [8]:
# Create and train the model using scikit-learn
reg = LinearRegression().fit(X, y)
print("Coefficients (scikit-learn):", reg.coef_)
print("Intercept (scikit-learn):", reg.intercept_)


Coefficients (scikit-learn): [0.         0.43945209 0.16052866 0.37234442]
Intercept (scikit-learn): 3.1636120159920053e-16
