In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

In [2]:
# Read the housing dataset from the working directory and preview its first rows.
data = pd.read_csv("Housing.csv")
data.head()

Unnamed: 0,price,area,bedrooms,bathrooms,stories,mainroad,guestroom,basement,hotwaterheating,airconditioning,parking,prefarea,furnishingstatus
0,13300000,7420,4,2,3,yes,no,no,no,yes,2,yes,furnished
1,12250000,8960,4,4,4,yes,no,no,no,yes,3,no,furnished
2,12250000,9960,3,2,2,yes,no,yes,no,no,2,yes,semi-furnished
3,12215000,7500,4,2,2,yes,no,yes,no,yes,3,yes,furnished
4,11410000,7420,4,1,2,yes,yes,yes,no,yes,2,no,furnished


In [3]:
# Per-column count of missing values.
data.isnull().sum()

price               0
area                0
bedrooms            0
bathrooms           0
stories             0
mainroad            0
guestroom           0
basement            0
hotwaterheating     0
airconditioning     0
parking             0
prefarea            0
furnishingstatus    0
dtype: int64

In [4]:
# Number of distinct furnishing categories present in the column.
data.furnishingstatus.nunique()

3

In [5]:
# Encode the yes/no columns as 1/0 and the furnishing level as an ordinal
# 0/1/2 code.
#
# The mapping is applied explicitly and the result is cast to int64 instead of
# relying on DataFrame.replace to downcast the object columns for us: that
# implicit downcast is deprecated in recent pandas releases (FutureWarning),
# and without it the columns would stay `object` dtype.
# Values that are not in a mapping are passed through unchanged, so re-running
# this cell on already-encoded data is a no-op, while any unexpected label
# makes the int64 cast raise instead of slipping through silently.
columns_to_transform = ['mainroad', 'guestroom', 'basement','hotwaterheating','airconditioning','prefarea']
yes_no_codes = {'yes': 1, 'no': 0}
furnishing_codes = {'unfurnished': 0, 'semi-furnished': 1, 'furnished': 2}

for column in columns_to_transform:
    data[column] = (
        data[column]
        .map(lambda value: yes_no_codes.get(value, value))
        .astype('int64')
    )

data['furnishingstatus'] = (
    data['furnishingstatus']
    .map(lambda value: furnishing_codes.get(value, value))
    .astype('int64')
)

In [6]:
from sklearn.preprocessing import StandardScaler

# Standardise the two large-magnitude columns (zero mean, unit variance).
# NOTE(review): the scaler is fitted on every row, before the train/test split
# performed later in the notebook, so held-out rows contribute to the scaling
# statistics. The target `price` is scaled too, so downstream error metrics
# are expressed in standardised units.
sc = ['price', 'area']
scaler = StandardScaler()
scaler.fit(data[sc])
data[sc] = scaler.transform(data[sc])

In [7]:
# Separate the target column from the feature columns.
y = data['price']
X = data.drop(columns='price')

In [8]:
from sklearn.model_selection import train_test_split

# Hold out 20% of the rows for evaluation; the fixed seed keeps the split
# reproducible across runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

In [9]:
# Report the dimensions of the training features and target.
train_shape_report = (
    f"Shape of X_train: {X_train.shape}",
    f"Shape of y_train: {y_train.shape}",
)
print(*train_shape_report, sep="\n")

Shape of X_train: (436, 12)
Shape of y_train: (436,)


In [10]:
# Display the training feature matrix (pandas truncates the rendered rows).
X_train

Unnamed: 0,area,bedrooms,bathrooms,stories,mainroad,guestroom,basement,hotwaterheating,airconditioning,parking,prefarea,furnishingstatus
46,0.391790,3,2,4,1,0,0,0,1,1,0,2
93,0.945257,3,2,1,1,0,1,0,1,3,0,1
335,-0.615521,2,1,1,1,0,1,0,1,2,0,2
412,-1.171756,3,1,2,1,0,1,0,0,0,1,0
471,-0.645962,3,1,2,1,0,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...
71,0.391790,4,2,4,1,0,0,0,1,0,0,0
106,0.138117,4,2,1,1,0,1,0,1,0,1,1
270,-0.300045,3,2,3,1,0,0,1,0,1,0,2
435,-0.512207,2,1,1,1,0,0,0,0,0,0,0


In [11]:
import numpy as np
class LinearRegression:
    """Multivariate linear regression trained with batch gradient descent.

    The model is ``y_hat = X @ m + c`` and ``fit`` minimises the mean squared
    error between ``y_hat`` and ``y``.

    Parameters
    ----------
    learning_rate : float, default 0.01
        Step size applied to every gradient update.
    epochs : int, default 1000
        Number of full-batch gradient steps performed by ``fit``.

    Attributes
    ----------
    m : numpy.ndarray or None
        One weight per feature; ``None`` until ``fit`` has been called.
    c : float
        Intercept term.
    """

    def __init__(self,learning_rate = 0.01,epochs = 1000):
        self.epochs = epochs
        self.learning_rate = learning_rate
        self.m = None
        self.c = 0

    @staticmethod
    def _as_2d_float(X):
        """Return ``X`` as a 2-D float array; 1-D input becomes a single feature column."""
        X = np.asarray(X, dtype=float)
        if X.ndim == 1:
            X = X.reshape(-1, 1)
        return X

    def predict(self,X):
        """Return the predicted target for each row of ``X``.

        ``X`` may be any array-like (list, ndarray, DataFrame). A 1-D input is
        interpreted as many samples of one feature.
        """
        X = self._as_2d_float(X)
        return np.dot(X, self.m) + self.c

    def fit(self,X,y):
        """Fit the weights and intercept to ``X`` / ``y`` by gradient descent.

        Parameters
        ----------
        X : array-like of shape (n_samples, n_features) or (n_samples,)
            Training features. A 1-D input is treated as one feature column.
        y : array-like of shape (n_samples,) or (n_samples, 1)
            Training targets.

        Raises
        ------
        ValueError
            If ``X`` has no rows or ``X`` and ``y`` disagree on the number of
            samples.
        """
        # Convert once up front: this accepts lists and object-dtype frames and
        # avoids re-converting X on every epoch.
        X = self._as_2d_float(X)
        # Flatten y so a column vector cannot broadcast into an (n, n) residual.
        y = np.asarray(y, dtype=float).ravel()

        n, m = X.shape
        if n == 0:
            raise ValueError("Cannot fit on an empty dataset.")
        if y.shape[0] != n:
            raise ValueError(
                f"X has {n} samples but y has {y.shape[0]}."
            )

        # Start every fit from the same point so that refitting an existing
        # model does not continue from the previous intercept.
        self.m = np.zeros(m)
        self.c = 0.0

        for _ in range(self.epochs):
            residual = y - (np.dot(X, self.m) + self.c)

            # Gradients of mean((y - (X @ m + c)) ** 2) w.r.t. m and c.
            dm = (-2 / n) * np.dot(X.T, residual)
            dc = (-2 / n) * np.sum(residual)

            self.m -= self.learning_rate * dm
            self.c -= self.learning_rate * dc

    def coeficients(self):
        """Return ``(m, c)``: the weight vector and the intercept.

        The misspelled name is kept for backward compatibility; see
        ``coefficients`` for the correctly spelled alias.
        """
        return self.m , self.c

    def coefficients(self):
        """Correctly spelled alias of ``coeficients``."""
        return self.coeficients()

In [12]:
# Train the gradient-descent regressor with its default hyperparameters.
model = LinearRegression()
model.fit(X=X_train, y=y_train)

In [13]:
# Show the learned per-feature weights and the intercept.
m, c = model.coeficients()
print(f"Coefficients: {m}", f"Intercept: {c}", sep="\n")

Coefficients: [ 0.33397232 -0.11868606  0.46470097  0.23070963 -0.12292916  0.13372466
  0.20858269  0.20372162  0.43902663  0.12959024  0.32576573  0.09792083]
Intercept: -1.0458329625031053


In [14]:
# Predict the target for the held-out rows.
ypred = model.predict(X=X_test)

In [15]:
from sklearn.metrics import mean_squared_error, r2_score

# Score the held-out predictions. `price` was standardised earlier in the
# notebook, so the MSE is in standardised units rather than currency.
r2 = r2_score(y_test, ypred)
mse = mean_squared_error(y_test, ypred)

print(f"Mean Squared Error: {mse}", f"R-squared: {r2}", sep="\n")

Mean Squared Error: 0.5689805408564358
R-squared: 0.606900076730245
