In [1]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
import matplotlib.pyplot as plt

In [18]:
class LinearRegressionClassifier:
    """Binary classifier that thresholds the output of a linear model.

    ``fit`` runs full-batch gradient descent on the squared error between the
    linear output and the labels; ``predict`` maps every linear output that is
    at least 0.5 to class 1 and everything else to class 0.

    Parameters
    ----------
    learning_rate : float, default 0.01
        Step size applied to each gradient update.
    max_iters : int, default 1000
        Number of gradient steps performed by ``fit``.

    Attributes
    ----------
    w : numpy.ndarray of shape (n_features + 1,) or None
        Weights for the bias-augmented inputs; ``w[0]`` multiplies the
        constant-one column. ``None`` until ``fit`` has been called.
    b : float or None
        Extra scalar offset added to the linear output. ``None`` until
        ``fit`` has been called.

    Notes
    -----
    The constant-one column and ``b`` both act as an intercept and receive the
    same gradient, so the effective intercept is ``w[0] + b``. The layout is
    kept so that the shape of ``w`` does not change for existing code.
    """

    def __init__(self, learning_rate=0.01, max_iters=1000):
        self.learning_rate = learning_rate
        self.max_iters = max_iters
        self.w = None
        self.b = None

    @staticmethod
    def _with_bias_column(X):
        """Return ``X`` as a 2-D float array with a leading column of ones."""
        X = np.asarray(X, dtype=float)
        if X.ndim != 2:
            raise ValueError(
                f"X must be 2-D (n_samples, n_features); got {X.ndim}-D input"
            )
        return np.hstack((np.ones((X.shape[0], 1)), X))

    def fit(self, X, y):
        """Learn ``w`` and ``b`` from training data.

        Parameters
        ----------
        X : array-like of shape (n_samples, n_features)
            Feature matrix (ndarray, DataFrame or nested list).
        y : array-like with n_samples elements
            Binary labels (0 or 1). Flattened to 1-D, so a column vector of
            shape (n_samples, 1) is accepted as well.

        Raises
        ------
        ValueError
            If ``X`` is not 2-D, contains no samples, or ``y`` does not hold
            exactly one label per sample.
        """
        X_aug = self._with_bias_column(X)
        # Flatten so an (n, 1) label column cannot broadcast against the
        # (n,) prediction vector.
        y = np.asarray(y, dtype=float).ravel()

        m = X_aug.shape[0]  # Number of training examples
        if m == 0:
            raise ValueError("Cannot fit on an empty training set")
        if y.shape[0] != m:
            raise ValueError(
                f"X has {m} samples but y has {y.shape[0]} labels"
            )

        self.w = np.zeros(X_aug.shape[1])
        self.b = 0.0

        for _ in range(self.max_iters):
            # Residual of the current linear output against the labels
            residual = np.dot(X_aug, self.w) + self.b - y

            # Gradients of the squared-error objective, averaged over samples
            dw = (1 / m) * np.dot(X_aug.T, residual)
            db = (1 / m) * np.sum(residual)

            # Gradient-descent step
            self.w -= self.learning_rate * dw
            self.b -= self.learning_rate * db

    def predict(self, X):
        """Predict class labels for new data points.

        Parameters
        ----------
        X : array-like of shape (n_samples, n_features)

        Returns
        -------
        numpy.ndarray of shape (n_samples,)
            1 where the linear output is >= 0.5, otherwise 0.

        Raises
        ------
        RuntimeError
            If called before ``fit``.
        ValueError
            If ``X`` is not 2-D.
        """
        if self.w is None or self.b is None:
            raise RuntimeError(
                "LinearRegressionClassifier is not fitted; call fit() before predict()"
            )
        X_aug = self._with_bias_column(X)
        scores = np.dot(X_aug, self.w) + self.b
        return np.where(scores >= 0.5, 1, 0)

In [19]:
# Load the house-price dataset.
# The path is a raw string so the Windows backslashes are taken literally
# ("\U" in a normal string literal starts a Unicode escape and is a syntax
# error), and the stray double quotes that were embedded in the literal are
# removed so they are not treated as part of the file name.
# NOTE(review): this is an absolute, machine-specific path; consider a path
# relative to the notebook so the cell runs on other machines.
dataset = pd.read_csv(r'C:\Users\yalam\OneDrive - Amrita university\House price prediction.zip')
dataset.head()

Unnamed: 0,date,price,bedrooms,bathrooms,sqft_living,sqft_lot,floors,waterfront,view,condition,sqft_above,sqft_basement,yr_built,yr_renovated,street,city,statezip,country
0,2014-05-02 00:00:00,313000.0,3.0,1.5,1340,7912,1.5,0,0,3,1340,0,1955,2005,18810 Densmore Ave N,Shoreline,WA 98133,USA
1,2014-05-02 00:00:00,2384000.0,5.0,2.5,3650,9050,2.0,0,4,5,3370,280,1921,0,709 W Blaine St,Seattle,WA 98119,USA
2,2014-05-02 00:00:00,342000.0,3.0,2.0,1930,11947,1.0,0,0,4,1930,0,1966,0,26206-26214 143rd Ave SE,Kent,WA 98042,USA
3,2014-05-02 00:00:00,420000.0,3.0,2.25,2000,8030,1.0,0,0,4,1000,1000,1963,0,857 170th Pl NE,Bellevue,WA 98008,USA
4,2014-05-02 00:00:00,550000.0,4.0,2.5,1940,10500,1.0,0,0,4,1140,800,1976,1992,9105 170th Ave NE,Redmond,WA 98052,USA


In [20]:
# Predictor columns used as model inputs
features = [
    'bedrooms',
    'bathrooms',
    'sqft_living',
    'sqft_lot',
    'floors',
    'yr_built',
]

# y is the price column; X is the subset of predictor columns
y = dataset['price']
X = dataset[features]
X.head()

Unnamed: 0,bedrooms,bathrooms,sqft_living,sqft_lot,floors,yr_built
0,3.0,1.5,1340,7912,1.5,1955
1,5.0,2.5,3650,9050,2.0,1921
2,3.0,2.0,1930,11947,1.0,1966
3,3.0,2.25,2000,8030,1.0,1963
4,4.0,2.5,1940,10500,1.0,1976


In [21]:
# Standardize the selected feature columns with StandardScaler.
# NOTE(review): the scaler is fitted on every row of X, i.e. before the
# train/test split that follows, so held-out rows contribute to the
# scaling statistics.
scaler = StandardScaler().fit(X)
X_scaled = scaler.transform(X)

In [22]:
# Hold out 20% of the rows as a test set for validation; the remaining 80%
# form the training set.
# The split is made on the unscaled features and the scaler is then fitted on
# the training rows only, so statistics of the held-out rows never influence
# the preprocessing (fitting the scaler on all rows first leaks test data).
X_train_raw, X_test_raw, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)
scaler = StandardScaler().fit(X_train_raw)
X_train = scaler.transform(X_train_raw)
X_test = scaler.transform(X_test_raw)

In [23]:
# Build the LinearRegressionClassifier and fit it on the training split.
# NOTE(review): y_train comes from the `price` column, while the class
# documents y as binary labels (0 or 1) — confirm the intended target.
model = LinearRegressionClassifier(max_iters=1000, learning_rate=0.01)
model.fit(X=X_train, y=y_train)

In [24]:
# Class labels (0 or 1) predicted by the fitted model for the held-out test rows
predictions = model.predict(X_test)

In [25]:
from sklearn.metrics import mean_absolute_error
def mean_squared_error(y_true, y_pred):
    """Return the mean of the squared element-wise differences.

    Parameters
    ----------
    y_true : array-like
        Reference values.
    y_pred : array-like
        Predicted values; either the same shape as ``y_true`` or a scalar.

    Returns
    -------
    float
        ``mean((y_true - y_pred) ** 2)``.

    Raises
    ------
    ValueError
        If both inputs are arrays and their shapes differ. Without this check
        an (n,) vector against an (n, 1) column would broadcast to an n-by-n
        matrix and yield a meaningless value.
    """
    # Coerce so plain Python lists work (list - list is not defined).
    y_true = np.asarray(y_true, dtype=float)
    y_pred = np.asarray(y_pred, dtype=float)

    # Scalars (ndim == 0) may still broadcast; array shapes must match exactly.
    if y_true.ndim and y_pred.ndim and y_true.shape != y_pred.shape:
        raise ValueError(
            f"y_true and y_pred must have the same shape; "
            f"got {y_true.shape} and {y_pred.shape}"
        )

    mse = np.mean((y_true - y_pred) ** 2)
    return mse

# Sanity check of mean_squared_error on a small hand-made example
y_true = np.array([1, 2, 3, 4, 5])
y_pred = np.array([1.1, 2.2, 3.1, 4.2, 5.3])
mse = mean_squared_error(y_true=y_true, y_pred=y_pred)
print("Mean Squared Error:", mse)


Mean Squared Error: 0.03800000000000001
