<a href="https://colab.research.google.com/github/RasheedKhan123/Machine-Learning/blob/main/LinearRegressionProject.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# Importing numpy library
import numpy as np

# Defining Linear Regression class
class LinearRegression:
    def __init__(self, X, y):
        # Initialize method to set the independent and dependent variables
        self.X = X
        self.y = y
        self.coefficients = None

    def dimensionality(self, x):
        # Method to get the shape of the input matrix/array
        return x.shape

    def if_matrix_invertible(self, x):
        # Method to check if a matrix is invertible
        try:
            determinant = np.linalg.det(x)
            return determinant != 0
        except np.linalg.LinAlgError:
            return False

    def scale(self, x):
        # Method to scale the input data using Min-Max scaling
        self.x = x
        return ((x - x.min()) / (x.max() - x.min()))

    def train_test_split(self, X, y, train_ratio):
        # Method to split the data into training and testing sets based on the provided ratio
        self.X = X
        self.y = y
        order = np.array(range(X.shape[0]))
        np.random.shuffle(order)
        X_shuffled = X[order, :]
        y_shuffled = y[order]
        train_size = int(X.shape[0] * train_ratio)
        X_train, X_test = X[order, :][:train_size+1, :].copy(), X[order, :][train_size+1:, :].copy()
        y_train, y_test = y[order][:train_size+1].copy(), y[order][train_size+1:].copy()
        return X_train, y_train, X_test, y_test

    def compute_coeff(self, X, y):
        # Method to compute the coefficients of the linear regression model
        self.X = X
        ones = np.ones([X.shape[0], 1])  # Creating a column of ones for the bias term
        X_new = np.hstack([ones, X])  # Augmenting the data with the ones column
        # Calculating the coefficients using the normal equation
        return np.matmul(np.linalg.inv(np.matmul(X_new.T, X_new)), np.matmul(X_new.T, y))

    def compute_loss(self, y, y_pred):
        # Method to compute the Mean Squared Error (MSE) loss
        return np.dot(y - y_pred, y - y_pred) / len(y)


In [None]:
def main():
    # Import necessary module from scikit-learn
    from sklearn.datasets import fetch_california_housing

    # Fetch the California housing dataset
    X, y = fetch_california_housing(return_X_y=True)

    # Initialize the Linear Regression model with the loaded data
    lg = LinearRegression(X, y)

    # Print the dimensions of the loaded data
    print("DIMENSIONALITY:", lg.dimensionality(x=X))

    # Check and print whether the matrix is invertible
    is_invertible = lg.if_matrix_invertible(X)
    print("\nIS INVERTIBLE:", is_invertible)

    # Scale the data and print the first row of the scaled data
    print("\nFIRST ROW AFTER SCALED:")
    print(lg.scale(x=X)[:1])

    # Split the data into training and testing sets
    X_train, y_train, X_test, y_test = lg.train_test_split(X, y, train_ratio=0.8)

    # Compute the coefficients using the training data
    beta = lg.compute_coeff(X=X, y=y)

    # Print the intercept (bias) of the regression model
    intercept = beta[0]
    print("\nINTERCEPT:", beta[0])

    # Add a bias column (column of ones) to the training data
    X_train_with_bias = np.column_stack((np.ones(X_train.shape[0]), X_train))
    print("\nX_train_with_bias:", X_train_with_bias.shape)

    # Predict the training target values using the training data and the computed coefficients
    y_pred = np.dot(X_train_with_bias, beta)
    print(y_pred.shape)

    # Compute and print the Mean Squared Error (MSE) on the training data
    mse = lg.compute_loss(y_train, y_pred)
    print("\nTraining MSE(project):", mse)

    # Add a bias column (column of ones) to the testing data
    X_test_with_bias = np.column_stack((np.ones(X_test.shape[0]), X_test))
    print("\nX_test_with_bias:", X_test_with_bias.shape)

    # Predict the testing target values using the testing data and the computed coefficients
    y_test_pred = np.dot(X_test_with_bias, beta)
    print("y_test_pred shape:", y_test_pred.shape)

    # Compute and print the Mean Squared Error (MSE) on the testing data
    mse_test = lg.compute_loss(y_test, y_test_pred)
    print("\nTesting MSE(project):", mse_test)


# If the script is being run as the main module, execute the main function
if __name__ == "__main__":
    main()


DIMENSIONALITY: (20640, 8)

IS INVERTIBLE: False

FIRST ROW AFTER SCALED:
[[3.70535394e-03 4.61789599e-03 3.66790044e-03 3.50144065e-03
  1.24656660e-02 3.54421927e-03 4.53076061e-03 5.92073752e-05]]

INTERCEPT: -36.941920206336334

X_train_with_bias: (16513, 9)
(16513,)

Training MSE(project): 0.5279364171000122

X_test_with_bias: (4127, 9)
y_test_pred shape: (4127,)

Testing MSE(project): 0.5098548823062259


In [None]:
# Importing the required module from scikit-learn library
from sklearn.datasets import fetch_california_housing

# Fetching the California housing dataset
# This dataset contains housing data for California districts
# X contains the features (e.g., median income, housing median age)
# y contains the target variable, i.e., the median house value for each district
X, y = fetch_california_housing(return_X_y=True)


In [None]:
# Importing the Linear Regression class from the scikit-learn library
from sklearn.linear_model import LinearRegression

# Instantiating the Linear Regression model
model = LinearRegression()

# Fitting (or training) the model with the data (X) and target values (y)
# This step will compute the coefficients of the linear regression equation based on the provided dataset
model.fit(X, y)


In [None]:
# Retrieve and print the intercept (or bias term) of the trained Linear Regression model.
# The intercept represents the predicted output when all input features are set to zero.
print(model.intercept_)


-36.94192020718441


In [None]:
# Importing necessary modules from scikit-learn library
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import train_test_split

# Split the dataset into 80% training and 20% testing sets
# This helps to evaluate the performance of the model on unseen data
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Initializing the Linear Regression model
model = LinearRegression()

# Training the model on the training data
model.fit(X_train, y_train)

# Retrieving the intercept (bias term) of the trained model
# It represents the model's output when all features are zero
intercept = model.intercept_
print("Intercept (Bias):", intercept)

# Using the trained model to make predictions on the training data
y_train_pred = model.predict(X_train)

# Computing the Mean Squared Error for training data
# This metric indicates the average squared difference between actual and predicted values
mse_train = mean_squared_error(y_train, y_train_pred)
print("\nTraining Mean Squared Error(using predefined function):", mse_train)

# Using the trained model to make predictions on the testing data
y_test_pred = model.predict(X_test)

# Computing the Mean Squared Error for testing data
# This will give insight into how well the model performs on unseen data
mse_test = mean_squared_error(y_test, y_test_pred)
print("\nTesting Mean Squared Error(using predefined function):", mse_test)


Intercept (Bias): -37.02327770606391

Training Mean Squared Error(using predefined function): 0.5179331255246699

Testing Mean Squared Error(using predefined function): 0.5558915986952422
