# **Get all imports**

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

# **Use Pandas to read data and separate into input features and target**
### **Data Description**: We are going to predict the fuel efficiency of cars, measured in Miles per Gallon (MPG), based on the following features:


1.   Cylinders - (Real) Number of cylinders used in the car's engine
2.   Displacement - (Real) A measure of the cylinder volume swept by all of the pistons
3.   Horsepower - (Real) The power of the engine
4.   Weight - (Real) The weight of the entire car
5.   Acceleration - (Real) The acceleration of the car
6.   Model Year - (Real) The year that the vehicle was built



In [None]:
# Read the comma-separated Auto MPG data file into a DataFrame.
df = pd.read_csv("auto-mpg.data", delimiter=",")
# "origin" and "car name" are not used as predictors, so remove them.
df = df.drop(columns=["origin", "car name"])
display(df.head())

# Feature matrix: every remaining column except the target "mpg".
X = df.drop(columns=["mpg"]).values
# Target vector: the "mpg" column.
Y = df["mpg"].values

# **Define the linear regression model using Pseudo Inverse.**

In [None]:
class LinearRegression():
    """Ordinary least-squares linear regression solved with the pseudo-inverse.

    A bias (intercept) column of ones is prepended to the inputs, so after
    fitting ``coeff[0]`` is the intercept and ``coeff[1:]`` are the weights of
    the input features, in column order.
    """

    # Constructor of this class
    def __init__(self):
        # Empty until fit() is called; afterwards a (n_features + 1, 1) array.
        self.coeff = list()

    @staticmethod
    def _add_bias(A):
        """Return ``A`` as a float array with a leading column of ones."""
        A = np.asarray(A, dtype=float)
        return np.concatenate((np.ones((len(A), 1)), A), axis=1)

    # This function is used to find the coefficients for the line of best fit
    def fit(self, A, Y):
        """Estimate the least-squares coefficients.

        Parameters
        ----------
        A : array-like of shape (n_samples, n_features)
            Input features, without a bias column.
        Y : array-like of shape (n_samples,)
            Target values.

        The result is stored in ``self.coeff`` with shape (n_features + 1, 1).
        """
        # Add Bias
        A = self._add_bias(A)
        # Find Pseudo Inverse.
        # np.linalg.pinv computes the Moore-Penrose pseudo-inverse through an
        # SVD. For a full-column-rank A this equals inv(A^T A) A^T, but it
        # avoids squaring the condition number and still returns the
        # minimum-norm least-squares solution when columns are collinear,
        # where inverting A^T A would fail or be numerically meaningless.
        pseudo_inv = np.linalg.pinv(A)
        # Finally get the coefficients
        targets = np.reshape(np.asarray(Y, dtype=float), (-1, 1))
        self.coeff = np.matmul(pseudo_inv, targets)

    # This function uses the found coefficients to deliver predictions
    def predict(self, A):
        """Predict targets for ``A`` using the fitted coefficients.

        Parameters
        ----------
        A : array-like of shape (n_samples, n_features)
            Input features, without a bias column.

        Returns
        -------
        numpy.ndarray
            Predictions with singleton dimensions squeezed out.

        Raises
        ------
        ValueError
            If called before ``fit``.
        """
        if len(self.coeff) == 0:
            raise ValueError(
                "LinearRegression has not been fitted; call fit() before predict()."
            )
        A = self._add_bias(A)
        print("Model Coefficients:", self.coeff)
        return np.squeeze(np.matmul(A, self.coeff))

# **Applying Linear Regression to Predict Miles Per Gallon**

In [None]:
# Fit the model on the full dataset, then predict on the same data.
model = LinearRegression()
model.fit(X, Y)
preds = model.predict(X)

# Mean squared error: sum of squared residuals divided by the sample count.
squared_errors = (Y - preds) ** 2
mse = np.sum(squared_errors) / len(Y)
print("Mean Squared Error:", mse)