In [349]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.linear_model import LinearRegression

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.utils import shuffle

# Assuming data is already loaded and contains the necessary columns
# Step 2: Preprocessing
feature1 = 'Cylinders'
feature2 = 'Fuel Consumption City (L/100 km)'
output = 'CO2 Emissions(g/km)'
alpha = 0.003  # learning rate
maxIterations = 10000

data = shuffle(data, random_state=100)  # shuffle the data
trainingData, testData = train_test_split(data, test_size=0.2, random_state=100)  # split the data

x1 = trainingData[feature1]
x2 = trainingData[feature2]
y = trainingData[output]
yMean = y.mean()
yStd = y.std()


scaler = StandardScaler()
x1Normalized = scaler.fit_transform(x1.values.reshape(-1, 1))
x2Normalized = scaler.transform(x2.values.reshape(-1, 1))

x = np.column_stack((x1Normalized, x2Normalized))

cost = []

def fitGD(x, y, alpha, maxIterations):
    x = np.c_[np.ones(x.shape[0]), x]  # Add a column of ones to x for the bias term
    thetas = np.zeros(x.shape[1])  # Initialize thetas with zeros
    for i in range(maxIterations):
        h = np.dot(x, thetas)
        for j in range(len(thetas)):
            partialDerivative = (1/len(y)) * np.sum((h - y) * x[:, j])
            thetas[j] = thetas[j] - alpha * partialDerivative
        
        # Calculate and store the mean squared error
        mse = mean_squared_error(y, h)
        cost.append(mse)
        
        # Optional: Print cost function value to monitor convergence
        if i % 1000 == 0:
            print(f"Iteration {i}: MSE {mse}")
            
    return thetas

# Fit the model
# thetas = fitGD(x, y, alpha, maxIterations)
model = LinearRegression()
model.fit(x, y)

# Step 3: Predict
# testData = testData.copy()
x1 = testData[feature1]
x2 = testData[feature2]
y = testData[output]

x1TestNormalized = scaler.transform(x1.values.reshape(-1, 1))
x2TestNormalized = scaler.transform(x2.values.reshape(-1, 1))
xTest = np.column_stack((x1TestNormalized, x2TestNormalized))
yPredicted = model.predict(xTest)
r2 = r2_score(y, yPredicted)
print(f"R2 Score: {r2}")

print(model.coef_)
print(model.intercept_)






R2 Score: 0.8594612813195779
[15.18881057 21.81901792]
167.7286004616153
