# Comparison of OLS vs. SGD Regression for Car Price Prediction
This notebook fits two linear regression models, Ordinary Least Squares (OLS) and Stochastic Gradient Descent (SGD), to a used-car dataset (`cars.csv`) to predict each car's selling price, then compares their test-set performance (MSE and R²) and their predictions.

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LinearRegression, SGDRegressor
from sklearn.metrics import mean_squared_error, r2_score

## Load and Prepare Data

In [None]:
# Read cars.csv (expected next to this notebook), keep only the modelling
# columns, drop incomplete rows, and one-hot encode the categorical fields.
# NOTE(review): assumes 'mileage', 'engine' and 'max_power' are already numeric
# in cars.csv — TODO confirm; values carrying unit suffixes would break the fits below.
categorical_columns = ['fuel', 'seller_type', 'transmission', 'owner']
model_columns = ['year', 'selling_price', 'km_driven', 'fuel', 'seller_type', 'transmission', 'owner', 'mileage', 'engine', 'max_power', 'seats']

cars = (
    pd.read_csv("cars.csv")
    .loc[:, model_columns]
    .dropna()
    # drop_first=True omits one level per category to avoid redundant dummy columns.
    .pipe(pd.get_dummies, columns=categorical_columns, drop_first=True)
)
cars.head()

## Separate Features and Target

In [None]:
# The target is the selling price; every remaining column is a predictor.
target_column = 'selling_price'
y = cars[target_column]
X = cars.drop(columns=[target_column])

## Split Data into Training and Test Sets

In [None]:
# Hold out 20% of the rows for evaluation; the fixed seed keeps the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

## Feature Scaling

In [None]:
# Learn the standardization statistics from the training rows only, then apply
# the same transform to both splits so no test-set information leaks into training.
scaler = StandardScaler().fit(X_train)
X_train_scaled, X_test_scaled = scaler.transform(X_train), scaler.transform(X_test)

## Train Ordinary Least Squares (OLS) Regression

In [None]:
# Fit OLS on the same standardized features that the SGD model is trained on.
# OLS predictions do not depend on feature scaling, so the test metrics are the
# same as when fitting on the raw features, but the fitted coefficients and
# intercept are now expressed in the same (standardized) units as the SGD
# model's, which makes the side-by-side coefficient comparison meaningful.
ols_model = LinearRegression()
ols_model.fit(X_train_scaled, y_train)
ols_pred = ols_model.predict(X_test_scaled)

## Train SGD Regression Model

In [None]:
# SGD is sensitive to feature scale, so it is trained on the standardized features.
# The 'optimal' schedule computes its step size from alpha and a heuristic t0 and
# ignores eta0 entirely, so the eta0=0.01 setting had no effect. 'invscaling'
# (eta = eta0 / t**power_t) actually uses eta0 as the initial learning rate.
sgd_model = SGDRegressor(
    max_iter=100000,
    learning_rate='invscaling',
    eta0=0.01,
    random_state=42,  # fixed seed so the shuffling, and hence the fit, is reproducible
)
sgd_model.fit(X_train_scaled, y_train)
sgd_pred = sgd_model.predict(X_test_scaled)

## Evaluate Models

In [None]:
# Score each model's test-set predictions against the true selling prices:
# mean squared error (lower is better) and R^2 (closer to 1 is better).
ols_mse, sgd_mse = (mean_squared_error(y_test, pred) for pred in (ols_pred, sgd_pred))
ols_r2, sgd_r2 = (r2_score(y_test, pred) for pred in (ols_pred, sgd_pred))

## Print Comparison

In [None]:
# Report the test metrics and fitted parameters for each model, one line per item.
print(
    "OLS Regression Results:",
    f"MSE: {ols_mse:.4f}, R2: {ols_r2:.4f}",
    f"Coefficients: {ols_model.coef_}, Intercept: {ols_model.intercept_}",
    sep="\n",
)

# The leading newline in the heading leaves a blank line between the two reports.
print(
    "\nSGD Regression Results:",
    f"MSE: {sgd_mse:.4f}, R2: {sgd_r2:.4f}",
    f"Coefficients: {sgd_model.coef_}, Intercept: {sgd_model.intercept_}",
    sep="\n",
)

## Plot Results

In [None]:
# Predicted vs. actual selling price for both models on the test set.
fig, ax = plt.subplots(figsize=(8, 6))

# Both point clouds share the same transparency; previously the SGD points were
# drawn fully opaque on top of the OLS points and hid them where they overlap.
ax.scatter(y_test, ols_pred, color='yellow', alpha=0.5, label='OLS Predictions')
ax.scatter(y_test, sgd_pred, color='pink', alpha=0.5, label='SGD Predictions')

# Diagonal y = x across the observed price range: a perfect model's points lie on it.
# Series.min()/max() are vectorized, unlike the builtins iterating element by element.
price_range = [y_test.min(), y_test.max()]
ax.plot(price_range, price_range, color='blue', linestyle='dashed', label='Ideal Fit')

ax.set_xlabel('Actual Selling Price')
ax.set_ylabel('Predicted Selling Price')
ax.legend()
ax.set_title('Comparison of OLS vs. SGD Regression')
plt.show()