In [None]:
# MLR (Multiple Linear Regression) assignment

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
from sklearn.linear_model import LinearRegression, Lasso, Ridge
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import PolynomialFeatures, StandardScaler

# Load dataset
# NOTE: absolute path; adjust it if the CSV lives somewhere else.
df = pd.read_csv("/content/ToyotaCorolla - MLR.csv")

# Rename and retain relevant columns
df = df.rename(columns={
    'Age_08_04': 'Age',
    'cc': 'CC',
})

df = df[['Price', 'Age', 'KM', 'Fuel_Type', 'HP', 'Automatic', 'CC', 'Doors', 'Weight']]

# One-hot encode Fuel_Type; drop_first=True omits the first category's
# column, which then acts as the baseline level.
df = pd.get_dummies(df, columns=['Fuel_Type'], drop_first=True)

# Define features and target
X = df.drop('Price', axis=1)
y = df['Price']

# Split the dataset (80% train, 20% test) BEFORE any fitting of transformers.
# Fitting the scaler on all rows would let test-set statistics (mean/std)
# leak into the training features and bias the evaluation.
TEST_SIZE = 0.2
RANDOM_STATE = 42
X_train_raw, X_test_raw, y_train, y_test = train_test_split(
    X, y, test_size=TEST_SIZE, random_state=RANDOM_STATE
)

# Feature scaling: learn mean/std from the training rows only, then apply
# the same transformation to the held-out rows.
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train_raw)
X_test = scaler.transform(X_test_raw)
# Full scaled matrix, retained so any later code that references it keeps working.
X_scaled = scaler.transform(X)

# ----------------------- Model 1: Linear Regression -----------------------
model1 = LinearRegression()
model1.fit(X_train, y_train)
y_pred1 = model1.predict(X_test)

# ----------------------- Model 2: Linear Regression (without KM) ---------
X_reduced = df.drop(columns=['Price', 'KM'])

# Reuse the row partition from above so every model is scored on the same
# held-out cars; only the 'KM' column is removed.
X2_train_raw = X_train_raw.drop(columns=['KM'])
X2_test_raw = X_test_raw.drop(columns=['KM'])
y2_train, y2_test = y_train, y_test

# A dedicated scaler keeps `scaler` (fitted on the full feature set) intact
# instead of silently re-fitting it on a different set of columns.
scaler_reduced = StandardScaler()
X2_train = scaler_reduced.fit_transform(X2_train_raw)
X2_test = scaler_reduced.transform(X2_test_raw)
# Retained so any later code that references it keeps working.
X_reduced_scaled = scaler_reduced.transform(X_reduced)

model2 = LinearRegression()
model2.fit(X2_train, y2_train)
y_pred2 = model2.predict(X2_test)

# ----------------------- Model 3: Polynomial Features ---------------------
# Degree-2 expansion (squares and pairwise products) of the scaled features,
# fitted on the training rows and applied unchanged to the test rows.
poly = PolynomialFeatures(degree=2, include_bias=False)
X3_train = poly.fit_transform(X_train)
X3_test = poly.transform(X_test)
y3_train, y3_test = y_train, y_test
# Retained so any later code that references it keeps working.
X_poly = poly.transform(X_scaled)

model3 = LinearRegression()
model3.fit(X3_train, y3_train)
y_pred3 = model3.predict(X3_test)

# ------------------------ Lasso & Ridge Regression ------------------------
# Both regularized models use the same scaled full-feature split as Model 1,
# so their scores are directly comparable with it.
lasso = Lasso(alpha=1.0)
lasso.fit(X_train, y_train)
y_lasso = lasso.predict(X_test)

ridge = Ridge(alpha=1.0)
ridge.fit(X_train, y_train)
y_ridge = ridge.predict(X_test)

# ------------------------ Evaluation Function ------------------------
def evaluate_model(y_true, y_pred, name):
    """Print the mean squared error and R2 score of ``y_pred`` against ``y_true``.

    Args:
        y_true: Observed target values.
        y_pred: Predicted target values, aligned with ``y_true``.
        name: Label printed as the heading of the report.

    Returns:
        None. The report is written to standard output with both metrics
        formatted to two decimal places.
    """
    # Both metrics share the (y_true, y_pred) signature, so evaluate them in one pass.
    mse, r2 = (
        metric(y_true, y_pred) for metric in (mean_squared_error, r2_score)
    )
    print(f"{name}:\n  MSE = {mse:.2f}\n  R2 Score = {r2:.2f}\n")

# ------------------------ Evaluation Results ------------------------
# Report every fitted model in a fixed order: (actual values, predictions, heading).
for actual, predicted, title in (
    (y_test, y_pred1, "Model 1: Basic Linear Regression"),
    (y2_test, y_pred2, "Model 2: Linear Regression (No KM)"),
    (y3_test, y_pred3, "Model 3: Polynomial Regression"),
    (y_test, y_lasso, "Lasso Regression"),
    (y_test, y_ridge, "Ridge Regression"),
):
    evaluate_model(actual, predicted, title)



Model 1: Basic Linear Regression:
  MSE = 2224306.04
  R2 Score = 0.83

Model 2: Linear Regression (No KM):
  MSE = 2574027.83
  R2 Score = 0.81

Model 3: Polynomial Regression:
  MSE = 2994690.37
  R2 Score = 0.78

Lasso Regression:
  MSE = 2221942.42
  R2 Score = 0.83

Ridge Regression:
  MSE = 2221963.62
  R2 Score = 0.83

