In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression, Ridge, Lasso
from sklearn.preprocessing import PolynomialFeatures
from sklearn.metrics import mean_squared_error, r2_score

In [None]:
# Absolute path of the salary dataset CSV
DATA_PATH = "/content/Salary_Data.csv"

# Read the file with pandas defaults; no columns are dropped or renamed here
df = pd.read_csv(DATA_PATH)

# Preview the first five rows
df.head()


Unnamed: 0,YearsExperience,Age,Salary
0,1.1,21.0,39343
1,1.3,21.5,46205
2,1.5,21.7,37731
3,2.0,22.0,43525
4,2.2,22.2,39891


In [None]:
# Predictors (years of experience, age) and the response (salary) as NumPy arrays
feature_columns = ['YearsExperience', 'Age']
X = df[feature_columns].to_numpy()
y = df['Salary'].to_numpy()

# Hold out 20% of the rows for evaluation; the fixed seed makes the split reproducible
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

In [None]:
# Simple linear regression uses only column 0 of the feature matrix (YearsExperience).
# Indexing with a list keeps the result two-dimensional, the shape the estimator expects.
X_train_simple = X_train[:, [0]]
X_test_simple = X_test[:, [0]]

# Fit ordinary least squares on the training rows
lin_reg_simple = LinearRegression().fit(X_train_simple, y_train)

# Predict salaries for the held-out rows
y_pred_simple = lin_reg_simple.predict(X_test_simple)

# Report the test-set R^2 together with the fitted line's parameters
r2_simple = r2_score(y_test, y_pred_simple)
print("Simple Linear Regression:")
print(f"R^2: {r2_simple:.4f}")
print(f"Intercept: {lin_reg_simple.intercept_}")
print(f"Slope: {lin_reg_simple.coef_[0]}")


Simple Linear Regression:
R^2: 0.9024
Intercept: 25321.583011776813
Slope: 9423.815323030976


In [None]:
# Multiple linear regression: ordinary least squares on both feature columns
lin_reg_multi = LinearRegression().fit(X_train, y_train)

# Predict salaries for the held-out rows
y_pred_multi = lin_reg_multi.predict(X_test)

# Report the test-set R^2 and the fitted parameters (one coefficient per feature)
r2_multi = r2_score(y_test, y_pred_multi)
print("\nMultiple Linear Regression:")
print(f"R^2: {r2_multi:.4f}")
print(f"Intercept: {lin_reg_multi.intercept_}")
print(f"Coefficients: {lin_reg_multi.coef_}")



Multiple Linear Regression:
R^2: 0.8852
Intercept: -20612.69192148531
Coefficients: [4882.14850701 2567.51865301]


In [None]:
# Degree-2 polynomial expansion of both features
# (constant, each feature, their squares and their product).
poly = PolynomialFeatures(degree=2)

# Fit the transformer on the training rows only and reuse the existing split:
# preprocessing never sees the test rows, and the split parameters live in one place.
# Expanding after the split yields the same rows as expanding first and splitting
# with the same seed, so the fitted model and its scores are unaffected.
X_train_poly = poly.fit_transform(X_train)
X_test_poly = poly.transform(X_test)
y_train_poly, y_test_poly = y_train, y_test

# Expanded version of the full feature matrix, kept for any later cell that uses it
X_poly = poly.transform(X)

# Ordinary least squares on the expanded training features
lin_reg_poly = LinearRegression()
lin_reg_poly.fit(X_train_poly, y_train_poly)

# Predict salaries for the held-out rows
y_pred_poly = lin_reg_poly.predict(X_test_poly)

# Report the test-set R^2 and fitted parameters.
# The expansion includes a constant column by default while LinearRegression also
# fits its own intercept, so the first coefficient printed belongs to that
# redundant column.
print("\nPolynomial Regression:")
print(f"R^2: {r2_score(y_test_poly, y_pred_poly):.4f}")
print(f"Intercept: {lin_reg_poly.intercept_}")
print(f"Coefficients: {lin_reg_poly.coef_}")



Polynomial Regression:
R^2: 0.9087
Intercept: -539564.1618578898
Coefficients: [     0.         -64890.46323675  51962.76260523  -2965.07659197
   3501.04462585  -1178.3906921 ]


In [None]:
# Ridge: least squares with an L2 penalty on the coefficients
ridge = Ridge(alpha=1.0).fit(X_train, y_train)
y_pred_ridge = ridge.predict(X_test)

# Lasso: least squares with an L1 penalty on the coefficients
lasso = Lasso(alpha=0.1).fit(X_train, y_train)
y_pred_lasso = lasso.predict(X_test)

# NOTE(review): the features are passed in unscaled, so the penalty weighs the two
# coefficients according to each feature's raw units - consider standardizing first.

# Print the same three-line report for each regularized model
for _title, _model, _preds in (
    ("\nRidge Regression:", ridge, y_pred_ridge),
    ("\nLasso Regression:", lasso, y_pred_lasso),
):
    print(_title)
    print(f"R^2: {r2_score(y_test, _preds):.4f}")
    print(f"Intercept: {_model.intercept_}")
    print(f"Coefficients: {_model.coef_}")



Ridge Regression:
R^2: 0.8819
Intercept: -26082.654920928995
Coefficients: [4308.59693152 2879.53571448]

Lasso Regression:
R^2: 0.8852
Intercept: -20604.66634869526
Coefficients: [4882.9612255  2567.06637899]


| Model                      | R² Score (test set) |
| -------------------------- | ------------------- |
| Simple Linear Regression   | 0.9024              |
| Multiple Linear Regression | 0.8852              |
| Polynomial Regression      | 0.9087              |
| Ridge Regression           | 0.8819              |
| Lasso Regression           | 0.8852              |


Simple Linear Regression: Models the relationship between years of experience and salary as a straight line. The slope indicates the average increase in salary per year of experience.

Multiple Linear Regression: Incorporates additional features (like age) to predict salary. The coefficients represent the change in salary for a one-unit change in each feature, holding other features constant.

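Polynomial Regression: Fits a curved surface to the data, capturing non-linear relationships. Here a degree-2 expansion of both features is used (their squares and the experience × age interaction term). The degree of the polynomial determines the complexity of the fitted curve.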

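Regularized Regression: Applies penalties to the coefficients to prevent overfitting. Ridge regression uses L2 regularization, which shrinks coefficients toward zero, while Lasso uses L1 regularization, which can set some coefficients exactly to zero; both aim to improve model generalization. With the penalty strength used here (alpha = 0.1), the Lasso fit is almost identical to the unregularized multiple linear regression.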