In [1]:
# Step 1: Import necessary libraries

import numpy as np  # For handling numerical data
import matplotlib.pyplot as plt  # For data visualization
from sklearn.linear_model import LinearRegression  # For building the regression model
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score  # For evaluating the model
import statsmodels.api as sm  # For OLS (Ordinary Least Squares) Regression
from sklearn.preprocessing import PolynomialFeatures  # For polynomial transformation
  

 

In [26]:
# Step 2: Define the dataset (Experience in years and corresponding Salary in $1000s)

np.random.seed(42)  # Ensuring reproducibility of random noise
 
X = np.array([1, 2, 3, 4, 5, 6, 7, 8, 9, 10]).reshape(-1, 1)  # Independent variable (Experience)

true_salaries = np.array([30, 35, 40, 45, 50, 55, 60, 65, 70, 75])  # True salary values

In [27]:
# Adding some random noise to salaries to reduce accuracy
noise = np.random.normal(0, 5, size=true_salaries.shape)  # Adding Gaussian noise (mean=0, std=5)
Y = true_salaries + noise  # Final dependent variable with noise
print(Y)

[32.48357077 34.30867849 43.23844269 52.61514928 48.82923313 53.82931522
 67.89606408 68.83717365 67.65262807 77.71280022]


In [28]:
# Step 3: Convert data into Polynomial Features (degree 2 for quadratic relationship)
degree = 2  # You can change this for higher degrees
poly = PolynomialFeatures(degree=degree)  # Creating polynomial feature transformer
X_poly = poly.fit_transform(X)  # Transforming X to polynomial features

In [29]:
# 6. Ordinary Least Squares (OLS) Summary - Gives detailed statistical summary of the regression
X_ols = sm.add_constant(X_poly)  # Add intercept term for OLS regression
ols_model = sm.OLS(Y, X_ols).fit()  # Fit OLS model
print("\nOLS Regression Summary:\n")
print(ols_model.summary())  # Display detailed statistical summary


OLS Regression Summary:

                            OLS Regression Results                            
Dep. Variable:                      y   R-squared:                       0.947
Model:                            OLS   Adj. R-squared:                  0.931
Method:                 Least Squares   F-statistic:                     62.13
Date:                Sat, 01 Mar 2025   Prob (F-statistic):           3.50e-05
Time:                        12:31:32   Log-Likelihood:                -26.390
No. Observations:                  10   AIC:                             58.78
Df Residuals:                       7   BIC:                             59.69
Df Model:                           2                                         
Covariance Type:            nonrobust                                         
                 coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------
const         25.8348     

  res = hypotest_fun_out(*samples, **kwds)


In [30]:
# Step 3: Convert data into Polynomial Features (degree 2 for quadratic relationship)
degree = 2  # You can change this for higher degrees
poly = PolynomialFeatures(degree=degree)  # Creating polynomial feature transformer
X_poly = poly.fit_transform(X)  # Transforming X to polynomial features

In [31]:
# Step 4: Create and train the Polynomial Regression model
model = LinearRegression()  # Initializing the model
model.fit(X_poly, Y)  # Training the model

In [32]:
#step5:get m and c
#calculate intercept
c = model.intercept_
m = model.coef_[0]
print(f"Intercept: {c}")
print(f"Slope: {m}")
 

Intercept: 25.834848216909297
Slope: 0.0


In [33]:
#step6:diaplay equation
print(f"Equation of Line:Y={m:.2f}X+{c:.2f}")

Equation of Line:Y=0.00X+25.83


In [35]:
#step7:predict values using the model
Y_pred=model.predict(X_poly)

In [36]:
#1.Mean Square Error
mse=mean_squared_error(Y,Y_pred)
print(f"Mean squared Error (MSE):{mse:.2f}")

Mean squared Error (MSE):11.47


In [37]:
#mean absolute error
mae=mean_absolute_error(Y,Y_pred)
print(f"Mean Absolute Error:{mae:.2f}")

Mean Absolute Error:2.98


In [38]:
#3.Root mean squared error
rmse=np.sqrt(mse)
print(f"RSME:{rmse:.2f}")

RSME:3.39


In [39]:
#4.Rsquared
r2=r2_score(Y,Y_pred)
print(f"R-squared:{r2:.2f}")

R-squared:0.95


In [40]:
#5.Adjusted R-squared
n=len(Y)
p=1
adj_r2=1-((1-r2)*(n-1)/(n-p-1))
print(f"Adjusted R-Squared:{adj_r2:.2f}")

Adjusted R-Squared:0.94
