In [5]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.preprocessing import PolynomialFeatures
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_absolute_error,mean_squared_error,r2_score
import statsmodels.api as sm  # For OLS (Ordinary Least Squares) Regression

In [6]:
np.random.seed(42)  # Ensuring reproducibility of random noise
x = np.array([1, 2, 3, 4, 5, 6, 7, 8, 9, 10]).reshape(-1, 1)  # Independent variable (Experience)
true_salaries = np.array([30, 35, 40, 45, 50, 55, 60, 65, 70, 75]) 

In [7]:
noise = np.random.normal(0, 5, size=true_salaries.shape)  # Adding Gaussian noise (mean=0, std=5)
y = true_salaries + noise 

In [9]:
degree=2
poly = PolynomialFeatures(degree=degree)  # Creating polynomial feature transformer
x_poly = poly.fit_transform(x) 

In [11]:
x_poly

array([[  1.,   1.,   1.],
       [  1.,   2.,   4.],
       [  1.,   3.,   9.],
       [  1.,   4.,  16.],
       [  1.,   5.,  25.],
       [  1.,   6.,  36.],
       [  1.,   7.,  49.],
       [  1.,   8.,  64.],
       [  1.,   9.,  81.],
       [  1.,  10., 100.]])

In [16]:
model=LinearRegression()
model.fit(x_poly,y)

In [17]:
m=model.coef_[0]
c=model.intercept_
print(f'Equation of line: y = {m:.2f}x + {c:.2f}')

Equation of line: y = 0.00x + 25.83


In [19]:
y_pred=model.predict(x_poly)
y_pred

array([31.52544525, 37.07102251, 42.47157998, 47.72711768, 52.8376356 ,
       57.80313374, 62.62361211, 67.29907069, 71.8295095 , 76.21492852])

In [22]:
mse=mean_squared_error(y,y_pred)
print(f'Mean Squared Error: {mse:.2f}')
mae=mean_absolute_error(y,y_pred)
print(f'Mean Absolute Error: {mae:.2f}')
rmse=np.sqrt(mse)
print(f'Root Mean Squared Error (RMSE): {rmse:.2f}')
r2=r2_score(y,y_pred)
print(f'R-Squared: {r2:.3f}')
n=len(y) #Number of observations
p=1 #Number of independent variables (Experience)
adj_r2=1-((1-r2)*(n-1)/(n-p-1))
print(f'Adjusted R-squared: {adj_r2:.2f}')

Mean Squared Error: 11.47
Mean Absolute Error: 2.98
Root Mean Squared Error (RMSE): 3.39
R-Squared: 0.947
Adjusted R-squared: 0.94


In [21]:
x_ols = sm.add_constant(x_poly)  # Add intercept term for OLS regression
ols_model = sm.OLS(y, x_ols).fit()  # Fit OLS model
print("\nOLS Regression Summary:\n")
print(ols_model.summary()) 


OLS Regression Summary:

                            OLS Regression Results                            
Dep. Variable:                      y   R-squared:                       0.947
Model:                            OLS   Adj. R-squared:                  0.931
Method:                 Least Squares   F-statistic:                     62.13
Date:                Sat, 01 Mar 2025   Prob (F-statistic):           3.50e-05
Time:                        12:24:07   Log-Likelihood:                -26.390
No. Observations:                  10   AIC:                             58.78
Df Residuals:                       7   BIC:                             59.69
Df Model:                           2                                         
Covariance Type:            nonrobust                                         
                 coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------
const         25.8348     

  return hypotest_fun_in(*args, **kwds)
