In [64]:
import pandas as pd
import numpy as np
import statsmodels.api as sm
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split
from sklearn.metrics import r2_score,mean_squared_error,mean_absolute_error

In [65]:
np.random.seed(42)
n = 100
CIE = np.random.normal(50, 10, n)
SEE = 0.7 * CIE + np.random.normal(5, 0, n)

In [66]:
data = pd.DataFrame({'CIE': CIE, 'SEE': SEE})
data


Unnamed: 0,CIE,SEE
0,54.967142,43.476999
1,48.617357,39.032150
2,56.476885,44.533820
3,65.230299,50.661209
4,47.658466,38.360926
...,...,...
95,35.364851,29.755395
96,52.961203,42.072842
97,52.610553,41.827387
98,50.051135,40.035794


In [67]:
X = sm.add_constant(data['CIE'])
model = sm.OLS(data['SEE'], X).fit()
model.summary()

0,1,2,3
Dep. Variable:,SEE,R-squared:,1.0
Model:,OLS,Adj. R-squared:,1.0
Method:,Least Squares,F-statistic:,1.294e+33
Date:,"Thu, 09 Nov 2023",Prob (F-statistic):,0.0
Time:,12:04:41,Log-Likelihood:,3256.6
No. Observations:,100,AIC:,-6509.0
Df Residuals:,98,BIC:,-6504.0
Df Model:,1,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
const,5.0000,9.69e-16,5.16e+15,0.000,5.000,5.000
CIE,0.7000,1.95e-17,3.6e+16,0.000,0.700,0.700

0,1,2,3
Omnibus:,102.298,Durbin-Watson:,2.0
Prob(Omnibus):,0.0,Jarque-Bera (JB):,802.194
Skew:,-3.705,Prob(JB):,6.39e-175
Kurtosis:,14.73,Cond. No.,274.0


In [68]:
X = data[['CIE']]
y = data['SEE']

In [69]:
X

Unnamed: 0,CIE
0,54.967142
1,48.617357
2,56.476885
3,65.230299
4,47.658466
...,...
95,35.364851
96,52.961203
97,52.610553
98,50.051135


In [70]:
y

0     43.476999
1     39.032150
2     44.533820
3     50.661209
4     38.360926
        ...    
95    29.755395
96    42.072842
97    41.827387
98    40.035794
99    38.357890
Name: SEE, Length: 100, dtype: float64

In [71]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [72]:
lr = LinearRegression()
lr.fit(X_train, y_train)

In [73]:
y_pred = lr.predict(X_test)

In [74]:
y_pred

array([36.37210847, 44.28173402, 42.52976924, 34.96109054, 29.65034607,
       41.37802865, 40.47269743, 38.46229679, 36.75607615, 43.47699907,
       33.64383147, 35.78805371, 50.95250559, 32.5960235 , 40.67954285,
       38.36092638, 40.60932948, 37.90694855, 41.6937359 , 52.96594729])

In [81]:
mse = mean_squared_error(y_test, y_pred)
r2 = r2_score(y_test, y_pred)
rmse=np.square(mean_squared_error(y_test,y_pred))
mae=mean_absolute_error(y_test,y_pred)

mse

In [82]:
mse

7.636173562539394e-29

In [83]:
r2

1.0

rmse

In [84]:
rmse

5.831114667722558e-57

In [85]:
mae

8.348877145181177e-15