In [1]:
import pandas as pd
import numpy as np
import statsmodels.api as sm

In [2]:
data = pd.read_csv('6.3.7.csv', sep=";")
data

Unnamed: 0,Mesiac,Výkonnosť,Pevnosť,Životnosť,Cena
0,september,95,12.0,27,192.36
1,október,84,12.2,24,116.56
2,november,92,12.6,29,214.2
3,december,90,12.0,22,136.83
4,január,88,12.8,27,152.23
5,február,91,12.3,26,168.4
6,marec,86,13.0,29,179.4
7,apríl,85,12.5,29,187.21
8,máj,84,12.2,23,145.23
9,jún,93,13.5,25,165.54


In [3]:
X = data[['Výkonnosť', 'Pevnosť', 'Životnosť']]
y = data['Cena']

In [4]:
def forward_stepwise(X, y):
    remaining = set(X.columns)
    selected = []
    current_score, best_new_score = float('inf'), float('inf')
    while remaining and current_score == best_new_score:
        scores_with_candidates = []
        for candidate in remaining:
            model = sm.OLS(y, sm.add_constant(pd.DataFrame(X[selected + [candidate]]))).fit()
            score = model.rsquared_adj
            scores_with_candidates.append((score, candidate))
        scores_with_candidates.sort()
        best_new_score, best_candidate = scores_with_candidates.pop()
        if current_score > best_new_score:
            remaining.remove(best_candidate)
            selected.append(best_candidate)
            current_score = best_new_score
    return selected

In [5]:
selected_predictors = forward_stepwise(X, y)

  x = pd.concat(x[::order], 1)


In [6]:
model = sm.OLS(y, sm.add_constant(X[selected_predictors])).fit()

In [7]:
model.summary()



0,1,2,3
Dep. Variable:,Cena,R-squared:,0.579
Model:,OLS,Adj. R-squared:,0.544
Method:,Least Squares,F-statistic:,16.53
Date:,"Tue, 04 Apr 2023",Prob (F-statistic):,0.00157
Time:,22:25:03,Log-Likelihood:,-58.888
No. Observations:,14,AIC:,121.8
Df Residuals:,12,BIC:,123.1
Df Model:,1,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
const,-15.0041,44.642,-0.336,0.743,-112.270,82.262
Životnosť,6.9607,1.712,4.065,0.002,3.230,10.691

0,1,2,3
Omnibus:,1.322,Durbin-Watson:,2.962
Prob(Omnibus):,0.516,Jarque-Bera (JB):,0.693
Skew:,-0.536,Prob(JB):,0.707
Kurtosis:,2.799,Cond. No.,249.0


In [8]:
corrMatrix = data.corr(method = 'pearson')
corrMatrix

Unnamed: 0,Výkonnosť,Pevnosť,Životnosť,Cena
Výkonnosť,1.0,0.124216,0.236443,0.533517
Pevnosť,0.124216,1.0,0.015664,0.148965
Životnosť,0.236443,0.015664,1.0,0.76115
Cena,0.533517,0.148965,0.76115,1.0


In [9]:
X = data[['Životnosť','Výkonnosť']]
X = sm.add_constant(X)
model = sm.OLS(y, X).fit()
model.summary()

  x = pd.concat(x[::order], 1)


0,1,2,3
Dep. Variable:,Cena,R-squared:,0.712
Model:,OLS,Adj. R-squared:,0.659
Method:,Least Squares,F-statistic:,13.58
Date:,"Tue, 04 Apr 2023",Prob (F-statistic):,0.00107
Time:,22:25:03,Log-Likelihood:,-56.242
No. Observations:,14,AIC:,118.5
Df Residuals:,11,BIC:,120.4
Df Model:,2,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
const,-236.4873,105.825,-2.235,0.047,-469.406,-3.569
Životnosť,6.1510,1.524,4.037,0.002,2.798,9.504
Výkonnosť,2.7158,1.208,2.248,0.046,0.057,5.375

0,1,2,3
Omnibus:,0.805,Durbin-Watson:,2.544
Prob(Omnibus):,0.669,Jarque-Bera (JB):,0.712
Skew:,-0.454,Prob(JB):,0.7
Kurtosis:,2.371,Cond. No.,2430.0
