In [10]:
%matplotlib inline

import numpy as np
from sklearn import datasets, linear_model, preprocessing, model_selection
from sklearn.utils import shuffle
import matplotlib.pyplot as plt
import pandas as pd

In [13]:
def calculate_aic_bic(y_true, y_pred, k):
    """
    Compute AIC and BIC from a model's predictions.

    Both criteria use the residual-sum-of-squares form:

        AIC = n * ln(RSS / n) + 2 * k
        BIC = n * ln(RSS / n) + k * ln(n)

    Parameters
    ----------
    y_true : array-like
        Observed target values. Flattened to one dimension before use.
    y_pred : array-like
        Predicted values, paired with ``y_true`` by position. Flattened to
        one dimension before use.
    k : int
        Number of model parameters.

    Returns
    -------
    tuple
        ``(aic, bic)``.

    Raises
    ------
    ValueError
        If the inputs are empty or do not hold the same number of values.
    """
    # Work on flat float arrays so that residuals are always paired by position:
    #  * a (n,) vector minus a (n, 1) column would broadcast to (n, n) and
    #    silently inflate the RSS;
    #  * two pandas Series would be aligned on index labels, not on row order;
    #  * plain Python lists do not support element-wise subtraction at all.
    y_true = np.asarray(y_true, dtype=float).ravel()
    y_pred = np.asarray(y_pred, dtype=float).ravel()

    n = y_true.size
    if n == 0:
        raise ValueError("y_true and y_pred must not be empty")
    if y_pred.size != n:
        raise ValueError(
            f"y_true and y_pred must have the same number of values "
            f"(got {n} and {y_pred.size})"
        )

    rss = np.sum((y_true - y_pred) ** 2)
    # The log-likelihood term is shared by both criteria; only the penalty differs.
    # A perfect fit (rss == 0) yields -inf here, exactly as np.log(0) does.
    fit_term = n * np.log(rss / n)
    aic = fit_term + 2 * k
    bic = fit_term + k * np.log(n)
    return aic, bic

In [14]:
# Load the semicolon-separated dataset; the first row holds the column names.
df = pd.read_csv('exxsol_data.csv', sep=';', header=0)
y = df['tohn/hour']
freq_temp = df[['freq_peak', 'temp']]

# Shuffle the rows once, with a fixed seed. The integer `cv` splits used below
# follow row order, so an unseeded shuffle would change the fold membership
# (and with it the CV scores and the alpha picked by ElasticNetCV) on every run.
freq_temp, y = shuffle(freq_temp, y, random_state=42)

# --- Ordinary least squares ---------------------------------------------------
lr = linear_model.LinearRegression()
predicted_lr = model_selection.cross_val_predict(lr, freq_temp, y, cv=20)
score_lr = model_selection.cross_val_score(lr, freq_temp, y, scoring='r2', cv=20)
# AIC/BIC are computed in-sample: refit on the full data and score that fit.
lr.fit(freq_temp, y)
# One parameter per feature plus one more (presumably the intercept).
k_lr = freq_temp.shape[1] + 1
aic_lr, bic_lr = calculate_aic_bic(y, lr.predict(freq_temp), k_lr)
print(f"Linear Regression:   AIC: {aic_lr:.2f}, BIC: {bic_lr:.2f}")


# --- Elastic net with built-in alpha selection --------------------------------
# NOTE(review): recent scikit-learn releases deprecate `n_alphas` in favour of
# `alphas` — confirm against the installed version before changing it.
encv = linear_model.ElasticNetCV(cv=10, max_iter=3000, n_alphas=10)
predicted_encv = model_selection.cross_val_predict(encv, freq_temp, y, cv=20)
score_encv = model_selection.cross_val_score(encv, freq_temp, y, scoring='r2', cv=20)
encv.fit(freq_temp, y)
# Only coefficients that were not shrunk to exactly zero count as parameters,
# plus one more (presumably the intercept).
k_encv = np.sum(encv.coef_ != 0) + 1
aic_encv, bic_encv = calculate_aic_bic(y, encv.predict(freq_temp), k_encv)
print(f"ElasticNetCV:     AIC: {aic_encv:.2f}, BIC: {bic_encv:.2f}")

Linear Regression:   AIC: -1272.09, BIC: -1258.43
ElasticNetCV:     AIC: -1100.04, BIC: -1086.38
