In [None]:
import pandas as pd
import seaborn as sns
import numpy as np
import os

from matplotlib import pyplot as plt
from dotenv import load_dotenv

from models.linear_regressions import Linear_reg

# Load variables from a local .env file (if one exists) into the process
# environment; the DPI setting read further down comes from there.
load_dotenv()

# Apply the matplotlib style sheet first: a style sheet may define its own
# colour cycle, which would overwrite a palette set before it.
plt.style.use('Solarize_Light2')

# sns.color_palette() only *returns* a palette (the original call discarded
# the result, so it had no effect); set_palette() is what installs it as the
# default colour cycle for subsequent plots.
sns.set_palette('colorblind')

# Default figure DPI: taken from the DPI environment variable when it is set
# (e.g. through the .env file), otherwise 100.
# os.getenv() returns None for a missing variable and int(None) raises
# TypeError, so the fallback must be applied *before* the conversion.
# `or` also covers an empty DPI value; a non-numeric value still raises
# ValueError so that a misconfiguration is not silently hidden.
pc_dpi = int(os.getenv('DPI') or 100)


In [None]:
# Relative path of the CSV holding the modelling dataset.
file_ghg_eui = "./data/seattle_predict_ghg_eui.csv"
# Second name bound to the same path (both names exist after this cell).
file = file_ghg_eui


In [None]:
# Read the dataset, then cast every column to float in a second step.
df_model = pd.read_csv(file_ghg_eui)
df_model = df_model.astype(float)

# Cell output: the column labels of the loaded frame.
df_model.columns


In [None]:
# Use the building identifier as the row index.
# Rebinding (instead of inplace=True) together with the guard keeps this cell
# safe to re-run: once the column has been moved into the index, a second
# set_index("OSEBuildingID") call would raise KeyError.
if df_model.index.name != "OSEBuildingID":
    df_model = df_model.set_index("OSEBuildingID")

In [None]:
# Target 1 is "target_GHGEmissionsIntensity(kgCO2e/ft2)".
# Two columns are removed before modelling it: the scaled copy of that target
# and the other target column (presumably so neither ends up among the
# features; confirm against Linear_reg).
droplist_1 = [
    "scaled_GHGEmissionsIntensity(kgCO2e/ft2)",  # scaled version of the GHG target
    "target_SourceEUI(kWh/m2)",  # second target, not modelled here
]

df_model_ghg = df_model.drop(droplist_1, axis="columns")


In [None]:
# Wrap the GHG frame in the project's Linear_reg helper, naming the column
# that holds the value to predict.
ghg_model = Linear_reg(
    dataframe=df_model_ghg,
    target="target_GHGEmissionsIntensity(kgCO2e/ft2)",
)


In [None]:
# Run Linear_reg.execute_all(); presumably this fits every regression variant
# whose tables/plots are inspected in the following cells (TODO confirm in
# models.linear_regressions).
ghg_model.execute_all()


In [None]:
# Cell output: the std_table attribute (presumably the results of the plain
# linear regression; TODO confirm).
ghg_model.std_table


In [None]:
# Cell output: the elnet_table attribute (presumably the ElasticNet results;
# TODO confirm).
ghg_model.elnet_table


In [None]:
# Print ridge_best_alpha (presumably the selected Ridge penalty; TODO confirm),
# then show ridge_table as the cell output.
print(ghg_model.ridge_best_alpha)
ghg_model.ridge_table


In [None]:
# Print lasso_best_alpha (presumably the selected Lasso penalty; TODO confirm),
# then show lasso_table as the cell output.
print(ghg_model.lasso_best_alpha)
ghg_model.lasso_table


In [None]:
# Call the model's ridge_plot() helper (presumably draws a Ridge diagnostic
# figure; see models.linear_regressions for what is plotted).
ghg_model.ridge_plot()

In [None]:
# Call the model's lasso_plot() helper (presumably the Lasso counterpart of
# ridge_plot(); TODO confirm).
ghg_model.lasso_plot()

In [None]:
# Cell output: enet_best_l1_ratio (presumably the ElasticNet L1/L2 mixing
# ratio that was selected; TODO confirm).
ghg_model.enet_best_l1_ratio


In [None]:
# Cell output: the alpha_ attribute of elnet_cv (looks like a fitted
# cross-validated ElasticNet estimator, in which case alpha_ is the chosen
# penalty; NOTE(review): confirm the estimator type).
ghg_model.elnet_cv.alpha_

In [None]:
# Cell output: whatever format_all_metrics() returns (presumably a combined
# metrics summary across the fitted models; TODO confirm).
ghg_model.format_all_metrics()


In [None]:
# override_train, override_test = "./data/train_set_70percent_ghg.csv", "./data/test_set_30percent_ghg.csv"

# df_train_ovr = pd.read_csv(override_train)
# df_train_ovr.set_index("OSEBuildingID", inplace=True)
# df_test_ovr = pd.read_csv(override_test)
# df_test_ovr.set_index("OSEBuildingID", inplace=True)


In [None]:
# ghg_model.force_split(df_test_ovr=df_test_ovr, df_train_ovr=df_train_ovr)

# ghg_model.execute_all()


In [None]:
from sklearn.model_selection import RepeatedKFold

# Repeated k-fold splitter: 10 folds, repeated 3 times, with a fixed seed so
# the splits are reproducible.
kfold = RepeatedKFold(
    n_splits=10,
    n_repeats=3,
    random_state=1,
)

# Store the splitter under the "cv" key of the model's common_parameters
# (presumably forwarded to the estimators on the next execute_all(); TODO
# confirm in models.linear_regressions).
ghg_model.common_parameters["cv"] = kfold


In [None]:
# Run execute_all() again, now that common_parameters["cv"] has been replaced
# by the RepeatedKFold splitter in the previous cell.
ghg_model.execute_all()

In [None]:
# Cell output: the ridge_time_card attribute (presumably timing information
# for the Ridge fit; TODO confirm).
ghg_model.ridge_time_card

In [None]:
# Cell output: format_all_metrics() after the second execute_all() run, for
# comparison with the metrics displayed before the cv override.
ghg_model.format_all_metrics()