## Experiment Notebook
This notebook covers training multiple models on the same dataset, monitoring their performance, comparing them, and selecting the best one.

## Global Variables

## Initialization

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

from sklearn.preprocessing import OneHotEncoder
from sklearn.preprocessing import PolynomialFeatures
from sklearn.preprocessing import StandardScaler

from sklearn.model_selection import train_test_split
from sklearn.linear_model import LassoCV
from sklearn.pipeline import Pipeline
from sklearn.metrics import mean_squared_error

from datetime import datetime

In [None]:
# Load the dataset once into the module-level `dataframe`; ModelCreator.fit
# reads this name as a global, so it must exist before any model is fitted.
dataframe = pd.read_csv("./KAG_energydata_complete.csv")

## Research Engine

In [None]:
def cyclic_encode(data, max_val):
  """Encode a periodic feature as (sin, cos) coordinates on the unit circle.

  Values that are `max_val` apart map to the same point, so the end of a
  cycle sits next to its beginning (e.g. hour 23 is close to hour 0).

  Args:
    data: 1-D array-like of positions within the cycle. Lists, NumPy arrays,
      pandas Series and numeric strings (e.g. '07') are accepted; everything
      is converted to float first.
    max_val: Length of one full cycle (24 for hours, 7 for weekdays).

  Returns:
    Float array of shape (len(data), 2): column 0 is the sine component,
    column 1 the cosine component.

  Raises:
    ValueError: If an element of `data` cannot be converted to float.
  """
  # Coerce up front: a plain list or an array of digit strings would
  # otherwise fail on the multiplication below.
  positions = np.asarray(data, dtype=float)
  angles = 2 * np.pi * positions / max_val
  return np.column_stack([np.sin(angles), np.cos(angles)])

def ohe_encode(data):
  """One-hot encode a single categorical feature.

  Args:
    data: 1-D array-like of category values (list, NumPy array or pandas
      Series).

  Returns:
    Tuple `(encoded, encoder)`: `encoded` is the dense one-hot matrix with
    one row per input value, and `encoder` is the fitted OneHotEncoder
    (useful for `get_feature_names_out`).
  """
  # np.asarray lets lists and pandas Series through; they have no usable
  # `.reshape`, which the encoder's 2-D single-column input requires.
  column = np.asarray(data).reshape(-1, 1)
  encoder_data = OneHotEncoder(sparse_output=False)
  return (encoder_data.fit_transform(column), encoder_data)

def binned_encode(data):
  """Placeholder for a binned encoding of `data`.

  Not implemented yet: the argument is ignored and None is returned.
  """
  return None

A class to train models with different specifications in just one line.

In [None]:
class ModelCreator:
  """Configure and train one regression pipeline per instance.

  The hour and weekday of every row are derived from the `date` column of the
  module-level `dataframe`, encoded according to the chosen options, and
  joined to the feature matrix handed to `fit` / `predict`. The global
  `dataframe` is only read, never modified, so any number of ModelCreator
  instances can be trained on the same dataset.

  Args:
    weekday_enc: Encoding of the weekday: 'ohe' (one-hot) or 'trig'
      (sin/cos). 'binned' is recognised but not implemented yet.
    hour_enc: Encoding of the hour of day; same options as `weekday_enc`.
    reg_type: Regressor: 'lasso'. 'ridge' is recognised but not implemented
      yet.
    poly_deg: Degree of the polynomial feature expansion; values <= 1 skip
      the expansion.
  """

  _ENCODINGS = ('ohe', 'trig', 'binned')
  _REGRESSORS = ('lasso', 'ridge')

  def __init__(self, weekday_enc='ohe', hour_enc='ohe', reg_type='lasso', poly_deg=1):
    self.params = {
      'weekday_enc': weekday_enc,
      'hour_enc': hour_enc,
      'reg_type': reg_type,
      'poly_deg': poly_deg
    }
    # Set by fit(); predict() uses it to detect an untrained model.
    self.pipeline = None

  def _get_preprocessor(self):
    """Private helper to build the encoding logic based on params."""
    # TODO: choose between one-hot, sin/cos or binned encoding here, e.g.
    # with a custom ColumnTransformer. Currently unused and returns None.
    pass

  @staticmethod
  def _extract_weekday(date_string: str) -> int:
    """Return the weekday (Monday=0 ... Sunday=6) of a 'YYYY-MM-DD' string."""
    return datetime.strptime(date_string, "%Y-%m-%d").weekday()

  def _validate_params(self):
    """Fail fast on unknown or not-yet-implemented options.

    Raises:
      ValueError: If an option value is not recognised.
      NotImplementedError: If the option is recognised but has no
        implementation yet ('binned' encodings, 'ridge' regression).
    """
    if self.params['hour_enc'] not in self._ENCODINGS:
      raise ValueError("this hour_enc is not supported or is not written correctly, please double check. supported hour_enc values: ohe, trig, binned")
    if self.params['weekday_enc'] not in self._ENCODINGS:
      raise ValueError("this weekday_enc is not supported or is not written correctly, please double check. supported weekday_enc values: ohe, trig, binned")
    if self.params['reg_type'] not in self._REGRESSORS:
      raise ValueError("this reg_type is not supported or is not written correctly, please double check. supported reg_type values: lasso, ridge")

    if 'binned' in (self.params['hour_enc'], self.params['weekday_enc']):
      raise NotImplementedError("the 'binned' encoding is not implemented yet")
    if self.params['reg_type'] == 'ridge':
      raise NotImplementedError("the 'ridge' regressor is not implemented yet")

  @staticmethod
  def _encode_periodic(values, encoding, prefix, period):
    """Encode one periodic integer feature; return (matrix, column_names)."""
    if encoding == 'ohe':
      encoded, encoder = ohe_encode(values)
      return encoded, encoder.get_feature_names_out([prefix])
    if encoding == 'trig':
      return cyclic_encode(values, period), np.array([f'{prefix}_sin', f'{prefix}_cos'])
    raise NotImplementedError(f"the '{encoding}' encoding is not implemented yet")

  def _time_features(self):
    """Build the encoded weekday/hour columns for every row of `dataframe`.

    Returns:
      DataFrame indexed like the global `dataframe`, weekday columns first,
      then hour columns.
    """
    # Positions 0-9 hold the date and 11-12 the hour, as in
    # 'YYYY-MM-DD HH:...' (same slicing the notebook has always used).
    dates = dataframe['date'].astype(str)
    # Hours are converted to int so the sin/cos encoding can do arithmetic.
    hours = dates.str[11:13].astype(int).to_numpy()
    weekdays = dates.str[0:10].map(self._extract_weekday).to_numpy()

    hour_encoded, hour_names = self._encode_periodic(
      hours, self.params['hour_enc'], 'hour', 24)
    weekday_encoded, weekday_names = self._encode_periodic(
      weekdays, self.params['weekday_enc'], 'weekday', 7)

    hour_df = pd.DataFrame(hour_encoded, columns=hour_names, index=dataframe.index)
    weekday_df = pd.DataFrame(weekday_encoded, columns=weekday_names, index=dataframe.index)
    return pd.concat([weekday_df, hour_df], axis=1)

  @staticmethod
  def _labelled_frame(X):
    """Return X as a DataFrame whose index matches rows of `dataframe`.

    A DataFrame is returned unchanged (its index is trusted). Anything else
    can only be aligned by position, which is safe only when it has exactly
    one row per `dataframe` row.
    """
    if isinstance(X, pd.DataFrame):
      return X
    if len(X) != len(dataframe):
      raise ValueError("X must be a pandas DataFrame indexed like the global dataframe, or an array with one row per dataframe row")
    return pd.DataFrame(X, index=dataframe.index)

  def _design_matrix(self, X):
    """Join the encoded time features to X, aligned on the row index."""
    # The raw date string cannot be scaled; it is replaced by the encodings.
    features = X.drop(columns='date', errors='ignore')
    time_features = self._time_features().reindex(features.index)
    if time_features.isna().to_numpy().any():
      raise ValueError("X contains row labels that are missing from the global dataframe; cannot align the time features")
    design = pd.concat([time_features, features], axis=1)
    # scikit-learn only accepts feature names that are all strings.
    design.columns = design.columns.astype(str)
    return design

  def fit(self, X, y):
    """Constructs the pipeline and trains it.

    Pipeline order: [polynomial expansion ->] standard scaling -> regressor.
    The rows are split 80/20 (random_state=42); the splits are kept on the
    instance as `X_train`, `X_test`, `y_train`, `y_test` and the trained
    pipeline as `pipeline`.

    Args:
      X: Feature matrix. Expected to be a DataFrame taken from the global
        `dataframe` (same row index); a `date` column, if present, is
        dropped. A plain array is accepted only if it has one row per
        `dataframe` row.
      y: Target values, one per row of X (DataFrame, Series or array).

    Returns:
      self, to allow `ModelCreator(...).fit(X, y)` chaining.

    Raises:
      ValueError: Unknown option value, or X cannot be aligned with
        `dataframe`.
      NotImplementedError: Option recognised but not implemented yet.
    """
    self._validate_params()
    X = self._labelled_frame(X)

    # Split
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
    # np.asarray accepts pandas objects and arrays alike.
    y_train = np.asarray(y_train).squeeze()
    y_test = np.asarray(y_test).squeeze()

    self.X_test = X_test
    self.y_test = y_test
    self.X_train = X_train
    self.y_train = y_train

    # Pipeline: polynomial features, feature scaling & training
    steps = []
    if self.params['poly_deg'] > 1:
      # No bias column: the regressor fits its own intercept.
      steps.append(('poly', PolynomialFeatures(degree=self.params['poly_deg'], include_bias=False)))
    steps.append(('scaler', StandardScaler()))
    steps.append(('lasso', LassoCV(cv=5, random_state=42, tol=0.1, n_jobs=-1, max_iter=40000)))

    self.pipeline = Pipeline(steps)
    self.pipeline.fit(self._design_matrix(X_train), y_train)

    return self

  def predict(self, X):
    """Predict targets for rows of X with the trained pipeline.

    Args:
      X: Same kind of input as for `fit` (e.g. `self.X_test`); the time
        features are added here, so pass the raw feature rows.

    Returns:
      Array of predictions, one per row of X.

    Raises:
      RuntimeError: If `fit` has not been called yet.
    """
    if getattr(self, 'pipeline', None) is None:
      raise RuntimeError("this ModelCreator is not fitted yet; call fit() before predict()")
    return self.pipeline.predict(self._design_matrix(self._labelled_frame(X)))

  # TODO: sketch of the planned evaluation step (needs `r2_score` imported
  # and a `results` dict on the instance):
  # def evaluate(self, X_test, y_test):
    # """Calculates metrics and stores them in self.results."""
    # y_pred = self.model.predict(X_test)
    # self.results['rmse'] = np.sqrt(mean_squared_error(y_test, y_pred))
    # self.results['r2'] = r2_score(y_test, y_pred)
    # return self.results