In [None]:
import numpy as np
import pandas as pd
from sklearn.linear_model import LinearRegression
from sklearn.preprocessing import MinMaxScaler

from src import (
    Model, Trainer, Dataset,
    resample_data_by_10min,
    create_time_series_data,
    evaluate_predictions
)

In [None]:
# Registry of estimators handed to the Trainer, grouped by the kind of
# input each group is trained on. Keys are inserted in the same order as a
# single literal would produce.
models = {}
models["regression"] = {"Linear Regression": LinearRegression()}
models["time_series"] = {"LSTM": Model("LSTM")}

trainer = Trainer(models)

In [None]:
class CustomDataset(Dataset):
    """Build the regression and time-series inputs consumed by the trainer.

    Overrides ``pre_process`` to pass the raw frames through
    ``resample_data_by_10min``, interpolate missing values, min-max scale the
    feature columns, and assemble one look-back window per target row from the
    test observations recorded before 09:00.
    """

    def pre_process(self, train_data, test_data, target):
        """Turn the raw frames into the nested dict used for training/prediction.

        Args:
            train_data: Frame holding the feature columns listed in
                ``features`` plus ``"Power(mW)"``.
            test_data: Frame holding ``"LocationCode"``, a datetime-like
                ``"DateTime"`` column and the feature columns, or ``None`` to
                skip building the test split.
            target: Frame with ``"LocationCode"``, ``"Datetime"`` and ``"序號"``
                columns; one window is built per row. Only read when
                ``test_data`` is not ``None``.

        Returns:
            dict: ``{"train": {"regression": {"x", "y"}, "time_series":
            {"x", "y"}}}`` and, when ``test_data`` is given, an additional
            ``"test"`` entry ``{"x": windows, "序號": target ids}``.

        Raises:
            ValueError: If a target row has no matching test observations
                before 09:00, or if the windows do not all have the same
                number of rows (they could not be stacked into one array).
        """
        look_back_num = 12
        scaler = MinMaxScaler()
        features = [
            "WindSpeed(m/s)",
            "Pressure(hpa)",
            "Temperature(°C)",
            "Humidity(%)",
            "Sunlight(Lux)",
        ]

        # Non-mutating interpolate: same values as inplace=True, but the frame
        # returned by the helper is left untouched.
        train_data = resample_data_by_10min(train_data).interpolate()

        x_train = train_data[features].values
        y_train = train_data[["Power(mW)"]].values

        # The scaler is fitted on the training features only and reused
        # unchanged for the test windows below.
        x_train = scaler.fit_transform(x_train)
        # Only the scaled features are passed in; both returned arrays are
        # derived from them by the helper.
        x_ts, y_ts = create_time_series_data(x_train, look_back_num=look_back_num)

        dataset = {
            "train": {
                "regression": {"x": x_train, "y": y_train},
                "time_series": {"x": x_ts, "y": y_ts},
            }
        }

        if test_data is not None:
            test_data = resample_data_by_10min(test_data).interpolate()

            # Everything that does not depend on the target row is computed
            # once here instead of once per row.
            cutoff = pd.Timestamp("09:00").time()
            morning = test_data[test_data["DateTime"].dt.time < cutoff]
            morning_dates = morning["DateTime"].dt.date
            morning_locations = morning["LocationCode"]

            windows = []
            for _, row in target.iterrows():
                location = int(row["LocationCode"])
                day = row["Datetime"].date()

                day_rows = morning[
                    (morning_locations == location) & (morning_dates == day)
                ]
                # NOTE(review): rows are taken newest-first, so each window
                # runs backwards in time. Confirm this matches the row order
                # create_time_series_data uses for the training windows.
                window = (
                    day_rows
                    .sort_values(by="DateTime", ascending=False)[features]
                    .head(look_back_num)
                )

                if window.empty:
                    raise ValueError(
                        f"No test observations before 09:00 for "
                        f"LocationCode={location} on {day}"
                    )
                windows.append(scaler.transform(window.values))

            # Ragged windows cannot form a regular 3-D array; fail with a
            # message that names the problem instead of a shape error.
            window_lengths = {len(window) for window in windows}
            if len(window_lengths) > 1:
                raise ValueError(
                    f"Test windows have inconsistent lengths "
                    f"{sorted(window_lengths)}; expected up to "
                    f"{look_back_num} rows for every target row"
                )

            x = np.array(windows)

            dataset["test"] = {
                "x": x,
                "序號": target["序號"].values
            }

        return dataset

# Instantiate the dataset from the three CSV files and print its summary.
dataset = CustomDataset(
    train_file="./data/train.csv",
    test_file="./data/test.csv",
    target_file="./data/target.csv",
)
print(dataset)

In [None]:
# Fit the registered models on the training split.
train_split = dataset["train"]
trainer.train(train_split)

In [None]:
# Predict on the test split, save the result as CSV (without the index
# column), and display it as the cell output.
test_split = dataset["test"]
predictions = trainer.predict(test_split)
predictions.to_csv("./data/predictions.csv", index=False)
predictions

In [None]:
# Compare the saved predictions with the target file.
evaluate_predictions(
    target_file="data/target.csv",
    prediction_file="./data/predictions.csv",
)