In [None]:
import pandas as pd

from src import (
    Dataset,
    resample_data_by_10min,
    generate_full_data,
    filter_nan_days,
    encode_datetime,
    merge_external,
    parse_target,
    post_process,
    calculate_metrics,
)

In [None]:
class CustomDataset(Dataset):
    """``Dataset`` subclass that supplies the project-specific ``pre_process`` step."""

    def pre_process(self, train_data, test_data, upload):
        """Build the train/test splits from the raw inputs.

        Args:
            train_data: Raw training data handed to ``generate_full_data``.
            test_data: Only checked against ``None``; its contents are not read here.
            upload: Input handed to ``parse_target`` to build the test features;
                may be ``None``.

        Returns:
            dict: ``{"train": {"X": ..., "y": ...}, "test": {...}}``. The
            ``"test"`` entry contains ``"X"`` only when both ``test_data`` and
            ``upload`` are given; otherwise it is an empty dict.
        """
        external_csv = "data/10min.csv"
        feature_columns = [
            "LocationCode",
            "month",
            "day",
            "hour",
            "minute",
            "timestamp",
            "PS01",
            "PS02",
            "TX01",
            "TD01",
            "RH01",
            "WD01",
            "WD02",
            "WD07",
            "WD08",
            "PP01",
            "SS01",
            "GR01",
            "TS03",
        ]

        # Training pipeline, one helper per step, in the required order.
        train_frame = generate_full_data(train_data, start_time="09:00", end_time="16:59")
        train_frame = resample_data_by_10min(train_frame)
        train_frame = filter_nan_days(train_frame)
        train_frame = merge_external(train_frame, external_file=external_csv)
        train_frame = encode_datetime(train_frame)
        train_split = {"X": train_frame[feature_columns], "y": train_frame["Power(mW)"]}

        # Test features are only built when both optional inputs were supplied.
        test_split = {}
        if not (test_data is None or upload is None):
            target_frame = parse_target(upload)
            target_frame = merge_external(target_frame, external_file=external_csv)
            target_frame = encode_datetime(target_frame)
            test_split["X"] = target_frame[feature_columns]

        return {"train": train_split, "test": test_split}

# Instantiate the dataset from the CSV inputs; later cells index the result as
# dataset["train"] and dataset["test"].
# NOTE(review): train_file and test_file point at the same CSV -- confirm this
# is intended.
dataset = CustomDataset(
    train_file="./data/all_data.csv",
    test_file="./data/all_data.csv",
    upload_file="./data/upload.csv",
)
print(dataset)

In [None]:
from catboost import CatBoostRegressor

# Fit a CatBoost regressor on the training split using the GPU backend.
# ``iterations`` is an integer count, so it is written as an int literal
# instead of the float ``1e7``; the value itself is unchanged.
model = CatBoostRegressor(iterations=10_000_000, verbose=100_000, task_type="GPU")
model.fit(dataset["train"]["X"], dataset["train"]["y"])
model.save_model("model.cbm")  # persist the fitted model next to the notebook

# NOTE(review): the metrics below are computed on the same data the model was
# fitted on, so they describe training fit rather than held-out performance.
calculate_metrics(dataset["train"]["y"], model.predict(dataset["train"]["X"]))

In [None]:
# Predict on the test features, store the post-processed values in the "答案"
# column of the frame read from data/upload.csv, and write it to submission.csv.
predictions = model.predict(dataset["test"]["X"])
upload = pd.read_csv("data/upload.csv").assign(**{"答案": post_process(predictions)})
upload.to_csv("submission.csv", index=False)