In [None]:
import os
import sys
# Put the parent of the current working directory on the import path
# (presumably so the project-local `utils` imports below resolve).
# The membership check keeps re-running this cell from adding duplicates.
module_path = os.path.abspath(os.pardir)
if module_path not in sys.path:
    sys.path.append(module_path)

In [None]:
import numpy as np
import pandas as pd
from sklearn.metrics import accuracy_score
from utils.logger import PrettyLogger
from utils.io_func import save_to_csv, save_to_pkl
from utils.helper import RFHelper

In [None]:
# Shared objects used by the rest of the notebook:
#   logger - used to report the train/test accuracy.
#   helper - wraps data loading, normalization, model building, training and
#            prediction; it also exposes the train/test timing lists saved at
#            the end of the notebook.
logger = PrettyLogger()
helper = RFHelper()

In [None]:
# Sites whose data is loaded for both the training and the test split.
BASE_SITES = ["Site_1"]

# Years (as strings) that make up each split.
TRAIN_YEARS = list(map(str, (2015, 2016, 2017)))
TEST_YEARS = list(map(str, (2018,)))

# Per-site, per-year feature (x) and label (y) arrays under ../preprocessing/out.
X_PATH_TEMPLATE = "../preprocessing/out/{site}/x-corn_soybean-{year}.npy"
Y_PATH_TEMPLATE = "../preprocessing/out/{site}/y-corn_soybean-{year}.npy"

# Output directory for this run; its last component is the site names joined
# with underscores.
RESULT_DIR = "./out/end_of_the_season/RF-corn_soybean/{}/".format("_".join(BASE_SITES))

# Input

In [None]:
def get_paths(path_template, sites, years):
    """Expand a path template over every (site, year) combination.

    Args:
        path_template: Format string containing ``{site}`` and/or ``{year}``
            placeholders.
        sites: Iterable of site names.
        years: Iterable of years.

    Returns:
        A list of formatted paths in site-major order: all years of the first
        site, then all years of the second site, and so on.
    """
    return [
        path_template.format(site=site, year=year)
        for site in sites
        for year in years
    ]


# Training split: one feature file and one label file per (site, year) in
# BASE_SITES x TRAIN_YEARS, loaded through the helper.
x_train_paths = get_paths(X_PATH_TEMPLATE, BASE_SITES, TRAIN_YEARS)
x_train = helper.input_x(x_train_paths)
y_train_paths = get_paths(Y_PATH_TEMPLATE, BASE_SITES, TRAIN_YEARS)
y_train = helper.input_y(y_train_paths)

# Test split: same sites, TEST_YEARS instead of TRAIN_YEARS.
x_test_paths = get_paths(X_PATH_TEMPLATE, BASE_SITES, TEST_YEARS)
x_test = helper.input_x(x_test_paths)
y_test_paths = get_paths(Y_PATH_TEMPLATE, BASE_SITES, TEST_YEARS)
y_test = helper.input_y(y_test_paths)

# Normalization

In [None]:
# Normalize the train and test features through the helper, which also returns
# a `scaler` object (pickled at the end of the notebook).
# NOTE(review): x_train / x_test are rebound to the helper's outputs, so
# re-running this cell on its own feeds the already-processed arrays back in.
# Re-run the input cell first to restore the raw features.
# NOTE(review): presumably the scaler is fit on x_train only - confirm in RFHelper.
scaler, x_train, x_test = helper.normalize_without_scaler(x_train, x_test)

# Training models

In [None]:
# Build the model through the helper and fit it on the training split.
# NOTE(review): no random seed is set in the visible cells; confirm whether
# RFHelper.build_model fixes one, otherwise results may differ between runs.
rf = helper.build_model()
helper.train_model(rf, x_train, y_train)

# Prediction

In [None]:
# Predict on both splits. `helper.predict` returns a (soft, hard) pair; the
# hard predictions are the ones scored against the labels below.
# NOTE(review): "soft" is presumably class probabilities and "hard" the
# predicted class labels - confirm in RFHelper.predict.
y_train_soft_pred, y_train_hard_pred = helper.predict(rf, x_train)
y_test_soft_pred, y_test_hard_pred = helper.predict(rf, x_test)
# Accuracy on each split (scikit-learn's accuracy_score).
acc_train = accuracy_score(y_train, y_train_hard_pred)
acc_test = accuracy_score(y_test, y_test_hard_pred)
# NOTE(review): the values are passed as separate positional arguments, which
# relies on PrettyLogger.info accepting print-style varargs (the stdlib
# logging API would treat them as %-format arguments) - confirm.
logger.info("training acc:", acc_train, "test acc:", acc_test)

# Saving all

In [None]:
# Persist every artifact of this run under RESULT_DIR.
# Create the output directory up front so the cell also works on a fresh
# checkout; this is a no-op when the directory already exists (or when the
# save helpers create it themselves).
os.makedirs(RESULT_DIR, exist_ok=True)

# Soft and hard predictions for both splits, one CSV each. A list of pairs
# keeps the write order explicit.
prediction_outputs = [
    ("y_train_soft_pred", y_train_soft_pred),
    ("y_test_soft_pred", y_test_soft_pred),
    ("y_train_hard_pred", y_train_hard_pred),
    ("y_test_hard_pred", y_test_hard_pred),
]
for pred_name, pred_values in prediction_outputs:
    save_to_csv(
        pred_values, os.path.join(RESULT_DIR, "{}.csv".format(pred_name))
    )

# One-row summary of the train/test accuracy.
save_to_csv(
    np.array([[acc_train, acc_test]]),
    os.path.join(RESULT_DIR, "perf_abstract.csv"),
    header=["acc_train", "acc_test"]
)

# The scaler returned by the normalization step and the trained model.
save_to_pkl(scaler, os.path.join(RESULT_DIR, "scaler.pkl"))
save_to_pkl(rf, os.path.join(RESULT_DIR, "rf.pkl"))

# Timing records accumulated on the helper.
# NOTE(review): presumably train_model / predict append to these lists -
# confirm in RFHelper.
save_to_csv(
    helper.train_time_list,
    os.path.join(RESULT_DIR, "train_time.csv"),
    header=["train_start_time", "train_end_time", "duration"]
)
save_to_csv(
    helper.test_time_list,
    os.path.join(RESULT_DIR, "test_time.csv"),
    header=["test_start_time", "test_end_time", "duration"]
)