In [1]:
# Make the project root (the notebook directory's parent) importable

import sys
from pathlib import Path

# The first sys.path entry is taken as the directory the notebook runs from.
notebook_path = sys.path[0]
project_root = Path(notebook_path).parent
sys.path.append(str(project_root))
print(sys.path)

['/home/ronin/Cloud_Drive/AI_works/Projects/Amperon/notebook', '/usr/lib/python38.zip', '/usr/lib/python3.8', '/usr/lib/python3.8/lib-dynload', '', '/home/ronin/Cloud_Drive/AI_works/Projects/Amperon/venv/lib/python3.8/site-packages', '/home/ronin/Cloud_Drive/AI_works/Projects/Amperon']


In [2]:
import pandas

from load_prediction import DATA_DIR

# Experiment configuration
N_STATION = 28  # station IDs are iterated as 1..N_STATION
N_EXP = 1000  # number of prediction runs (one per random seed)
PREDICTION_OUTPUT_DIR = DATA_DIR / "predictions"
LOAD_PREDICTION_FILE_NAME = "load_predict"  # file-name stem of each run's CSV

# Read the three input tables, in this order: load history, weather, and the
# probability template that the predictions are made for.
hist_data, weather_data, prediction = (
    pandas.read_csv(DATA_DIR / csv_name)
    for csv_name in ("load_hist_data.csv", "weather_data.csv", "probability_estimates.csv")
)

In [3]:
# Add temperature information to hist_data to prepare features: one
# "Station_<id>_T" column per weather station, appended after the existing columns.
# The readings are attached positionally, so each station's rows in weather_data
# must already line up with the rows of hist_data (a length mismatch raises).
station_temperatures = {
    f"Station_{station_id}_T": weather_data.query(f"`Station ID` == {station_id}")["Temperature"].values
    for station_id in range(1, N_STATION + 1)
}
# assign() appends the columns in order on the first run and overwrites them on a
# re-run, so executing this cell twice no longer leaves duplicate station columns
# (the previous insert(..., allow_duplicates=True) silently added a second set).
hist_data = hist_data.assign(**station_temperatures)

In [4]:
# Feature matrix: every hist_data column except the date label and the load target
X_all_T = hist_data.drop(columns=["Date", "Load"])

In [5]:
from load_prediction.utils.data_manipulators import Date

# Compute days into a year for a given date and add it as a feature, together
# with the year, holiday flag and weekday read from the same Date objects.
dates = list(map(Date, hist_data["Date"]))
calendar_features = {
    "Days_of_Year": [d.days_in_year for d in dates],
    "Year": [d.year for d in dates],
    "Is_Holiday": [d.is_holiday for d in dates],
    "Weekday": [d.weekday for d in dates],
}
# First run: insert at positions 1..4 (right after the first column), as before.
# Re-run: overwrite in place instead of appending duplicate columns, which the
# previous insert(..., allow_duplicates=True) did silently.
for position, (column, feature_values) in enumerate(calendar_features.items(), start=1):
    if column in X_all_T.columns:
        X_all_T[column] = feature_values
    else:
        X_all_T.insert(position, column, feature_values)

In [6]:
# Compute average temperature to be used as a feature as well as a training data.
# Select the station columns by name: dropping only "Days_of_Year", "Hour" and
# "Year" left "Is_Holiday" and "Weekday" in the frame, so those two columns were
# being averaged in with the station temperatures.
temperatures = X_all_T.filter(regex=r"^Station_\d+_T$")
assert temperatures.shape[1] == N_STATION, (
    f"expected {N_STATION} station temperature columns, found {temperatures.shape[1]}"
)
average_temperature = temperatures.values.mean(axis=1)

Temp = pandas.DataFrame({"Temperature": average_temperature})

Y = hist_data["Load"]

# Prepare features for training a load prediction model: the calendar/hour
# columns followed by the average temperature as the last column.
X_avg_T = X_all_T[["Days_of_Year", "Year", "Is_Holiday", "Weekday", "Hour"]].assign(
    Temperature=average_temperature
)

In [7]:
# Prepare input for prediction: add the calendar features to the prediction
# table. Their order matters: process() drops "Date" and "Daily Peak Probability"
# and feeds the remaining columns to the load model as they stand, so they must
# match the column order of X_avg_T.
prediction_dates = list(map(Date, prediction["Date"]))
prediction_features = {
    "Days_of_Year": [d.days_in_year for d in prediction_dates],
    "Year": [d.year for d in prediction_dates],
    "Is_Holiday": [d.is_holiday for d in prediction_dates],
    "Weekday": [d.weekday for d in prediction_dates],
}
# First run: insert at positions 1..4 (right after the first column), as before.
# Re-run: overwrite in place. The previous insert(..., allow_duplicates=True)
# silently appended a second copy of each column, which then broke the feature
# layout expected by the models in process().
for position, (column, feature_values) in enumerate(prediction_features.items(), start=1):
    if column in prediction.columns:
        prediction[column] = feature_values
    else:
        prediction.insert(position, column, feature_values)

In [8]:
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor

# Build prediction process
def process(n_random):
    """Run one load-prediction experiment and write its result to a CSV file.

    A random 80% subset of the historical data (X_avg_T / Y) is used to fit two
    random forests: one predicting load from all features, and one predicting
    the average temperature from "Days_of_Year", "Year" and "Hour". The
    temperature model fills in the "Temperature" column for the rows of the
    module-level ``prediction`` table, and the load model then predicts "Load"
    for those rows.

    Args:
        n_random: Integer seed for this experiment. It seeds the train/test
            split and both forests, so the same seed always reproduces the same
            output file; it is also used as the file-name suffix.

    Returns:
        None. The result is written to
        ``PREDICTION_OUTPUT_DIR / f"{LOAD_PREDICTION_FILE_NAME}_{n_random}.csv"``.
    """
    # Only the training part of the split is used; the held-out 20% is discarded.
    X_2_train, _, Y_train, _ = train_test_split(X_avg_T, Y, test_size=0.2, random_state=n_random)

    # Load model. Seeding the forest makes a run repeatable for a given n_random
    # (previously only the split was seeded).
    mdl_rf_2 = RandomForestRegressor(random_state=n_random)
    mdl_rf_2.fit(X_2_train, Y_train)

    # Temperature model, trained on the same rows as the load model.
    temperature_inputs = ["Days_of_Year", "Year", "Hour"]
    mdl_rf_T = RandomForestRegressor(random_state=n_random)
    mdl_rf_T.fit(X_2_train[temperature_inputs], X_2_train["Temperature"])

    # Build the load-model input for the dates to predict; "Temperature" goes
    # last so the column order matches the training features.
    X_pred = prediction.drop(["Date", "Daily Peak Probability"], axis=1)
    X_pred = X_pred.assign(Temperature=mdl_rf_T.predict(X_pred[temperature_inputs]))

    # Output layout: "Date" first, then the features (minus "Days_of_Year"),
    # then the predicted "Load".
    answers = X_pred.assign(Load=mdl_rf_2.predict(X_pred))
    answers.insert(0, "Date", prediction["Date"])
    answers = answers.drop(["Days_of_Year"], axis=1)

    # Create the output directory on demand; exist_ok keeps this safe when
    # several workers reach this line at the same time.
    PREDICTION_OUTPUT_DIR.mkdir(parents=True, exist_ok=True)
    answers.to_csv(PREDICTION_OUTPUT_DIR / f"{LOAD_PREDICTION_FILE_NAME}_{n_random}.csv")

In [9]:
# Run the N_EXP prediction experiments in parallel, one per seed 0..N_EXP-1
from load_prediction.utils.multi_processing import pmap
n_tries = list(range(N_EXP))
pmap(process, n_tries, num_workers=8)
print("Prediction complete")

Prediction complete


In [10]:
from load_prediction import DATA_DIR

# Locations used when compiling the per-run predictions into probabilities:
# the run files live in PREDICTIONS_DIR, the probability template sits next to
# that directory, and the compiled result goes into a sibling "output" directory.
PREDICTIONS_DIR = DATA_DIR / "predictions"
PROB_INPUT = PREDICTIONS_DIR.with_name("probability_estimates.csv")
PROB_OUTPUT = PROB_INPUT.parent / "output" / PROB_INPUT.name

In [11]:
from collections import Counter

import numpy as np
from load_prediction.scripts.prediction import N_EXP
from load_prediction.utils.data_manipulators import PredictionDF

# Turn the per-run predictions into peak-hour probabilities: every run casts one
# vote per date for the hour it reports as that date's peak, and the share of
# votes an (date, hour) row receives becomes its "Daily Peak Probability".
vote_df = pandas.read_csv(PROB_INPUT)
vote_df["Vote"] = np.zeros(len(vote_df), dtype=int)

all_dates = set(vote_df["Date"].values)

# Only the CSV files are prediction runs; iterating the whole directory would
# also hand subdirectories or stray files to PredictionDF.
prediction_files = sorted(PREDICTIONS_DIR.glob("*.csv"))
if not prediction_files:
    raise FileNotFoundError(f"No prediction files found in {PREDICTIONS_DIR}")
if len(prediction_files) != N_EXP:
    print(f"Warning: expected {N_EXP} prediction files, found {len(prediction_files)}")

# Tally the votes first, so the table is queried once per distinct (date, hour)
# pair instead of once per file and date.
peak_votes = Counter()
for file in prediction_files:
    predict_df = PredictionDF(file)
    peak_votes.update((date, predict_df.peak_hour_on_date(date)) for date in all_dates)

for (date, hour), n_votes in peak_votes.items():
    row_idx = vote_df.query(f"Date == '{date}' & Hour == {hour}").index
    vote_df.loc[row_idx, "Vote"] += n_votes

# Make sure both output locations exist before writing.
vote_out_path = PREDICTIONS_DIR.parent / "output/vote_out.csv"
vote_out_path.parent.mkdir(parents=True, exist_ok=True)
PROB_OUTPUT.parent.mkdir(parents=True, exist_ok=True)
vote_df.to_csv(vote_out_path)

# Normalise by the number of runs actually tallied, so each date's
# probabilities are vote shares even if the directory does not hold exactly
# N_EXP files.
vote_df["Daily Peak Probability"] = vote_df["Vote"] / len(prediction_files)
vote_df = vote_df.drop(["Vote"], axis=1)
vote_df.to_csv(PROB_OUTPUT)
print("Data compilation complete")

Data compilation complete
