<h1>Table of Contents<span class="tocSkip"></span></h1>
<div class="toc"><ul class="toc-item"><li><span><a href="#Powerpredict" data-toc-modified-id="Powerpredict-1"><span class="toc-item-num">1&nbsp;&nbsp;</span>Powerpredict</a></span><ul class="toc-item"><li><span><a href="#Loading-the-dataset" data-toc-modified-id="Loading-the-dataset-1.1"><span class="toc-item-num">1.1&nbsp;&nbsp;</span>Loading the dataset</a></span></li></ul></li></ul></div>

# Powerpredict

Daily power consumption is related to the weather (rain, sunshine, temperature, etc.).
Predicting the power consumption from the weather is therefore relevant for energy suppliers.
Your task is to use the weather information provided in this dataset to predict the power consumption.

## Loading the dataset

In [None]:
import pandas as pd
import os

In [None]:
# Prefer /data/mlproject22 when that path exists; otherwise look in the
# current working directory.
DATASET_PATH = "/data/mlproject22" if os.path.exists("/data/mlproject22") else "."

In [None]:
# Load the zipped CSV, then split it into the target (kept as a one-column
# DataFrame) and the remaining feature columns.
powerpredict = pd.read_csv(os.path.join(DATASET_PATH, "powerpredict.csv.zip"))
y = powerpredict.loc[:, ["power_consumption"]]
X = powerpredict.drop(columns="power_consumption")

# One shape per line: features first, then target.
print(X.shape, y.shape, sep="\n")

Columns with dtype `object` are dropped below for simplicity, but they might contain useful information as well, so you may want to use them.

In [None]:
# Show the feature table (every column of the CSV except power_consumption).
print(X)


def drop_object_columns(df):
    """
    Return a new frame holding the columns of ``df`` whose dtype is not ``object``.

    ``df`` itself is not modified.
    """
    object_columns = [
        column for column, dtype in zip(df.columns, df.dtypes) if dtype == "object"
    ]
    return df.drop(columns=object_columns)


# Short alias for drop_object_columns.
DOC = drop_object_columns

def predict_show_metrics(name, reg, metric):
    """
    Print ``name`` followed by the value of ``metric`` for ``reg``.

    The predictions are made on the module-level features ``X`` (after
    ``DOC`` has removed the object-dtype columns) and are compared against
    the module-level target ``y``.
    """
    label = f"{name}"
    predictions = reg.predict(DOC(X))
    score = metric(y, predictions)
    print(label, score)


Here is an example of a dummy ML method, showing the score that such a simple predictor achieves:

In [None]:
import sklearn.metrics
import sklearn.dummy
# Baseline: fit a DummyRegressor (default settings) on the non-object
# feature columns and report its mean absolute error on the same data.
metric = sklearn.metrics.mean_absolute_error
reg = sklearn.dummy.DummyRegressor().fit(DOC(X), y)

predict_show_metrics("Dummy", reg, metric)

You have to implement a simple function with the following signature that performs the predictions:

In [None]:
def leader_board_predict_fn(values):
    """
    Predict the power consumption for the given feature table.

    ``get_score`` calls this with a dataset whose ``power_consumption``
    column has been dropped and compares the returned predictions against
    that column using the mean absolute error.
    """
    # YOUR CODE HERE (please remove 'raise NotImplementedError()')
    raise NotImplementedError()
    # Unreachable until the raise above is removed: predicts with the
    # module-level ``reg`` on the non-object columns of ``values``.
    return reg.predict(DOC(values))  # replace this with your implementation

This function will then be used to calculate the leaderboard score in a way similar to this:

In [None]:
def get_score():
    """
    Compute the train and hidden-test scores and write them to ``powerpredict.csv``.

    Each score is the mean absolute error between the ``power_consumption``
    column of a dataset and the output of ``leader_board_predict_fn`` on the
    remaining columns. A score that cannot be computed is recorded as NaN
    instead of aborting the run: for the train dataset the reason is printed
    to stderr, for the hidden dataset it is stored in the ``comment`` column.

    The result file contains a single row with the columns ``score_hidden``,
    ``score_train``, ``unixtime``, ``user``, ``dt`` and ``comment``.
    """
    import datetime
    import os
    import pathlib  # noqa: F401 -- only referenced by the disabled block below
    import pwd
    import sys
    import time

    import pandas as pd
    import sklearn.metrics

    def _mean_absolute_error_on(csv_path):
        # Load one dataset, predict with the submitted function, return the MAE.
        data = pd.read_csv(csv_path)
        features = data.drop(columns=["power_consumption"])
        target = data[["power_consumption"]]
        predictions = leader_board_predict_fn(features)
        return sklearn.metrics.mean_absolute_error(target, predictions)

    train_dir = "."
    if os.path.exists("/data/mlproject22/"):
        train_dir = "/data/mlproject22/"

    try:
        dataset_score = _mean_absolute_error_on(
            os.path.join(train_dir, "powerpredict.csv.zip")
        )
    except Exception as e:
        # Stay best-effort (NaN score), but report the cause so that a NaN
        # can be diagnosed. repr() is used because some exceptions, e.g.
        # NotImplementedError(), have an empty message.
        print(f"Could not compute the train dataset score: {e!r}", file=sys.stderr)
        dataset_score = float("nan")
    print(f"Train Dataset Score: {dataset_score}")

    # Submission metadata is captured before the hidden dataset is evaluated.
    user_id = pwd.getpwuid(os.getuid()).pw_name
    curtime = time.time()
    dt_now = datetime.datetime.now().strftime("%Y-%m-%d %H:%M")

    try:
        hiddendataset_score = _mean_absolute_error_on(
            os.path.join(
                "/data/mlproject22-test-data/", "hidden_powerpredict.csv.zip"
            )
        )
    except Exception as e:
        # The failure reason travels with the result row.
        hiddendataset_score = float("nan")
        comment = str(e)
    else:
        print(f"Test Dataset Score: {hiddendataset_score}")
        comment = ""

    score_dict = dict(
        score_hidden=hiddendataset_score,
        score_train=dataset_score,
        unixtime=curtime,
        user=user_id,
        dt=dt_now,
        comment=comment,
    )

    #if list(pathlib.Path(os.getcwd()).parents)[0].name == 'source':
    #    print("we are in the source directory... replacing values.")
    #    print(pd.DataFrame([score_dict]))
    #    score_dict["score_hidden"] = -1
    #    score_dict["score_train"] = -1
    #    print("new values:")
    #    print(pd.DataFrame([score_dict]))

    pd.DataFrame([score_dict]).to_csv("powerpredict.csv", index=False)


get_score()