In [1]:
import os
import torch
# Expose only GPU index 3 to this process; set here so it is in place before
# jax is imported in the next cell.
os.environ["CUDA_VISIBLE_DEVICES"] = "3"

# Work from the parent directory so relative paths such as "datasets/..." resolve.
# NOTE: this is a relative move -- re-running this cell climbs one more level,
# so restart the kernel instead of re-executing it.
os.chdir("../")

In [2]:
import jax
import jax.numpy as jnp
import flax.linen as nn
import optax
import pandas as pd
from functools import partial
from sklearn.preprocessing import StandardScaler
import matplotlib.pyplot as plt
import seaborn as sns
from utilities.fits import fit
from datasets.dataset_loader import dataset_load
from utilities import plot, gmm, errors, predict, preprocess
from models import seq2point

In [3]:
import pandas as pd
import jax.numpy as jnp
from datetime import datetime
from sklearn.preprocessing import StandardScaler


def _load_building_windows(building, period, appliance, window_size):
    """Load one building's CSV and turn it into model-ready arrays.

    Args:
        building: Building identifier interpolated into the CSV file name.
        period: Mapping with inclusive ``"start_time"`` / ``"end_time"`` bounds
            formatted as ``YYYY-MM-DD``.
        appliance: Name of the appliance column used as the target.
        window_size: Odd number of mains samples per input window.

    Returns:
        ``(windows, target, stamps)`` where ``windows`` is a jax array of shape
        ``(rows, window_size)`` holding zero-padded, centred windows of the
        ``main`` column, ``target`` is the appliance column as a numpy array and
        ``stamps`` is the timestamp in nanoseconds divided by ``10**18``
        (numpy float64).

    Raises:
        ValueError: If the start date is later than the end date.
    """
    start_date = datetime.strptime(period["start_time"], "%Y-%m-%d").date()
    end_date = datetime.strptime(period["end_time"], "%Y-%m-%d").date()
    # Validate before touching the disk so a bad config fails fast.
    if start_date > end_date:
        raise ValueError("Start Date must be smaller than Enddate.")

    df = pd.read_csv(
        f"datasets/Building{building}_NILM_data_basic.csv",
        usecols=["Timestamp", "main", appliance],
    )
    stamps = pd.to_datetime(df["Timestamp"])  # parsed once, reused below
    dates = stamps.dt.date
    # dropna() returns a new frame; in-place dropping on a filtered slice
    # triggers pandas' SettingWithCopyWarning.
    df = df[(dates >= start_date) & (dates <= end_date)].dropna()

    mains = df["main"].values
    target = df[appliance].values
    # Explicit int64: plain `int` maps to a 32-bit integer on some platforms.
    scaled_stamps = (stamps.loc[df.index].astype("int64") / 10**18).values

    half = window_size // 2
    padded = jnp.pad(mains, (half, half), 'constant', constant_values=(0, 0))
    rows = len(padded) - window_size + 1
    # Column i holds padded[i : i + rows], hence row j equals
    # padded[j : j + window_size]. This needs `window_size` slices instead of
    # one Python-level slice per sample.
    windows = jnp.stack([padded[i: i + rows] for i in range(window_size)], axis=1)
    return windows, target, scaled_stamps


def _load_split(buildings, appliance, window_size):
    """Concatenate the per-building arrays of one split, in dict order."""
    import numpy as np

    windows, targets, stamps = [], [], []
    for building, period in buildings.items():
        w, y, t = _load_building_windows(building, period, appliance, window_size)
        windows.append(w)
        targets.append(y)
        stamps.append(t)

    x = jnp.concatenate(windows)
    y = jnp.array(np.concatenate(targets)).reshape(-1, 1)
    t = np.concatenate(stamps).reshape(-1, 1)
    return x, y, t


def dataset_load(appliances, train, test=None, window_size=9):
    """Build standardised seq2point train/test arrays from the building CSVs.

    Only ``appliances[0]`` is used as the target. Scalers are fitted on the
    training split and re-used for the test split.

    Args:
        appliances: Sequence of appliance column names; the first one is used.
        train: Mapping ``building -> {"start_time": ..., "end_time": ...}`` with
            inclusive ``YYYY-MM-DD`` bounds. Buildings are concatenated in
            iteration order.
        test: Same structure as ``train``. When ``None`` (or empty) no test data
            is loaded and the three test entries of the result are ``None``.
        window_size: Odd number of mains samples per input window (default 9,
            the value that used to be hard-coded).

    Returns:
        Tuple ``(x_train, y_train, x_test, y_test, x_train_timestamp,
        x_test_timestamp, scaler_x, scaler_y, scaler_time)``.
        ``x_*`` are jax arrays of shape ``(rows, window_size)`` (standardised),
        ``y_train`` is standardised, ``y_test`` is left in original units,
        and the timestamps are float32 torch tensors of shape ``(rows, 1)``.

    Raises:
        ValueError: If ``window_size`` is not a positive odd number, ``train``
            is empty, or a period's start date is later than its end date.
    """
    import numpy as np

    # An even window would yield one more window than there are targets.
    if window_size < 1 or window_size % 2 == 0:
        raise ValueError("window_size must be a positive odd integer.")
    if not train:
        raise ValueError("train must contain at least one building.")

    appliance = appliances[0]
    x_train, y_train, t_train = _load_split(train, appliance, window_size)

    scaler_x = StandardScaler()
    scaler_y = StandardScaler()
    scaler_time = StandardScaler()

    x_train = jnp.array(
        scaler_x.fit_transform(np.asarray(x_train))
    ).reshape(-1, window_size)
    y_train = jnp.array(scaler_y.fit_transform(np.asarray(y_train)))
    # Timestamps stay float64 until after scaling: values around 1.3 have a
    # float32 spacing of ~1.2e-7, i.e. roughly two minutes of wall-clock time.
    x_train_timestamp = torch.tensor(
        scaler_time.fit_transform(t_train), dtype=torch.float32
    )

    if not test:
        return (x_train, y_train, None, None, x_train_timestamp, None,
                scaler_x, scaler_y, scaler_time)

    x_test, y_test, t_test = _load_split(test, appliance, window_size)
    x_test = jnp.array(
        scaler_x.transform(np.asarray(x_test))
    ).reshape(-1, window_size)
    # y_test is deliberately NOT scaled: predictions are inverse-transformed
    # and compared against it in original units.
    x_test_timestamp = torch.tensor(
        scaler_time.transform(t_test), dtype=torch.float32
    )

    return (x_train, y_train, x_test, y_test, x_train_timestamp,
            x_test_timestamp, scaler_x, scaler_y, scaler_time)


In [4]:
# Fold 1: fit on buildings 1 and 3, evaluate on building 2.
# Each entry maps a building number to an inclusive "YYYY-MM-DD" date range.
train = {
    1: dict(start_time="2011-04-28", end_time="2011-05-15"),
    3: dict(start_time="2011-04-19", end_time="2011-05-22"),
}
test = {
    2: dict(start_time="2011-04-21", end_time="2011-05-21"),
}
# Only the first appliance in this list is used by dataset_load.
appliances = ["Microwave"]

In [5]:
# Load and standardise the data for the current train/test configuration.
# NOTE(review): `x_train_timstamp` is misspelled (missing "e"); the name is
# kept as-is so any cell relying on it keeps working.
(
    x_train,
    y_train,
    x_test,
    y_test,
    x_train_timstamp,
    x_test_timestamp,
    scaler_x,
    scaler_y,
    scaler_time,
) = dataset_load(appliances, train, test)

In [7]:
# Instantiate the seq2point network from the project-local `models` package
# using its default constructor arguments.
model = seq2point.seq2point()

In [8]:
%%capture cap --no-stderr
%%time
# The cell's stdout (including the %%time report) is captured into `cap`;
# stderr stays visible because of --no-stderr.
# Parameters are initialised from a fixed PRNG key, then trained for 200 epochs
# with batch size 32 and learning rate 0.001.
# NOTE(review): the positional booleans (True for init, False for fit) are
# presumably a train/deterministic switch -- confirm against models.seq2point
# and utilities.fits.fit.
params =  model.init(jax.random.PRNGKey(0), x_train, True)
params, losses = fit(model, params, x_train, y_train, False, batch_size=32, learning_rate=0.001, epochs=200)

In [9]:
# Predict on the test windows and map the outputs back to the target's
# original units with the scaler fitted on y_train.
y_hat = scaler_y.inverse_transform(
    model.apply(params, x_test, True, rngs={"dropout": jax.random.PRNGKey(0)})
)
# y_test is returned unscaled by dataset_load, so both sides share units.
print(
    f"RMSE : {errors.rmse(y_test, y_hat):.4f}\n"
    f"MAE  : {errors.mae(y_test, y_hat):.4f}"
)

RMSE : 94.6669
MAE  : 14.5782


In [10]:
# Fold 2: fit on buildings 1 and 2, evaluate on building 3.
# Each entry maps a building number to an inclusive "YYYY-MM-DD" date range.
train = {
    1: dict(start_time="2011-04-28", end_time="2011-05-15"),
    2: dict(start_time="2011-04-21", end_time="2011-05-21"),
}
test = {
    3: dict(start_time="2011-04-19", end_time="2011-05-22"),
}
# Only the first appliance in this list is used by dataset_load.
appliances = ["Microwave"]

In [11]:
# Reload and standardise the data for the current train/test configuration;
# this overwrites the arrays and scalers of the previous fold.
# NOTE(review): `x_train_timstamp` is misspelled (missing "e"); the name is
# kept as-is so any cell relying on it keeps working.
(
    x_train,
    y_train,
    x_test,
    y_test,
    x_train_timstamp,
    x_test_timestamp,
    scaler_x,
    scaler_y,
    scaler_time,
) = dataset_load(appliances, train, test)

In [12]:
# Create a new seq2point instance for this fold (default constructor arguments).
model = seq2point.seq2point()

In [13]:
%%capture cap --no-stderr
%%time
# The cell's stdout (including the %%time report) is captured into `cap`,
# replacing whatever an earlier cell stored there; stderr stays visible.
# Parameters are re-initialised from the same fixed PRNG key, then trained for
# 200 epochs with batch size 32 and learning rate 0.001.
# NOTE(review): the positional booleans (True for init, False for fit) are
# presumably a train/deterministic switch -- confirm against models.seq2point
# and utilities.fits.fit.
params =  model.init(jax.random.PRNGKey(0), x_train, True)
params, losses = fit(model, params, x_train, y_train, False, batch_size=32, learning_rate=0.001, epochs=200)

In [14]:
# Predict on the test windows and map the outputs back to the target's
# original units with the scaler fitted on y_train.
y_hat = scaler_y.inverse_transform(
    model.apply(params, x_test, True, rngs={"dropout": jax.random.PRNGKey(0)})
)

In [15]:
# Report errors in original units (y_test is returned unscaled by dataset_load).
print(
    f"RMSE : {errors.rmse(y_test, y_hat):.4f}\n"
    f"MAE  : {errors.mae(y_test, y_hat):.4f}"
)

RMSE : 82.1849
MAE  : 13.5092


In [16]:
# Fold 3: fit on buildings 3 and 2 (in that order), evaluate on building 1.
# Each entry maps a building number to an inclusive "YYYY-MM-DD" date range.
train = {
    3: dict(start_time="2011-04-19", end_time="2011-05-22"),
    2: dict(start_time="2011-04-21", end_time="2011-05-21"),
}
test = {
    1: dict(start_time="2011-04-28", end_time="2011-05-15"),
}
# Only the first appliance in this list is used by dataset_load.
appliances = ["Microwave"]

In [17]:
# Reload and standardise the data for the current train/test configuration;
# this overwrites the arrays and scalers of the previous fold.
# NOTE(review): `x_train_timstamp` is misspelled (missing "e"); the name is
# kept as-is so any cell relying on it keeps working.
(
    x_train,
    y_train,
    x_test,
    y_test,
    x_train_timstamp,
    x_test_timestamp,
    scaler_x,
    scaler_y,
    scaler_time,
) = dataset_load(appliances, train, test)

In [18]:
# Create a new seq2point instance for this fold (default constructor arguments).
model = seq2point.seq2point()

In [19]:
%%capture cap --no-stderr
%%time
# The cell's stdout (including the %%time report) is captured into `cap`,
# replacing whatever an earlier cell stored there; stderr stays visible.
# Parameters are re-initialised from the same fixed PRNG key, then trained for
# 200 epochs with batch size 32 and learning rate 0.001.
# NOTE(review): the positional booleans (True for init, False for fit) are
# presumably a train/deterministic switch -- confirm against models.seq2point
# and utilities.fits.fit.
params =  model.init(jax.random.PRNGKey(0), x_train, True)
params, losses = fit(model, params, x_train, y_train, False, batch_size=32, learning_rate=0.001, epochs=200)

In [20]:
# Predict on the test windows and map the outputs back to the target's
# original units with the scaler fitted on y_train.
y_hat = scaler_y.inverse_transform(
    model.apply(params, x_test, True, rngs={"dropout": jax.random.PRNGKey(0)})
)

In [21]:
# Report errors in original units (y_test is returned unscaled by dataset_load).
print(
    f"RMSE : {errors.rmse(y_test, y_hat):.4f}\n"
    f"MAE  : {errors.mae(y_test, y_hat):.4f}"
)

RMSE : 107.6699
MAE  : 17.0333


In [None]:
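# Summary: MAE on the held-out (test) building of each fold
# MAE : 14.5782  (test building 2)
# MAE : 13.5092  (test building 3)
# MAE : 17.0333  (test building 1)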