In [1]:
import numpy as np
import pandas as pd
import os
import re
import math
import random
import pickle
import tensorflow as tf
import matplotlib.dates as mdates
from datetime import time
xformatter = mdates.DateFormatter('%H:%M')  # for time axis plots
import datetime
from dateutil.parser import parse
from sklearn.metrics import mean_squared_error
import seaborn as sns
import matplotlib.pyplot as plt
import plotly.graph_objects as go
%matplotlib inline
from matplotlib import style
style.use('seaborn-whitegrid')

from pandas.tseries.frequencies import to_offset
from pickle import load,dump
from sklearn.preprocessing import MinMaxScaler
from sklearn.feature_selection import SelectKBest,f_regression
from keras.models import Sequential, load_model
from keras.layers import Dense
from keras.layers import LSTM,GRU,Bidirectional
from keras.models import load_model
from keras.callbacks import EarlyStopping
from keras.backend import clear_session
import warnings
warnings.filterwarnings('ignore')

In [2]:
FORMAT_TIME='%Y-%m-%d %H:%M:%S'

In [3]:
# Nhap du lieu train data
def import_train_data(path_file_train):
    df = pd.read_csv(path_file_train)
    df['TimeStamp'] = pd.to_datetime(df['TimeStamp']
                                      )
    df = df.set_index('TimeStamp')
    return df

In [4]:
def resample_df(df, resample_time, time_col='TimeStamp'):
    """
    resample_time: `minute`
    """
    resample_df = df.copy()
    if resample_time >= 30:
        resample_df = resample_df.set_index(
            resample_df[time_col] - to_offset(str(resample_time//2)+"min"))
    resample_df = resample_df.resample(str(resample_time)+'min', label='right').mean()
    return resample_df

In [5]:
all_plant = import_train_data('./LN2_training_9.csv')
# all_plant = all_plant.reset_index()
all_plant = resample_df(all_plant, resample_time = 15, time_col='TimeStamp')

In [6]:
# Split train, val data theo ti le 8/2
def train_valid_split(df, split_ratio=[0.8, 0.2]):
    train_ratio, valid_ratio = split_ratio
    assert train_ratio + valid_ratio  == 1.0
    n_df = len(df)
    # Train / Validation  Split
    train_split = int(n_df * train_ratio)
    valid_split = int(n_df * (train_ratio + valid_ratio))

    train = df[:train_split]
    val = df[train_split:valid_split]

    print(f'Train set: {len(train)} ')
    print(f'Validation set: {len(val)} ')

    return train, val 

In [7]:
def load_scale(df, path_scale):
    scale_df = pd.DataFrame(index=df.index)
    for col in df.columns:
        scaler = pickle.load(open(path_scale + col+'.pkl','rb'))
        scale_df[col] = scaler.transform(df[col].values.reshape(-1,1))[:,0]
    return scale_df

In [8]:
def num_step_1hour(df):
    """
    Get number step of 1 hours
    """
    step_hours = None
    if type(df.index) == pd.core.indexes.datetimes.DatetimeIndex:
        time_1step = int((df.index[1] - df.index[0]) /
                         np.timedelta64(1, 'm'))  # minute
        step_hours = 60 // time_1step
    return step_hours

In [9]:
def make_data_supervised(dt, num_pre_around=5, num_day_pre=3):
    step_lag_1_day = num_step_1hour(dt)*24
    dt_lag = pd.DataFrame()
    for col in dt.columns:
        for day_pre in range(num_pre_around+1):
            if day_pre == 0:
                dt_lag[col+'(t)'] = dt[col]
            else:
                dt_lag[col+'(t-'+str(day_pre)+')'] = dt[col].shift(day_pre)
        for day_pre in range(1, num_day_pre):
            step_lag = step_lag_1_day*day_pre
            for lag in range(num_pre_around+1):
                dt_lag[col+'(t-'+str(step_lag+lag) +
                       ')'] = dt[col].shift(step_lag+lag)
                dt_lag[col+'(t-'+str(step_lag-lag) +
                       ')'] = dt[col].shift(step_lag-lag)
    dt_lag = dt_lag.dropna()
    dt_lag['hour'] = dt_lag.index.hour
    dt_lag['day'] = dt_lag.index.day
    dt_lag['day_of_week'] = dt_lag.index.dayofweek
    dt_lag['month'] = dt_lag.index.month
    dt_lag['day_of_year'] = dt_lag.index.dayofyear
    return dt_lag

In [10]:
def get_or_create_path(path):
    if os.path.isdir(path) is False:
        os.mkdir(path)

In [11]:
def scale_func(train, val):
    train_scale_df = pd.DataFrame(index=train.index)
    val_scale_df = pd.DataFrame(index=val.index)
    
    for col in train.columns:
        scaler = MinMaxScaler()
        train_scale_df[col] = scaler.fit_transform(train[col].values.reshape(-1,1))[:,0]
        val_scale_df[col] = scaler.transform(val[col].values.reshape(-1,1))[:,0]
    return train_scale_df,val_scale_df

In [12]:
train_solar, val_solar = train_valid_split(all_plant,split_ratio=[0.8, 0.2])

Train set: 2304 
Validation set: 576 


In [13]:
train_scale, val_scale = scale_func(train_solar, val_solar)

In [14]:
train_scaler_lag = make_data_supervised(train_scale)
val_scaler_lag = make_data_supervised(val_scale)

In [15]:
col_analysis = list(train_scaler_lag)
train_solar = train_scaler_lag[col_analysis].copy()
val_solar = val_scaler_lag[col_analysis].copy()

In [16]:
train_solar

Unnamed: 0_level_0,TotW(t),TotW(t-1),TotW(t-2),TotW(t-3),TotW(t-4),TotW(t-5),TotW(t-96),TotW(t-97),TotW(t-95),TotW(t-98),...,T(t-189),T(t-196),T(t-188),T(t-197),T(t-187),hour,day,day_of_week,month,day_of_year
TimeStamp,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2022-09-03 01:30:00,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.172417,0.193025,0.169472,0.196215,0.166528,1,3,5,9,246
2022-09-03 01:45:00,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.169472,0.190081,0.166528,0.193025,0.163584,1,3,5,9,246
2022-09-03 02:00:00,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.166528,0.187137,0.163584,0.190081,0.160640,2,3,5,9,246
2022-09-03 02:15:00,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.163584,0.184193,0.160640,0.187137,0.159990,2,3,5,9,246
2022-09-03 02:30:00,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.160640,0.181249,0.159990,0.184193,0.160779,2,3,5,9,246
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2022-09-24 23:00:00,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.123879,0.103596,0.126776,0.100698,0.129674,23,24,5,9,267
2022-09-24 23:15:00,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.126776,0.106493,0.129674,0.103596,0.132571,23,24,5,9,267
2022-09-24 23:30:00,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.129674,0.109391,0.132571,0.106493,0.135469,23,24,5,9,267
2022-09-24 23:45:00,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.132571,0.112288,0.135469,0.109391,0.138367,23,24,5,9,267


In [17]:
train_solar.values[:, 0:].shape

(2107, 89)

In [18]:
train_solar.values[:, :1]

array([[0.],
       [0.],
       [0.],
       ...,
       [0.],
       [0.],
       [0.]])

In [19]:
train_X, train_y= train_solar.values[:, 1:],train_solar.values[:, :1]
train_X = train_X.reshape(train_X.shape[0], 1, train_X.shape[1])
val_X, val_y= val_solar.values[:, 1:],val_solar.values[:, :1]
val_X = val_X.reshape(val_X.shape[0], 1, val_X.shape[1])

In [20]:
train_X.shape

(2107, 1, 88)

In [21]:
model = load_model('./LN2/part1/time1/4H.h5')

In [22]:
model.layers

[<keras.layers.wrappers.Bidirectional at 0x23c0dba6d60>,
 <keras.layers.core.dense.Dense at 0x23c0de53be0>]

In [23]:
# Freeze the layers except the last one
for layer in model.layers[:-1]:
    layer.trainable = False

In [24]:
model.compile(loss='mae', optimizer=tf.keras.optimizers.Adam(lr=0.001))

In [25]:
history = model.fit(train_X, train_y,validation_data=(val_X, val_y),callbacks = EarlyStopping(
            monitor='val_loss',
            patience=15,
            restore_best_weights=True), 
            epochs=100, batch_size=50, verbose=0,shuffle=False)
fig, ax = plt.subplots(2, 1, figsize=(14, 12))
fig.suptitle('Loss', y=0.93)
ax[0].plot(history.history['mae'], label='train')
ax[0].plot(history.history['val_mae'], label='val')
ax[0].set_title('mae')
ax[0].legend(loc='upper right')

ValueError: in user code:

    File "c:\users\ad\appdata\local\programs\python\python39\lib\site-packages\keras\engine\training.py", line 878, in train_function  *
        return step_function(self, iterator)
    File "c:\users\ad\appdata\local\programs\python\python39\lib\site-packages\keras\engine\training.py", line 867, in step_function  **
        outputs = model.distribute_strategy.run(run_step, args=(data,))
    File "c:\users\ad\appdata\local\programs\python\python39\lib\site-packages\keras\engine\training.py", line 860, in run_step  **
        outputs = model.train_step(data)
    File "c:\users\ad\appdata\local\programs\python\python39\lib\site-packages\keras\engine\training.py", line 808, in train_step
        y_pred = self(x, training=True)
    File "c:\users\ad\appdata\local\programs\python\python39\lib\site-packages\keras\utils\traceback_utils.py", line 67, in error_handler
        raise e.with_traceback(filtered_tb) from None
    File "c:\users\ad\appdata\local\programs\python\python39\lib\site-packages\keras\engine\input_spec.py", line 263, in assert_input_compatibility
        raise ValueError(f'Input {input_index} of layer "{layer_name}" is '

    ValueError: Input 0 of layer "sequential" is incompatible with the layer: expected shape=(None, 1, 51), found shape=(None, 1, 88)
