In [1]:
import pandas as pd
import numpy as np

In [40]:
import tensorflow as tf
from tensorflow import keras as k
from tensorflow.keras import layers as l
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
from sklearn.preprocessing import MinMaxScaler, StandardScaler


In [46]:
class Data:
    def __init__(self, path: str) -> None:    
        demand_path = path + "/Demand_history.csv"
        existingEV_path = path + "/existing_EV_infrastructure_2018.csv"
        self.df_orig = pd.read_csv(demand_path)
        self.df_trans = self.df_orig
        self.years_window = 2
        self.y_cols = [f"n-{y}" for y in range(1, self.years_window + 1)]
        self.dem_scaler = MinMaxScaler()
        self.coord_scaler = MinMaxScaler()
        self.seq_len = 8
        
    def clean(self):
        self.df_orig.loc[(self.df_orig != 0).any(1)]
        print(self.df_orig)
        
    def process(self):
        all_dem = self.df_trans[
            self.df_trans.columns[self.df_trans.columns.str.startswith('20')]].stack()
        self.dem_scaler.fit(all_dem.values.reshape(-1, 1))
        self.coord_scaler.fit(self.df_trans.loc[:, self.df_trans.columns.str.contains('coord')])
        
        self.x_proc = pd.DataFrame(columns=["x_coordinate", "y_coordinate", *self.y_cols])
        self.y_proc = pd.Series(dtype=np.float64)
        for y in self.df_trans.columns[self.df_trans.columns.str.startswith('20')]:
            y = int(y)
            if y < 2010 + self.years_window:
                continue
            y_cols = [f"{y - i}" for i in range(1, self.years_window + 1)]
            stack_x = self.df_trans.loc[:, self.df_trans.columns.isin(["x_coordinate", "y_coordinate", *y_cols])]
            stack_x.loc[:, ["x_coordinate", "y_coordinate"]] = self.coord_scaler.transform(stack_x.loc[:, ["x_coordinate", "y_coordinate"]])
            for col in y_cols:
                stack_x.loc[:, col] = self.dem_scaler.transform(stack_x.loc[:, col].values.reshape(-1, 1))
            y_dict = {f"{y - i}": f"n-{i}" for i in range(1, self.years_window + 1)}
            stack_x = stack_x.rename(columns=y_dict)
            stack_y = self.df_trans.loc[:, f"{y}"]
            stack_y = pd.Series(self.dem_scaler.transform(stack_y.values.reshape(-1, 1)).flatten())
            self.x_proc = pd.concat([self.x_proc, stack_x], axis=0, ignore_index=True)
            self.y_proc = pd.concat([self.y_proc, stack_y], axis=0, ignore_index=True)
        
        self.x_list = [self.x_proc.iloc[i * self.seq_len: (i + 1) * self.seq_len, :] for i in range(int(self.x_proc.shape[0] / self.seq_len))]
        self.y_list = [self.y_proc.iloc[i * self.seq_len: (i + 1) * self.seq_len] for i in range(int(self.x_proc.shape[0] / self.seq_len))]
        # self.x_list = np.array(self.x_list)
        # self.y_list = np.array(self.y_list)
        self.train_idx = np.random.choice(len(self.x_list), int(len(self.x_list) * 0.8), replace=False)
        self.test_idx = [i for i in range(len(self.x_list)) if i not in self.train_idx]   
        
        
    def datagen(self, kind):
        if kind == 'train':
            idxs = self.train_idx
        elif kind == 'valid':
            idxs = self.test_idx
            
        x = [self.x_list[i].values for i in idxs] 
        y = [self.y_list[i].values for i in idxs] 
        return np.array(x), np.array(y)
                    
    def addYearDemandfromForecast(self, year: int, predicted: np.array) -> None:
        predicted = predicted.flatten().reshape(-1, 1)
        predicted = self.dem_scaler.inverse_transform(predicted)
        self.df_trans[f"{year}"] = abs(predicted)        
                  
path = "data"
data = Data(path)
data.process()

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  stack_x.loc[:, ["x_coordinate", "y_coordinate"]] = self.coord_scaler.transform(stack_x.loc[:, ["x_coordinate", "y_coordinate"]])
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  stack_x.loc[:, col] = self.dem_scaler.transform(stack_x.loc[:, col].values.reshape(-1, 1))
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  stack_x.loc[:, col] = self.dem_scaler.tran

In [50]:
class Model:
    def __init__(self, data: Data) -> None:
        self.data = data
        self.batch_size = 16
        self.filter_size = 64
        self.epochs = 1000
        self.regularizer = k.regularizers.L1L2(l1=0, l2=0)
    
    def createModel(self):
        inputs = l.Input(shape=(data.seq_len, data.years_window + 2))
        cnn1 = l.Conv1D(self.filter_size, kernel_size=3, activation="relu", padding="same",
                        kernel_regularizer=self.regularizer)(inputs)
        cnn2 = l.Conv1D(self.filter_size, kernel_size=3, activation="relu", padding="same",
                        kernel_regularizer=self.regularizer)(cnn1)
        mp1 = l.MaxPool1D(pool_size=2)(cnn2)
        cnn3 = l.Conv1D(self.filter_size * 2, kernel_size=3, activation="relu", padding="same",
                        kernel_regularizer=self.regularizer)(mp1)
        cnn4 = l.Conv1D(self.filter_size * 2, kernel_size=3, activation="relu", padding="same",
                        kernel_regularizer=self.regularizer)(cnn3)
        cnn5 = l.Conv1D(self.filter_size * 2, kernel_size=3, activation="relu", padding="same",
                        kernel_regularizer=self.regularizer)(cnn4)
        mp1 = l.MaxPool1D(pool_size=2)(cnn5)
        cnn3 = l.Conv1D(self.filter_size * 4, kernel_size=3, activation="relu", padding="same",
                        kernel_regularizer=self.regularizer)(mp1)
        cnn4 = l.Conv1D(self.filter_size * 4, kernel_size=3, activation="relu", padding="same",
                        kernel_regularizer=self.regularizer)(cnn3)
        cnn5 = l.Conv1D(self.filter_size * 4, kernel_size=3, activation="relu", padding="same",
                        kernel_regularizer=self.regularizer)(cnn4)
        mp2 = l.MaxPool1D(pool_size=2)(cnn5)
        fl = l.Flatten()(mp2)
        do = l.Dropout(0.1)(fl)
        outputs = l.Dense(data.seq_len, activation="relu")(do)
        self.model = k.Model(inputs=inputs, outputs=outputs)
        self.model.compile(optimizer=k.optimizers.Adam(learning_rate=1e-3), loss='mse')
        self.model.summary()
    
    def train(self) -> None:
        
        callbacks = []
        # mp = "/mod/checkpoint"
        # cbcp = k.ModelCheckpoint(mp,
        #             monitor='val_mse', mode="auto", verbose=0,
        #             save_best_only=True, save_weights_only=True, save_freq="epoch")
        cbes = k.callbacks.EarlyStopping(
            monitor="val_loss",
            min_delta=0,
            patience=100,
            verbose=0,
            mode="auto",
            baseline=None,
            restore_best_weights=True,
        )
        callbacks.append(cbes)
        
        rlr = k.callbacks.ReduceLROnPlateau(monitor="val_loss",
            factor=0.5,
            patience=20,
            min_lr=0,
            min_delta=0.00008)
        
        callbacks.append(rlr)
        
        
        x_t, y_t = data.datagen('train')
        x_v, y_v = data.datagen('valid')
        
        self.history = self.model.fit(x_t, y_t, validation_data=(x_v, y_v),
                                      epochs=self.epochs, verbose=1, callbacks=callbacks)
        # self.model.load_weights(mp) 
        
        pred = self.model.predict(x_t)
        train_rmse = np.sqrt(mean_squared_error(y_t, pred))
        train_mae = mean_absolute_error(y_t, pred)   
        train_r2 = r2_score(y_t, pred) 
        pred = self.model.predict(x_v)
        test_rmse = np.sqrt(mean_squared_error(y_v, pred))
        test_mae = mean_absolute_error(y_v, pred)   
        test_r2 = r2_score(y_v, pred) 
           
        
        print(f"training: rmse={train_rmse}, mae={train_mae}, r2={train_r2}")   
        print(f"test: rmse={test_rmse}, mae={test_mae}, r2={test_r2}")                        
    
    def predict(self, year: int) -> pd.Series:
        y_cols = [f"{year - i}" for i in range(1, data.years_window + 1)]
        x_forecast = \
            data.df_trans.loc[:,
                              data.df_trans.columns.isin(["x_coordinate", "y_coordinate", *y_cols])]        
        x_forecast.loc[:, ["x_coordinate", "y_coordinate"]] = data.coord_scaler.transform(x_forecast.loc[:, ["x_coordinate", "y_coordinate"]])
        for col in y_cols:  
            x_forecast.loc[:, col] = data.dem_scaler.transform(x_forecast.loc[:, col].values.reshape(-1, 1))
        x_list = [x_forecast.iloc[i * data.seq_len: (i + 1) * data.seq_len, :] for i in range(int(x_forecast.shape[0] / data.seq_len))]
        x_forecast = np.array(x_list)
        print(x_forecast.shape)
        return self.model.predict(x_forecast)
    
    def set_params(self, params):
        params['n_estimators'] = int(params['n_estimators'])
        params['max_depth'] = int(params['max_depth'])
        self.model.set_params(**params)
        

In [51]:
model = Model(data)

In [52]:
model.createModel()

Model: "model_8"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_11 (InputLayer)       [(None, 8, 4)]            0         
                                                                 
 conv1d_38 (Conv1D)          (None, 8, 64)             832       
                                                                 
 conv1d_39 (Conv1D)          (None, 8, 64)             12352     
                                                                 
 max_pooling1d_17 (MaxPoolin  (None, 4, 64)            0         
 g1D)                                                            
                                                                 
 conv1d_40 (Conv1D)          (None, 4, 128)            24704     
                                                                 
 conv1d_41 (Conv1D)          (None, 4, 128)            49280     
                                                           

In [53]:
model.train()

Epoch 1/1000


2022-09-16 10:45:39.000574: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:113] Plugin optimizer for device_type GPU is enabled.




2022-09-16 10:45:41.049389: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:113] Plugin optimizer for device_type GPU is enabled.


Epoch 2/1000
Epoch 3/1000
Epoch 4/1000
Epoch 5/1000
Epoch 6/1000
Epoch 7/1000
Epoch 8/1000
Epoch 9/1000
Epoch 10/1000
Epoch 11/1000
Epoch 12/1000
Epoch 13/1000
Epoch 14/1000
Epoch 15/1000
Epoch 16/1000
Epoch 17/1000
Epoch 18/1000
Epoch 19/1000
Epoch 20/1000
Epoch 21/1000
Epoch 22/1000
Epoch 23/1000
Epoch 24/1000
Epoch 25/1000
Epoch 26/1000
Epoch 27/1000
Epoch 28/1000
Epoch 29/1000
Epoch 30/1000
Epoch 31/1000
Epoch 32/1000
Epoch 33/1000
Epoch 34/1000
Epoch 35/1000
Epoch 36/1000
Epoch 37/1000
Epoch 38/1000
Epoch 39/1000
Epoch 40/1000
Epoch 41/1000
Epoch 42/1000
Epoch 43/1000
Epoch 44/1000
Epoch 45/1000
Epoch 46/1000
Epoch 47/1000
Epoch 48/1000
Epoch 49/1000
Epoch 50/1000
Epoch 51/1000
Epoch 52/1000
Epoch 53/1000
Epoch 54/1000
Epoch 55/1000
Epoch 56/1000
Epoch 57/1000
Epoch 58/1000
Epoch 59/1000
Epoch 60/1000
Epoch 61/1000
Epoch 62/1000
Epoch 63/1000
Epoch 64/1000
Epoch 65/1000
Epoch 66/1000
Epoch 67/1000
Epoch 68/1000
Epoch 69/1000
Epoch 70/1000
Epoch 71/1000
Epoch 72/1000
Epoch 73/1000


2022-09-16 10:49:41.113628: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:113] Plugin optimizer for device_type GPU is enabled.


training: rmse=0.0118104977384521, mae=0.007737636392221938, r2=0.9958170838013294
test: rmse=0.030786544355882788, mae=0.01741299123442782, r2=0.9710634096168204


In [54]:
predicted_2019 = model.predict(2019)

(512, 8, 4)


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  x_forecast.loc[:, ["x_coordinate", "y_coordinate"]] = data.coord_scaler.transform(x_forecast.loc[:, ["x_coordinate", "y_coordinate"]])
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  x_forecast.loc[:, col] = data.dem_scaler.transform(x_forecast.loc[:, col].values.reshape(-1, 1))
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  x_forecast.loc[:, col] = data.

In [55]:
data.addYearDemandfromForecast(2019, predicted_2019)

In [56]:
data.df_trans

Unnamed: 0,demand_point_index,x_coordinate,y_coordinate,2010,2011,2012,2013,2014,2015,2016,2017,2018,2019
0,0,0.5,0.5,0.352242,0.667932,0.958593,2.911901,4.338274,6.561995,8.454417,10.595324,13.119572,16.135149
1,1,1.5,0.5,0.325940,0.591964,0.862652,2.589068,4.196034,5.745551,8.753195,11.126995,12.020091,15.068899
2,2,2.5,0.5,0.373752,0.591890,0.969733,2.641432,3.541772,5.469161,8.414627,10.115336,14.018254,15.171862
3,3,3.5,0.5,0.420686,0.584055,0.906547,2.378577,3.888121,5.846089,9.083868,12.424885,15.012302,16.787079
4,4,4.5,0.5,0.475621,0.647940,0.981544,2.665400,4.218711,6.776609,8.851107,11.731131,16.355563,17.157883
...,...,...,...,...,...,...,...,...,...,...,...,...,...
4091,4091,59.5,63.5,0.171015,0.334565,0.556055,1.373291,1.837586,2.517146,3.352280,4.149888,5.426193,7.562072
4092,4092,60.5,63.5,0.041716,0.061741,0.131291,0.386540,0.755846,0.941116,1.107797,1.309479,2.057450,3.184400
4093,4093,61.5,63.5,0.100895,0.180352,0.296299,0.705373,1.300220,1.608609,1.822806,2.333681,3.218519,3.645210
4094,4094,62.5,63.5,0.155353,0.290825,0.557803,1.516066,2.399426,2.719197,4.494515,6.096858,6.262574,11.451424


In [57]:
predicted_2020 = model.predict(2020)

(512, 8, 4)


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  x_forecast.loc[:, ["x_coordinate", "y_coordinate"]] = data.coord_scaler.transform(x_forecast.loc[:, ["x_coordinate", "y_coordinate"]])
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  x_forecast.loc[:, col] = data.dem_scaler.transform(x_forecast.loc[:, col].values.reshape(-1, 1))
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#return

In [58]:
data.addYearDemandfromForecast(2020, predicted_2020)

In [59]:
data.df_trans

Unnamed: 0,demand_point_index,x_coordinate,y_coordinate,2010,2011,2012,2013,2014,2015,2016,2017,2018,2019,2020
0,0,0.5,0.5,0.352242,0.667932,0.958593,2.911901,4.338274,6.561995,8.454417,10.595324,13.119572,16.135149,19.231125
1,1,1.5,0.5,0.325940,0.591964,0.862652,2.589068,4.196034,5.745551,8.753195,11.126995,12.020091,15.068899,17.595203
2,2,2.5,0.5,0.373752,0.591890,0.969733,2.641432,3.541772,5.469161,8.414627,10.115336,14.018254,15.171862,17.007502
3,3,3.5,0.5,0.420686,0.584055,0.906547,2.378577,3.888121,5.846089,9.083868,12.424885,15.012302,16.787079,18.480434
4,4,4.5,0.5,0.475621,0.647940,0.981544,2.665400,4.218711,6.776609,8.851107,11.731131,16.355563,17.157883,18.493801
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4091,4091,59.5,63.5,0.171015,0.334565,0.556055,1.373291,1.837586,2.517146,3.352280,4.149888,5.426193,7.562072,11.123329
4092,4092,60.5,63.5,0.041716,0.061741,0.131291,0.386540,0.755846,0.941116,1.107797,1.309479,2.057450,3.184400,4.729700
4093,4093,61.5,63.5,0.100895,0.180352,0.296299,0.705373,1.300220,1.608609,1.822806,2.333681,3.218519,3.645210,5.762763
4094,4094,62.5,63.5,0.155353,0.290825,0.557803,1.516066,2.399426,2.719197,4.494515,6.096858,6.262574,11.451424,17.879885


In [39]:
data.df_trans.to_csv("data/forecast.csv")