### Time encoding & Min-Max normalization for numerical columns

In [1]:
import os, random, json, time, argparse, warnings
warnings.filterwarnings(action='ignore')

import numpy as np
import pandas as pd
import math

from glob import glob
from tqdm import tqdm

import torch
import torch.nn as nn
import torch.nn.functional as F
from sklearn.preprocessing import MinMaxScaler, LabelEncoder

In [2]:
data = pd.read_csv('./data/Tetuan City power consumption.csv')
data

Unnamed: 0,DateTime,Temperature,Humidity,Wind Speed,general diffuse flows,diffuse flows,Zone 1 Power Consumption,Zone 2 Power Consumption,Zone 3 Power Consumption
0,1/1/2017 0:00,6.559,73.8,0.083,0.051,0.119,34055.69620,16128.87538,20240.96386
1,1/1/2017 0:10,6.414,74.5,0.083,0.070,0.085,29814.68354,19375.07599,20131.08434
2,1/1/2017 0:20,6.313,74.5,0.080,0.062,0.100,29128.10127,19006.68693,19668.43373
3,1/1/2017 0:30,6.121,75.0,0.083,0.091,0.096,28228.86076,18361.09422,18899.27711
4,1/1/2017 0:40,5.921,75.7,0.081,0.048,0.085,27335.69620,17872.34043,18442.40964
...,...,...,...,...,...,...,...,...,...
52411,12/30/2017 23:10,7.010,72.4,0.080,0.040,0.096,31160.45627,26857.31820,14780.31212
52412,12/30/2017 23:20,6.947,72.6,0.082,0.051,0.093,30430.41825,26124.57809,14428.81152
52413,12/30/2017 23:30,6.900,72.8,0.086,0.084,0.074,29590.87452,25277.69254,13806.48259
52414,12/30/2017 23:40,6.758,73.0,0.080,0.066,0.089,28958.17490,24692.23688,13512.60504


In [3]:
data.info()
# DateTime: Categorical -> Time encoding
# Rest of the columns: Numerical

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 52416 entries, 0 to 52415
Data columns (total 9 columns):
 #   Column                     Non-Null Count  Dtype  
---  ------                     --------------  -----  
 0   DateTime                   52416 non-null  object 
 1   Temperature                52416 non-null  float64
 2   Humidity                   52416 non-null  float64
 3   Wind Speed                 52416 non-null  float64
 4   general diffuse flows      52416 non-null  float64
 5   diffuse flows              52416 non-null  float64
 6   Zone 1 Power Consumption   52416 non-null  float64
 7   Zone 2  Power Consumption  52416 non-null  float64
 8   Zone 3  Power Consumption  52416 non-null  float64
dtypes: float64(8), object(1)
memory usage: 3.6+ MB


In [4]:
def transformation(column):
  max_value = column.max()
  sin_values = [math.sin((2*math.pi*x)/max_value) for x in list(column)]
  cos_values = [math.cos((2*math.pi*x)/max_value) for x in list(column)]
  return sin_values, cos_values

In [5]:
month_list = []
day_list = []
hour_list = []
minute_list = []

dayofyear_list = []
weekofyear_list = []
dayofweek_list = []
quarter_list = []

month_num_days = {0:0, 1:31, 2:59, 3:90, 4:120, 5:151, 6:181, 7:212, 8:243, 9:273, 10:304, 11:334, 12:364}

for index, row in data.iterrows():
    c, t = row['DateTime'].split()
    month, day, year = c.split('/')
    hour, minute = t.split(':')

    month_list.append(int(month))
    day_list.append(int(day))
    hour_list.append(int(hour))
    minute_list.append(int(minute))

    dayofyear = month_num_days[int(month)-1] + int(day)
    weekofyear = dayofyear // 7 + 1
    dayofweek = dayofyear % 7
    quarter = dayofyear % 4

    dayofyear_list.append(dayofyear)
    weekofyear_list.append(weekofyear)
    dayofweek_list.append(dayofweek)
    quarter_list.append(quarter)

data['month'] = month_list
data['day'] = day_list
data['hour'] = hour_list
data['minute'] = minute_list
data['dayofyear'] = dayofyear_list
data['weekofyear'] = weekofyear_list
data['dayofweek'] = dayofweek_list
data['quarter'] = quarter_list
data.drop(['DateTime'], axis=1, inplace=True)
data

Unnamed: 0,Temperature,Humidity,Wind Speed,general diffuse flows,diffuse flows,Zone 1 Power Consumption,Zone 2 Power Consumption,Zone 3 Power Consumption,month,day,hour,minute,dayofyear,weekofyear,dayofweek,quarter
0,6.559,73.8,0.083,0.051,0.119,34055.69620,16128.87538,20240.96386,1,1,0,0,1,1,1,1
1,6.414,74.5,0.083,0.070,0.085,29814.68354,19375.07599,20131.08434,1,1,0,10,1,1,1,1
2,6.313,74.5,0.080,0.062,0.100,29128.10127,19006.68693,19668.43373,1,1,0,20,1,1,1,1
3,6.121,75.0,0.083,0.091,0.096,28228.86076,18361.09422,18899.27711,1,1,0,30,1,1,1,1
4,5.921,75.7,0.081,0.048,0.085,27335.69620,17872.34043,18442.40964,1,1,0,40,1,1,1,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
52411,7.010,72.4,0.080,0.040,0.096,31160.45627,26857.31820,14780.31212,12,30,23,10,364,53,0,0
52412,6.947,72.6,0.082,0.051,0.093,30430.41825,26124.57809,14428.81152,12,30,23,20,364,53,0,0
52413,6.900,72.8,0.086,0.084,0.074,29590.87452,25277.69254,13806.48259,12,30,23,30,364,53,0,0
52414,6.758,73.0,0.080,0.066,0.089,28958.17490,24692.23688,13512.60504,12,30,23,40,364,53,0,0


In [6]:
data['dayofweek'][data['dayofweek'] == 0] = 7
data['quarter'][data['quarter'] == 0] = 4

data.drop(data[data.minute != 0].index, inplace=True)
data.reset_index(drop=True, inplace=True)
data.drop(['minute'], axis=1, inplace=True)
data

Unnamed: 0,Temperature,Humidity,Wind Speed,general diffuse flows,diffuse flows,Zone 1 Power Consumption,Zone 2 Power Consumption,Zone 3 Power Consumption,month,day,hour,dayofyear,weekofyear,dayofweek,quarter
0,6.559,73.80,0.083,0.051,0.119,34055.69620,16128.87538,20240.96386,1,1,0,1,1,1,1
1,5.641,77.70,0.080,0.048,0.096,25998.98734,16993.31307,17945.06024,1,1,1,1,1,1,1
2,5.059,78.60,0.081,0.070,0.096,23003.54430,15169.60486,16117.59036,1,1,2,1,1,1,1
3,5.169,77.90,0.083,0.066,0.108,21107.84810,13535.56231,15140.24096,1,1,3,1,1,1,1
4,4.753,75.70,0.083,0.044,0.134,20524.55696,12820.66869,14585.06024,1,1,4,1,1,1,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
8731,9.920,61.15,0.073,0.062,0.096,37694.29658,32623.50414,16825.93037,12,30,19,364,53,7,4
8732,9.020,63.38,0.075,0.048,0.130,37146.76806,32373.12059,16941.17647,12,30,20,364,53,7,4
8733,8.800,65.49,0.082,0.088,0.115,36343.72624,31345.81160,16877.79112,12,30,21,364,53,7,4
8734,8.090,68.49,0.079,0.070,0.085,34920.15209,29445.84228,16053.78151,12,30,22,364,53,7,4


In [7]:
month_sin, month_cos = transformation(data['month'])
day_sin, day_cos = transformation(data['day'])
hour_sin, hour_cos = transformation(data['hour'])
dayofyear_sin, dayofyear_cos = transformation(data['dayofyear'])
weekofyear_sin, weekofyear_cos = transformation(data['weekofyear'])
dayofweek_sin, dayofweek_cos = transformation(data['dayofweek'])
quarter_sin, quarter_cos = transformation(data['quarter'])

In [8]:
calendar_data = pd.DataFrame({
    'month_sin': month_sin,
    'month_cos': month_cos,
    'day_sin' : day_sin,
    'day_cos' : day_cos,
    'hour_sin' : hour_sin,
    'hour_cos' : hour_cos,
    'dayofyear_sin' : dayofyear_sin,
    'dayofyear_cos' : dayofyear_cos,
    'weekofyear_sin' : weekofyear_sin,
    'weekofyear_cos' : weekofyear_cos,
    'dayofweek_sin' : dayofweek_sin,
    'dayofweek_cos' : dayofweek_cos,
    'quarter_sin' : quarter_sin,
    'quarter_cos' : quarter_cos 
})
calendar_data

Unnamed: 0,month_sin,month_cos,day_sin,day_cos,hour_sin,hour_cos,dayofyear_sin,dayofyear_cos,weekofyear_sin,weekofyear_cos,dayofweek_sin,dayofweek_cos,quarter_sin,quarter_cos
0,5.000000e-01,0.866025,0.201299,0.97953,0.000000e+00,1.000000,1.726064e-02,0.999851,1.182732e-01,0.992981,7.818315e-01,0.62349,1.000000e+00,6.123234e-17
1,5.000000e-01,0.866025,0.201299,0.97953,2.697968e-01,0.962917,1.726064e-02,0.999851,1.182732e-01,0.992981,7.818315e-01,0.62349,1.000000e+00,6.123234e-17
2,5.000000e-01,0.866025,0.201299,0.97953,5.195840e-01,0.854419,1.726064e-02,0.999851,1.182732e-01,0.992981,7.818315e-01,0.62349,1.000000e+00,6.123234e-17
3,5.000000e-01,0.866025,0.201299,0.97953,7.308360e-01,0.682553,1.726064e-02,0.999851,1.182732e-01,0.992981,7.818315e-01,0.62349,1.000000e+00,6.123234e-17
4,5.000000e-01,0.866025,0.201299,0.97953,8.878852e-01,0.460065,1.726064e-02,0.999851,1.182732e-01,0.992981,7.818315e-01,0.62349,1.000000e+00,6.123234e-17
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
8731,-2.449294e-16,1.000000,-0.201299,0.97953,-8.878852e-01,0.460065,-2.449294e-16,1.000000,-2.449294e-16,1.000000,-2.449294e-16,1.00000,-2.449294e-16,1.000000e+00
8732,-2.449294e-16,1.000000,-0.201299,0.97953,-7.308360e-01,0.682553,-2.449294e-16,1.000000,-2.449294e-16,1.000000,-2.449294e-16,1.00000,-2.449294e-16,1.000000e+00
8733,-2.449294e-16,1.000000,-0.201299,0.97953,-5.195840e-01,0.854419,-2.449294e-16,1.000000,-2.449294e-16,1.000000,-2.449294e-16,1.00000,-2.449294e-16,1.000000e+00
8734,-2.449294e-16,1.000000,-0.201299,0.97953,-2.697968e-01,0.962917,-2.449294e-16,1.000000,-2.449294e-16,1.000000,-2.449294e-16,1.00000,-2.449294e-16,1.000000e+00


In [9]:
data.drop(['month','day','hour','dayofyear','weekofyear','dayofweek','quarter'], axis=1, inplace=True)
data

Unnamed: 0,Temperature,Humidity,Wind Speed,general diffuse flows,diffuse flows,Zone 1 Power Consumption,Zone 2 Power Consumption,Zone 3 Power Consumption
0,6.559,73.80,0.083,0.051,0.119,34055.69620,16128.87538,20240.96386
1,5.641,77.70,0.080,0.048,0.096,25998.98734,16993.31307,17945.06024
2,5.059,78.60,0.081,0.070,0.096,23003.54430,15169.60486,16117.59036
3,5.169,77.90,0.083,0.066,0.108,21107.84810,13535.56231,15140.24096
4,4.753,75.70,0.083,0.044,0.134,20524.55696,12820.66869,14585.06024
...,...,...,...,...,...,...,...,...
8731,9.920,61.15,0.073,0.062,0.096,37694.29658,32623.50414,16825.93037
8732,9.020,63.38,0.075,0.048,0.130,37146.76806,32373.12059,16941.17647
8733,8.800,65.49,0.082,0.088,0.115,36343.72624,31345.81160,16877.79112
8734,8.090,68.49,0.079,0.070,0.085,34920.15209,29445.84228,16053.78151


In [10]:
# Rest of the columns: min-max normalization
minmax_scaler = MinMaxScaler()
data[data.columns] = minmax_scaler.fit_transform(data[data.columns])
data

Unnamed: 0,Temperature,Humidity,Wind Speed,general diffuse flows,diffuse flows,Zone 1 Power Consumption,Zone 2 Power Consumption,Zone 3 Power Consumption
0,0.082755,0.748587,0.005851,0.000038,0.000099,0.524307,0.262150,0.343517
1,0.057583,0.795501,0.005319,0.000035,0.000073,0.310192,0.293487,0.288387
2,0.041624,0.806327,0.005496,0.000054,0.000073,0.230585,0.227376,0.244505
3,0.044641,0.797907,0.005851,0.000051,0.000086,0.180205,0.168140,0.221036
4,0.033234,0.771442,0.005851,0.000032,0.000115,0.164704,0.142224,0.207705
...,...,...,...,...,...,...,...,...
8731,0.174916,0.596415,0.004078,0.000047,0.000073,0.621006,0.860097,0.261514
8732,0.150237,0.623241,0.004433,0.000035,0.000111,0.606455,0.851020,0.264281
8733,0.144205,0.648623,0.005674,0.000070,0.000094,0.585113,0.813779,0.262759
8734,0.124736,0.684711,0.005142,0.000054,0.000061,0.547281,0.744904,0.242972


In [11]:
normalized_data = pd.concat([data, calendar_data], axis=1)
normalized_data

Unnamed: 0,Temperature,Humidity,Wind Speed,general diffuse flows,diffuse flows,Zone 1 Power Consumption,Zone 2 Power Consumption,Zone 3 Power Consumption,month_sin,month_cos,...,hour_sin,hour_cos,dayofyear_sin,dayofyear_cos,weekofyear_sin,weekofyear_cos,dayofweek_sin,dayofweek_cos,quarter_sin,quarter_cos
0,0.082755,0.748587,0.005851,0.000038,0.000099,0.524307,0.262150,0.343517,5.000000e-01,0.866025,...,0.000000e+00,1.000000,1.726064e-02,0.999851,1.182732e-01,0.992981,7.818315e-01,0.62349,1.000000e+00,6.123234e-17
1,0.057583,0.795501,0.005319,0.000035,0.000073,0.310192,0.293487,0.288387,5.000000e-01,0.866025,...,2.697968e-01,0.962917,1.726064e-02,0.999851,1.182732e-01,0.992981,7.818315e-01,0.62349,1.000000e+00,6.123234e-17
2,0.041624,0.806327,0.005496,0.000054,0.000073,0.230585,0.227376,0.244505,5.000000e-01,0.866025,...,5.195840e-01,0.854419,1.726064e-02,0.999851,1.182732e-01,0.992981,7.818315e-01,0.62349,1.000000e+00,6.123234e-17
3,0.044641,0.797907,0.005851,0.000051,0.000086,0.180205,0.168140,0.221036,5.000000e-01,0.866025,...,7.308360e-01,0.682553,1.726064e-02,0.999851,1.182732e-01,0.992981,7.818315e-01,0.62349,1.000000e+00,6.123234e-17
4,0.033234,0.771442,0.005851,0.000032,0.000115,0.164704,0.142224,0.207705,5.000000e-01,0.866025,...,8.878852e-01,0.460065,1.726064e-02,0.999851,1.182732e-01,0.992981,7.818315e-01,0.62349,1.000000e+00,6.123234e-17
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
8731,0.174916,0.596415,0.004078,0.000047,0.000073,0.621006,0.860097,0.261514,-2.449294e-16,1.000000,...,-8.878852e-01,0.460065,-2.449294e-16,1.000000,-2.449294e-16,1.000000,-2.449294e-16,1.00000,-2.449294e-16,1.000000e+00
8732,0.150237,0.623241,0.004433,0.000035,0.000111,0.606455,0.851020,0.264281,-2.449294e-16,1.000000,...,-7.308360e-01,0.682553,-2.449294e-16,1.000000,-2.449294e-16,1.000000,-2.449294e-16,1.00000,-2.449294e-16,1.000000e+00
8733,0.144205,0.648623,0.005674,0.000070,0.000094,0.585113,0.813779,0.262759,-2.449294e-16,1.000000,...,-5.195840e-01,0.854419,-2.449294e-16,1.000000,-2.449294e-16,1.000000,-2.449294e-16,1.00000,-2.449294e-16,1.000000e+00
8734,0.124736,0.684711,0.005142,0.000054,0.000061,0.547281,0.744904,0.242972,-2.449294e-16,1.000000,...,-2.697968e-01,0.962917,-2.449294e-16,1.000000,-2.449294e-16,1.000000,-2.449294e-16,1.00000,-2.449294e-16,1.000000e+00


In [13]:
normalized_data.to_csv('./data/normalized_data.csv')

### Train/Validation/Test split

In [14]:
np.random.seed(2022)
unit_len = int(len(normalized_data) * 0.1)
train_len = unit_len * 7
val_len = unit_len * 1
print(train_len, val_len, len(normalized_data)-train_len-val_len)

6111 873 1752


In [15]:
train = normalized_data[:train_len]
valid = normalized_data[train_len:train_len+val_len]
test = normalized_data[train_len+val_len:]

In [16]:
train.to_csv('./data/train.csv', index=False)
valid.to_csv('./data/valid.csv', index=False)
test.to_csv('./data/test.csv', index=False)

### LPA/FPA/sFPA/tFPA (noise-injected) data

In [1]:
import os, random, json, time, argparse, warnings
warnings.filterwarnings(action='ignore')

import numpy as np
import pandas as pd
import math

from glob import glob
from tqdm import tqdm

import torch
import torch.nn as nn
import torch.nn.functional as F
from sklearn.preprocessing import MinMaxScaler

In [2]:
perturbed_data = pd.read_csv('./data/LPA/newLPAepsilon1.csv', index_col=0)
perturbed_data.reset_index(drop=True, inplace=True)
perturbed_data

Unnamed: 0,hZone1,hZone2,hZone3
0,34055.219280,16129.371128,20241.211524
1,25998.326545,16992.228646,17945.306218
2,23005.142082,15169.415935,16117.262747
3,21109.438625,13535.338349,15140.313879
4,20524.969156,12820.706737,14584.653503
...,...,...,...
8731,37694.136940,32623.156606,16826.815774
8732,37147.193434,32372.691938,16940.954046
8733,36342.948498,31346.559642,16877.707521
8734,34918.707398,29445.379052,16053.565203


In [3]:
normalized_data = pd.read_csv('./data/normalized_data.csv', index_col=0)
normalized_data

Unnamed: 0,Temperature,Humidity,Wind Speed,general diffuse flows,diffuse flows,Zone 1 Power Consumption,Zone 2 Power Consumption,Zone 3 Power Consumption,month_sin,month_cos,...,hour_sin,hour_cos,dayofyear_sin,dayofyear_cos,weekofyear_sin,weekofyear_cos,dayofweek_sin,dayofweek_cos,quarter_sin,quarter_cos
0,0.082755,0.748587,0.005851,0.000038,0.000099,0.524307,0.262150,0.343517,5.000000e-01,0.866025,...,0.000000e+00,1.000000,1.726064e-02,0.999851,1.182732e-01,0.992981,7.818315e-01,0.62349,1.000000e+00,6.123234e-17
1,0.057583,0.795501,0.005319,0.000035,0.000073,0.310192,0.293487,0.288387,5.000000e-01,0.866025,...,2.697968e-01,0.962917,1.726064e-02,0.999851,1.182732e-01,0.992981,7.818315e-01,0.62349,1.000000e+00,6.123234e-17
2,0.041624,0.806327,0.005496,0.000054,0.000073,0.230585,0.227376,0.244505,5.000000e-01,0.866025,...,5.195840e-01,0.854419,1.726064e-02,0.999851,1.182732e-01,0.992981,7.818315e-01,0.62349,1.000000e+00,6.123234e-17
3,0.044641,0.797907,0.005851,0.000051,0.000086,0.180205,0.168140,0.221036,5.000000e-01,0.866025,...,7.308360e-01,0.682553,1.726064e-02,0.999851,1.182732e-01,0.992981,7.818315e-01,0.62349,1.000000e+00,6.123234e-17
4,0.033234,0.771442,0.005851,0.000032,0.000115,0.164704,0.142224,0.207705,5.000000e-01,0.866025,...,8.878852e-01,0.460065,1.726064e-02,0.999851,1.182732e-01,0.992981,7.818315e-01,0.62349,1.000000e+00,6.123234e-17
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
8731,0.174916,0.596415,0.004078,0.000047,0.000073,0.621006,0.860097,0.261514,-2.449294e-16,1.000000,...,-8.878852e-01,0.460065,-2.449294e-16,1.000000,-2.449294e-16,1.000000,-2.449294e-16,1.00000,-2.449294e-16,1.000000e+00
8732,0.150237,0.623241,0.004433,0.000035,0.000111,0.606455,0.851020,0.264281,-2.449294e-16,1.000000,...,-7.308360e-01,0.682553,-2.449294e-16,1.000000,-2.449294e-16,1.000000,-2.449294e-16,1.00000,-2.449294e-16,1.000000e+00
8733,0.144205,0.648623,0.005674,0.000070,0.000094,0.585113,0.813779,0.262759,-2.449294e-16,1.000000,...,-5.195840e-01,0.854419,-2.449294e-16,1.000000,-2.449294e-16,1.000000,-2.449294e-16,1.00000,-2.449294e-16,1.000000e+00
8734,0.124736,0.684711,0.005142,0.000054,0.000061,0.547281,0.744904,0.242972,-2.449294e-16,1.000000,...,-2.697968e-01,0.962917,-2.449294e-16,1.000000,-2.449294e-16,1.000000,-2.449294e-16,1.00000,-2.449294e-16,1.000000e+00


In [4]:
normalized_data['Zone 1 Power Consumption'] = perturbed_data['hZone1']
normalized_data['Zone 2  Power Consumption'] = perturbed_data['hZone2']
normalized_data['Zone 3  Power Consumption'] = perturbed_data['hZone3']
normalized_data

Unnamed: 0,Temperature,Humidity,Wind Speed,general diffuse flows,diffuse flows,Zone 1 Power Consumption,Zone 2 Power Consumption,Zone 3 Power Consumption,month_sin,month_cos,...,hour_sin,hour_cos,dayofyear_sin,dayofyear_cos,weekofyear_sin,weekofyear_cos,dayofweek_sin,dayofweek_cos,quarter_sin,quarter_cos
0,0.082755,0.748587,0.005851,0.000038,0.000099,34055.219280,16129.371128,20241.211524,5.000000e-01,0.866025,...,0.000000e+00,1.000000,1.726064e-02,0.999851,1.182732e-01,0.992981,7.818315e-01,0.62349,1.000000e+00,6.123234e-17
1,0.057583,0.795501,0.005319,0.000035,0.000073,25998.326545,16992.228646,17945.306218,5.000000e-01,0.866025,...,2.697968e-01,0.962917,1.726064e-02,0.999851,1.182732e-01,0.992981,7.818315e-01,0.62349,1.000000e+00,6.123234e-17
2,0.041624,0.806327,0.005496,0.000054,0.000073,23005.142082,15169.415935,16117.262747,5.000000e-01,0.866025,...,5.195840e-01,0.854419,1.726064e-02,0.999851,1.182732e-01,0.992981,7.818315e-01,0.62349,1.000000e+00,6.123234e-17
3,0.044641,0.797907,0.005851,0.000051,0.000086,21109.438625,13535.338349,15140.313879,5.000000e-01,0.866025,...,7.308360e-01,0.682553,1.726064e-02,0.999851,1.182732e-01,0.992981,7.818315e-01,0.62349,1.000000e+00,6.123234e-17
4,0.033234,0.771442,0.005851,0.000032,0.000115,20524.969156,12820.706737,14584.653503,5.000000e-01,0.866025,...,8.878852e-01,0.460065,1.726064e-02,0.999851,1.182732e-01,0.992981,7.818315e-01,0.62349,1.000000e+00,6.123234e-17
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
8731,0.174916,0.596415,0.004078,0.000047,0.000073,37694.136940,32623.156606,16826.815774,-2.449294e-16,1.000000,...,-8.878852e-01,0.460065,-2.449294e-16,1.000000,-2.449294e-16,1.000000,-2.449294e-16,1.00000,-2.449294e-16,1.000000e+00
8732,0.150237,0.623241,0.004433,0.000035,0.000111,37147.193434,32372.691938,16940.954046,-2.449294e-16,1.000000,...,-7.308360e-01,0.682553,-2.449294e-16,1.000000,-2.449294e-16,1.000000,-2.449294e-16,1.00000,-2.449294e-16,1.000000e+00
8733,0.144205,0.648623,0.005674,0.000070,0.000094,36342.948498,31346.559642,16877.707521,-2.449294e-16,1.000000,...,-5.195840e-01,0.854419,-2.449294e-16,1.000000,-2.449294e-16,1.000000,-2.449294e-16,1.00000,-2.449294e-16,1.000000e+00
8734,0.124736,0.684711,0.005142,0.000054,0.000061,34918.707398,29445.379052,16053.565203,-2.449294e-16,1.000000,...,-2.697968e-01,0.962917,-2.449294e-16,1.000000,-2.449294e-16,1.000000,-2.449294e-16,1.00000,-2.449294e-16,1.000000e+00


In [5]:
# Re-normalize three newly added columns
minmax_scaler = MinMaxScaler()
normalized_data[['Zone 1 Power Consumption', 'Zone 2  Power Consumption', 'Zone 3  Power Consumption']] = minmax_scaler.fit_transform(normalized_data[['Zone 1 Power Consumption', 'Zone 2  Power Consumption', 'Zone 3  Power Consumption']])
normalized_data

Unnamed: 0,Temperature,Humidity,Wind Speed,general diffuse flows,diffuse flows,Zone 1 Power Consumption,Zone 2 Power Consumption,Zone 3 Power Consumption,month_sin,month_cos,...,hour_sin,hour_cos,dayofyear_sin,dayofyear_cos,weekofyear_sin,weekofyear_cos,dayofweek_sin,dayofweek_cos,quarter_sin,quarter_cos
0,0.082755,0.748587,0.005851,0.000038,0.000099,0.524289,0.262169,0.343539,5.000000e-01,0.866025,...,0.000000e+00,1.000000,1.726064e-02,0.999851,1.182732e-01,0.992981,7.818315e-01,0.62349,1.000000e+00,6.123234e-17
1,0.057583,0.795501,0.005319,0.000035,0.000073,0.310165,0.293449,0.288410,5.000000e-01,0.866025,...,2.697968e-01,0.962917,1.726064e-02,0.999851,1.182732e-01,0.992981,7.818315e-01,0.62349,1.000000e+00,6.123234e-17
2,0.041624,0.806327,0.005496,0.000054,0.000073,0.230616,0.227370,0.244516,5.000000e-01,0.866025,...,5.195840e-01,0.854419,1.726064e-02,0.999851,1.182732e-01,0.992981,7.818315e-01,0.62349,1.000000e+00,6.123234e-17
3,0.044641,0.797907,0.005851,0.000051,0.000086,0.180235,0.168133,0.221058,5.000000e-01,0.866025,...,7.308360e-01,0.682553,1.726064e-02,0.999851,1.182732e-01,0.992981,7.818315e-01,0.62349,1.000000e+00,6.123234e-17
4,0.033234,0.771442,0.005851,0.000032,0.000115,0.164701,0.142227,0.207715,5.000000e-01,0.866025,...,8.878852e-01,0.460065,1.726064e-02,0.999851,1.182732e-01,0.992981,7.818315e-01,0.62349,1.000000e+00,6.123234e-17
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
8731,0.174916,0.596415,0.004078,0.000047,0.000073,0.620999,0.860083,0.261553,-2.449294e-16,1.000000,...,-8.878852e-01,0.460065,-2.449294e-16,1.000000,-2.449294e-16,1.000000,-2.449294e-16,1.00000,-2.449294e-16,1.000000e+00
8732,0.150237,0.623241,0.004433,0.000035,0.000111,0.606464,0.851003,0.264294,-2.449294e-16,1.000000,...,-7.308360e-01,0.682553,-2.449294e-16,1.000000,-2.449294e-16,1.000000,-2.449294e-16,1.00000,-2.449294e-16,1.000000e+00
8733,0.144205,0.648623,0.005674,0.000070,0.000094,0.585090,0.813805,0.262775,-2.449294e-16,1.000000,...,-5.195840e-01,0.854419,-2.449294e-16,1.000000,-2.449294e-16,1.000000,-2.449294e-16,1.00000,-2.449294e-16,1.000000e+00
8734,0.124736,0.684711,0.005142,0.000054,0.000061,0.547238,0.744886,0.242986,-2.449294e-16,1.000000,...,-2.697968e-01,0.962917,-2.449294e-16,1.000000,-2.449294e-16,1.000000,-2.449294e-16,1.00000,-2.449294e-16,1.000000e+00


In [6]:
normalized_data.to_csv('./data/LPA/normalized_newLPAepsilon1.csv')

### Re- train/val/test split

In [7]:
np.random.seed(2022)
unit_len = int(len(normalized_data) * 0.1)
train_len = unit_len * 7
val_len = unit_len * 1
print(train_len, val_len, len(normalized_data)-train_len-val_len)

6111 873 1752


In [8]:
train = normalized_data[:train_len]
valid = normalized_data[train_len:train_len+val_len]
test = normalized_data[train_len+val_len:]

In [9]:
train.to_csv('./data/LPA/train_eps_new.csv', index=False)
valid.to_csv('./data/LPA/valid_eps_new.csv', index=False)
test.to_csv('./data/LPA/test_eps_new.csv', index=False)