In [1]:
import numpy as np
import pandas as pd
import datetime
import gc
import multiprocessing as mp
import random
import math
from sklearn.model_selection import train_test_split
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers

In [2]:
def read_df(file_title,house_nb):
    try:
        df=pd.read_table(file_title, sep="," )
        df.columns = ["time","consumption"]
        try:
            df['time'] = pd.to_datetime(df['time'],unit='s')
        except:
            df['time'] = pd.to_datetime(df['time'])
        df.index= df.time
        df = df.sort_index()
        df.reset_index(drop=True, inplace=True)
        return (df,house_nb)
    except:
        return(None)

In [3]:
Redd_fridges={}
for i in [1,2,3,5,6]:
    Redd_fridges[i]=read_df("Redd_Resampled/house_"+str(i)+".csv",i)[0]

In [4]:
pool = mp.Pool(processes = 3)
result=pool.starmap(read_df,(("Refit_Resampled/fridge_freezer/house_"+str(i+1)+".csv",i+1) for i in range(21)))
Refit_fridge_freezers={}
for i in range(len(result)):
    if result[i]!=None:
        Refit_fridge_freezers[result[i][1]]=result[i][0]

In [5]:
pool = mp.Pool(processes = 3)
result=pool.starmap(read_df,(("Refit_Resampled/fridge/house_"+str(i+1)+".csv",i+1) for i in range(21)))
Refit_fridges={}
for i in range(len(result)):
    if result[i]!=None:
        Refit_fridges[result[i][1]]=result[i][0]

In [6]:
def csv_to_activations(file_title):
    df=pd.read_table(file_title, sep="," )
    df = df[['beginning', 'end','label']]
    activation = [tuple(x[0:2]) for x in df.to_numpy()]
    activation_type = [x[2] for x in df.to_numpy()]
    return(activation,activation_type)

In [7]:
Refit_fridge_freezer_activation_type={}
Refit_fridge_freezer_activations={}
for house_nb in range(22):
    try:
        Refit_fridge_freezer_activations[house_nb]=csv_to_activations("Refit_activations/labels/fridge_freezer_resampled/house_"+str(house_nb))[0]
        Refit_fridge_freezer_activation_type[house_nb]=csv_to_activations("Refit_activations/labels/fridge_freezer_resampled/house_"+str(house_nb))[1]
    except:
        None

In [8]:
Refit_fridge_activation_type={}
Refit_fridge_activations={}
for house_nb in range(22):
    try:
        Refit_fridge_activations[house_nb]=csv_to_activations("Refit_activations/labels/fridge_resampled/house_"+str(house_nb))[0]
        Refit_fridge_activation_type[house_nb]=csv_to_activations("Refit_activations/labels/fridge_resampled/house_"+str(house_nb))[1]
    except:
        None

In [9]:
Redd_activation_type={}
Redd_activations={}
for house_nb in [1,2,3,5,6]:
    Redd_activations[house_nb]=csv_to_activations("Redd/redd_oversampled_labeled_activations_"+str(house_nb)+".csv")[0]
    Redd_activation_type[house_nb]=csv_to_activations("Redd/redd_oversampled_labeled_activations_"+str(house_nb)+".csv")[1]

In [10]:
def normal_anomolous(activations,activation_type,house_nb):
    normal_activations=[]
    anomolous_activations=[]
    for i in range(len(activations)):
        if activation_type[i]==0:
            normal_activations.append(activations[i])
        else:
            anomolous_activations.append(activations[i])
    return(normal_activations,anomolous_activations,house_nb)

In [11]:
Refit_fridge_freezer_normal_activations={}
Refit_fridge_freezer_anomolous_activations={}
for house_nb in Refit_fridge_freezer_activations.keys():
    result=normal_anomolous(Refit_fridge_freezer_activations[house_nb],Refit_fridge_freezer_activation_type[house_nb],house_nb)
    Refit_fridge_freezer_normal_activations[house_nb]=result[0]
    Refit_fridge_freezer_anomolous_activations[house_nb]=result[1]

In [12]:
Refit_fridge_normal_activations={}
Refit_fridge_anomolous_activations={}
for house_nb in Refit_fridge_activations.keys():
    result=normal_anomolous(Refit_fridge_activations[house_nb],Refit_fridge_activation_type[house_nb],house_nb)
    Refit_fridge_normal_activations[house_nb]=result[0]
    Refit_fridge_anomolous_activations[house_nb]=result[1]

In [13]:
Redd_normal_activations={}
Redd_anomolous_activations={}
for house_nb in [1,2,3,5,6]:
    result=normal_anomolous(Redd_activations[house_nb],Redd_activation_type[house_nb],house_nb)
    Redd_normal_activations[house_nb]=result[0]
    Redd_anomolous_activations[house_nb]=result[1]

In [14]:
def bigger_activations_ffill(df,activation,windowlength):
    begin=activation[0]
    end=activation[1]
    new_activation=[]
    windowlength=windowlength*6
    nb_full_activations=math.ceil((end-begin)/windowlength)
    for i in range (nb_full_activations):
        new_activation.append(list(df.consumption.iloc[begin+i*windowlength:begin+(i+1)*windowlength]))
    return (new_activation)

In [15]:
def small_activations_ffill(df,activation,windowlength):
    begin=activation[0]
    end=activation[1]
    difference=windowlength*6+begin-end
    new_activation=[]
    if ((len(df.index)-activation[1])>=difference):
        new_activation=list(df.consumption.iloc[begin:end+difference])
    else:
        new_activation=list(df.consumption.iloc[begin-difference:end])
    return(new_activation)

In [16]:
def get_x_y(df,activations,activation_type,windowlength):
    x=[]
    y=[]
    for i in range(len(activations)):
        activation=activations[i]
        label=activation_type[i]
        if (activation[1]-activation[0])<=windowlength*6:
            x.append(small_activations_ffill(df,activation,windowlength))
            y.append(label)
        else:
            x.extend(bigger_activations_ffill(df,activation,windowlength))
            while(len(y)<len(x)):
                y.append(label)
    return(x,y)

In [17]:
test_train_fridge_houses=list(Refit_fridge_activations.keys())
test_train_fridge_freezer_houses=list(Refit_fridge_freezer_activations.keys())
test_train_redd=list(Redd_activations.keys())

test_train_fridge_houses.remove(11)
test_train_fridge_freezer_houses.remove(11)
test_train_redd.remove(5)
evaluation_houses=[11]
evaluation_houses_redd=[5]

test_train_fridge_freezer_houses.remove(3)
test_train_fridge_freezer_houses.remove(16)
test_train_toadd_freezer=[3,16]

In [18]:
pool = mp.Pool(processes = 3)
result=pool.starmap(get_x_y,((Refit_fridges[i],Refit_fridge_activations[i],Refit_fridge_activation_type[i],45) for i in test_train_fridge_houses))

In [19]:
x_total_refit=[]
y_total_refit=[]
for i in range(len(result)):
    x_total_refit.extend(result[i][0])
    y_total_refit.extend(result[i][1])

In [20]:
pool = mp.Pool(processes = 3)
result=pool.starmap(get_x_y,((Refit_fridge_freezers[i],Refit_fridge_freezer_activations[i],Refit_fridge_freezer_activation_type[i],45) for i in test_train_fridge_freezer_houses))

In [21]:
for i in range(len(result)):
    x_total_refit.extend(result[i][0])
    y_total_refit.extend(result[i][1])

In [22]:
pool = mp.Pool(processes = 3)
result=pool.starmap(get_x_y,((Redd_fridges[i],Redd_activations[i],Redd_activation_type[i],45) for i in test_train_redd))

In [23]:
x_total_redd=[]
y_total_redd=[]
for i in range(len(result)):
    x_total_redd.extend(result[i][0])
    y_total_redd.extend(result[i][1])

In [18]:
x_unseen=[]
y_unseen=[]
x_unseen.extend(get_x_y(Refit_fridges[11],Refit_fridge_activations[11],Refit_fridge_activation_type[11],45)[0])
y_unseen.extend(get_x_y(Refit_fridges[11],Refit_fridge_activations[11],Refit_fridge_activation_type[11],45)[1])

In [19]:
x_unseen.extend(get_x_y(Refit_fridge_freezers[11],Refit_fridge_freezer_activations[11],Refit_fridge_freezer_activation_type[11],45)[0])
y_unseen.extend(get_x_y(Refit_fridge_freezers[11],Refit_fridge_freezer_activations[11],Refit_fridge_freezer_activation_type[11],45)[1])

In [20]:
x_unseen.extend(get_x_y(Redd_fridges[5],Redd_activations[5],Redd_activation_type[5],45)[0])
y_unseen.extend(get_x_y(Redd_fridges[5],Redd_activations[5],Redd_activation_type[5],45)[1])

In [21]:
x_normal_unseen=[]
x_anomolous_unseen=[]
nb_normal=0
nb_abnormal=0
for i in range(len(y_unseen)):
    if y_unseen[i]==0:
        x_normal_unseen.append(x_unseen[i])
        nb_normal+=1
    else:
        x_anomolous_unseen.append(x_unseen[i])
        nb_abnormal+=1
print(nb_normal)
print(len(x_normal_unseen))
print(nb_abnormal)
print(len(x_anomolous_unseen))

6638
6638
8239
8239


In [24]:
x_normal_redd=[]
x_anomolous_redd=[]
nb_normal=0
nb_abnormal=0
for i in range(len(y_total_redd)):
    if y_total_redd[i]==0:
        x_normal_redd.append(x_total_redd[i])
        nb_normal+=1
    else:
        x_anomolous_redd.append(x_total_redd[i])
        nb_abnormal+=1
print(nb_normal)
print(len(x_normal_redd))
print(nb_abnormal)
print(len(x_anomolous_redd))

1631
1631
1878
1878


In [25]:
x_normal=[]
x_anomolous=[]
nb_normal=0
nb_abnormal=0
for i in range(len(y_total_refit)):
    if y_total_refit[i]==0:
        x_normal.append(x_total_refit[i])
        nb_normal+=1
    else:
        x_anomolous.append(x_total_refit[i])
        nb_abnormal+=1
print(nb_normal)
print(len(x_normal))
print(nb_abnormal)
print(len(x_anomolous))

161731
161731
198000
198000


In [23]:
def oversampled_normal(x_normal,x_anomolous):
    normal_nb=len(x_normal)
    abnormal_nb=len(x_anomolous)
    oversampling_nb=abnormal_nb-normal_nb
    oversampled_list=[]
    for i in range(0,oversampling_nb):
        oversampled_list.append(random.choice(x_normal))
    return (oversampled_list)

In [27]:
x_total_refit.extend(oversampled_normal(x_normal,x_anomolous))
while(len(y_total_refit)<len(x_total_refit)):
    y_total_refit.append(0)

In [28]:
x_total_redd.extend(oversampled_normal(x_normal_redd,x_anomolous_redd))
while(len(y_total_redd)<len(x_total_redd)):
    y_total_redd.append(0)

In [22]:
x_unseen=np.array(x_unseen)
y_unseen=np.array(y_unseen)

In [23]:
type(x_unseen[0])

numpy.ndarray

In [29]:
x_total_refit=np.array(x_total_refit)
y_total_refit=np.array(y_total_refit)

In [30]:
type(x_total_refit[0])

numpy.ndarray

In [31]:
x_total_redd=np.array(x_total_redd)
y_total_redd=np.array(y_total_redd)

In [32]:
x_train_refit, x_test_refit, y_train_refit, y_test_refit = train_test_split(x_total_refit, y_total_refit, test_size=0.3)

In [33]:
x_train_redd, x_test_redd, y_train_redd, y_test_redd = train_test_split(x_total_redd, y_total_redd, test_size=0.3)

In [34]:
x_train=np.concatenate((x_train_refit, x_train_redd), axis=0)
y_train=np.concatenate((y_train_refit, y_train_redd), axis=0)
x_test=np.concatenate((x_test_refit, x_test_redd), axis=0)
y_test=np.concatenate((y_test_refit, y_test_redd), axis=0)

In [35]:
x_train=np.array(x_train)
x_test=np.array(x_test)

In [36]:
y=y_train
y_train=[]
for i in range(len(y)):
    y_train.append(np.array([y[i]]))
y_train=np.array(y_train)

In [37]:
y=y_test
y_test=[]
for i in range(len(y)):
    y_test.append(np.array([y[i]]))
y_test=np.array(y_test)

In [25]:
y=y_unseen
y_unseen=[]
for i in range(len(y)):
    y_unseen.append(np.array([y[i]]))
y_unseen=np.array(y_unseen)

In [26]:
y_unseen.shape

(14877, 1)

In [38]:
y_test.shape

(119927, 1)

In [39]:
print(x_train.shape)
print(x_test.shape)
print(y_train.shape)
print(y_test.shape)

(279829, 270)
(119927, 270)
(279829, 1)
(119927, 1)


In [40]:
x_train[0]

array([   0.,    0., 1018.,  105.,  107.,  109.,  104.,  110.,  102.,
        102.,  105.,  102.,  103.,  103.,  100.,   96.,   95.,   95.,
         95.,   94.,   95.,   95.,   95.,   95.,   94.,   95.,   95.,
         95.,   93.,   92.,   93.,   93.,   92.,   91.,   92.,   91.,
         91.,   90.,   90.,   90.,   90.,   90.,   90.,   89.,   89.,
         89.,   89.,   89.,   88.,   88.,   88.,   87.,   87.,   87.,
         88.,   88.,   88.,   87.,   87.,   87.,   88.,   88.,   87.,
         87.,   87.,   87.,   87.,   87.,   87.,   85.,   86.,   86.,
         86.,   86.,   86.,   86.,   85.,   84.,   85.,   85.,   84.,
         85.,   84.,   84.,   85.,   84.,   84.,   85.,   84.,   84.,
         84.,   84.,   84.,   83.,   84.,   83.,   83.,   83.,   83.,
         83.,   83.,   83.,   83.,   83.,   83.,   82.,   83.,   83.,
         83.,   83.,   83.,   82.,   82.,   83.,   82.,   82.,   82.,
         83.,   82.,   82.,   82.,   82.,   82.,   82.,   81.,   82.,
         82.,   81.,

In [41]:
y_train[0]

array([1])

In [42]:
mean=x_train.mean()
std=x_train.std()

In [43]:
print(mean)
print(std)

61.2186529657067
77.31717709962946


In [27]:
mean=61.2186529657067
std=77.31717709962946

In [44]:
x_train=x_train-mean
x_test=x_test-mean

In [45]:
print(x_train.mean())
print(x_test.mean())

3.1933623283733256e-14
0.081236164378277


In [46]:
x_train=x_train/std
x_test=x_test/std

In [47]:
print(x_train.std())
print(x_test.std())

0.9999999999999973
1.0046180059309586


In [28]:
x_unseen=x_unseen-mean
x_unseen=x_unseen/std

In [29]:
print(x_unseen.mean())
print(x_unseen.std())

-0.18082058194695144
1.4450238820728696


In [38]:
x_unseen[2]

array([-0.79179, -0.79179, -0.19683, -0.23564, -0.2615 , -0.3003 ,
       -0.31324, -0.32617, -0.32617, -0.32617, -0.32617, -0.33911,
       -0.32617, -0.32617, -0.32617, -0.33911, -0.32617, -0.32617,
       -0.32617, -0.32617, -0.32617, -0.32617, -0.31324, -0.32617,
       -0.31324, -0.31324, -0.32617, -0.32617, -0.3003 , -0.32617,
       -0.31324, -0.31324, -0.31324, -0.3003 , -0.31324, -0.31324,
       -0.3003 , -0.31324, -0.31324, -0.31324, -0.31324, -0.3003 ,
       -0.31324, -0.31324, -0.3003 , -0.31324, -0.3003 , -0.3003 ,
       -0.3003 , -0.31324, -0.3003 , -0.3003 , -0.3003 , -0.3003 ,
       -0.3003 , -0.3003 , -0.3003 , -0.31324, -0.3003 , -0.3003 ,
       -0.3003 , -0.3003 , -0.3003 , -0.3003 , -0.3003 , -0.3003 ,
       -0.3003 , -0.3003 , -0.3003 , -0.3003 , -0.3003 , -0.3003 ,
       -0.3003 , -0.3003 , -0.31324, -0.31324, -0.3003 , -0.3003 ,
       -0.31324, -0.3003 , -0.3003 , -0.3003 , -0.31324, -0.31324,
       -0.31324, -0.3003 , -0.31324, -0.31324, -0.3003 , -0.30

In [31]:
x_unseen=np.round(x_unseen, 5)

In [48]:
x_train=np.round(x_train, 5)

In [49]:
x_test=np.round(x_test, 5)

In [41]:
pd.DataFrame(x_unseen).to_csv("x_unseen.csv",sep=",")

In [42]:
pd.DataFrame(y_unseen).to_csv("y_unseen.csv",sep=",")

In [50]:
pd.DataFrame(x_train).to_csv("x_train.csv",sep=",")

In [51]:
pd.DataFrame(x_test).to_csv("x_test.csv",sep=",")

In [53]:
pd.DataFrame(y_train).to_csv("y_train.csv",sep=",")

In [52]:
pd.DataFrame(y_test).to_csv("y_test.csv",sep=",")

In [49]:
x_train[0]

array([ 0.52529,  0.53818,  0.52529,  0.53818,  0.53818,  0.55107,
        0.55107,  0.56397,  0.56397,  0.56397,  0.56397,  0.57686,
        0.57686,  0.57686,  0.57686,  0.57686,  0.57686,  0.56397,
        0.57686,  0.56397,  0.56397,  0.56397,  0.56397,  0.56397,
        0.56397,  0.56397,  0.56397,  0.56397,  0.55107,  0.55107,
        0.55107,  0.53818,  0.53818,  0.53818,  0.53818,  0.53818,
        0.52529,  0.53818,  0.53818,  0.53818,  0.53818,  0.55107,
        0.53818,  0.53818,  0.55107,  0.55107,  0.55107,  0.53818,
        0.55107,  0.53818,  0.55107,  0.55107,  0.55107,  0.55107,
        0.55107,  0.55107,  0.53818,  0.55107,  0.55107,  0.55107,
        0.55107,  0.55107,  0.55107,  0.55107,  0.53818,  0.55107,
        0.53818,  0.53818,  0.55107,  0.53818,  0.53818,  0.55107,
        0.55107,  0.55107,  0.53818,  0.52529,  0.52529,  0.52529,
        0.52529,  0.52529,  0.52529,  0.52529,  0.52529,  0.53818,
        0.53818,  0.53818,  0.53818,  0.53818,  0.53818,  0.53

In [54]:
pd.DataFrame(y_train).to_csv("y_train.csv")

In [48]:
def build_fc_model():
  fc_model = tf.keras.Sequential([
      # The sequential model is a linear stack of layers.
      tf.keras.layers.Dense(32, activation=tf.nn.relu,input_shape=(270,)),
      # activation=tf.nn.relu returns the standard ReLU activation: max(x, 0), 
      # the element-wise maximum of 0 and the input tensor.
      tf.keras.layers.Dense(10, activation=tf.nn.softmax),
      #tf.nn.softmax produces just the result of applying the softmax function
      #to an input tensor. The softmax "squishes" the inputs so that sum(input) = 1:
      #it's a way of normalizing
      tf.keras.layers.Dense(1)
      
  ])
  return fc_model

model = build_fc_model()

In [49]:
model.summary()

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense (Dense)                (None, 32)                8672      
_________________________________________________________________
dense_1 (Dense)              (None, 10)                330       
_________________________________________________________________
dense_2 (Dense)              (None, 1)                 11        
Total params: 9,013
Trainable params: 9,013
Non-trainable params: 0
_________________________________________________________________


In [50]:
model.compile(optimizer=tf.keras.optimizers.SGD(learning_rate=2e-1), 
              loss='sparse_categorical_crossentropy',
              metrics=['accuracy'])
# optimizer= tf.keras.optimizers.SGD with SGD standing for **stochastic gradient descent 
# learning rate is the amount of change the model undergoes during the training process
# 'sparse_categorical_crossentropy' loss the crossentropy loss between the labels and predictions.

In [None]:
BATCH_SIZE = 64
EPOCHS = 5

model.fit(x_train, y_train, batch_size=BATCH_SIZE, epochs=EPOCHS)

In [None]:
x_data = np.genfromtxt('x_train.csv',delimiter=',')

In [50]:
x_data.shape

(0,)