In [1]:
import pandas as pd
import numpy as np
import tensorflow as tf
from keras.models import Sequential
from keras.layers import Dense, Conv1D, MaxPooling1D, Flatten, Reshape
from keras.optimizers import Adam

2023-04-16 23:42:28.696312: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


Load the data

In [2]:
# Consumption curves: later reshaped into the 'watt' column fed to the network.
consumption = pd.read_csv("InputTrain.csv")

# One label table per appliance, read in the same order as the names on the left.
washing_machine, dishwasher, tumble_dryer, microwave, kettle = (
    pd.read_csv(label_path)
    for label_path in (
        "StepTwo_LabelTrain_WashingMachine.csv",
        "StepTwo_LabelTrain_Dishwasher.csv",
        "StepTwo_LabelTrain_TumbleDryer.csv",
        "StepTwo_LabelTrain_Microwave.csv",
        "StepTwo_LabelTrain_Kettle.csv",
    )
)

Reshape the consumption and label tables from wide format (one column per time step) to long format (one row per curve and time step)

In [3]:
def transform(machine, name):
    """Reshape a wide per-curve table into long format.

    Every column whose name starts with 'TimeStep' is melted into rows, with
    'Index' and 'House_id' kept as identifier columns.

    Parameters
    ----------
    machine : pandas.DataFrame
        Wide table with 'Index', 'House_id' and 'TimeStep...' columns.
    name : str
        Name given to the column holding the melted values.

    Returns
    -------
    pandas.DataFrame
        Columns 'Index', 'House_id', 'time' and ``name``, sorted by 'Index'
        then 'time'. Rows keep the labels assigned by ``pd.melt`` (the index
        is not reset).
    """
    step_columns = [col for col in machine.columns if str.startswith(col, 'TimeStep')]

    long_table = pd.melt(
        machine,
        id_vars=['Index', 'House_id'],
        value_vars=step_columns,
        var_name='time',
        value_name=name,
    )

    # The step number is whatever follows the first nine characters of the
    # column name; parse it so the sort below is numeric rather than lexical.
    long_table['time'] = [int(label[9:]) for label in long_table['time']]

    return long_table.sort_values(['Index', 'time'])

In [4]:
# Reshape every label table; the second item of each pair becomes the name of
# the value column in the long table.
washing_machine, dishwasher, tumble_dryer, microwave, kettle = (
    transform(wide_table, value_column)
    for wide_table, value_column in (
        (washing_machine, 'Washing Machine'),
        (dishwasher, 'Dishwasher'),
        (tumble_dryer, 'Tumble Dryer'),
        (microwave, 'Microwave'),
        (kettle, 'Kettle'),
    )
)

machines = [washing_machine, dishwasher, tumble_dryer, microwave, kettle]

# The consumption table has the same wide layout, so it goes through the same reshape.
consumption = transform(consumption, 'watt')
display(consumption)

: 

: 

In [18]:
# Put the five label columns next to the consumption rows. The frames are
# aligned on their row labels (axis=1), which line up only because every table
# went through the same melt. No `names=` argument: pd.concat uses it solely
# to name the levels of a hierarchical index built from `keys`.
label_columns = [
    washing_machine['Washing Machine'],
    dishwasher['Dishwasher'],
    tumble_dryer['Tumble Dryer'],
    microwave['Microwave'],
    kettle['Kettle'],
]
df_train = pd.concat([consumption, *label_columns], axis=1, join="inner")

# join="inner" silently drops rows missing from any input, so fail loudly if
# the label tables do not cover every consumption row.
assert len(df_train) == len(consumption), "label tables are not row-aligned with the consumption table"

display(df_train)
# Number of rows (time steps) recorded for the curve with Index 1.
print(len(df_train[df_train["Index"] == 1]))

Unnamed: 0,Index,House_id,time,watt,Washing Machine,Dishwasher,Tumble Dryer,Microwave,Kettle
0,0,1,0,180.0,0.0,0.0,0.0,0.0,0.0
10421,0,1,1,180.0,0.0,0.0,0.0,0.0,0.0
20842,0,1,2,180.0,0.0,0.0,0.0,0.0,0.0
31263,0,1,3,181.0,0.0,0.0,0.0,0.0,0.0
41684,0,1,4,180.0,0.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...
22467675,10420,16,2155,366.5,0.0,0.0,0.0,0.0,0.0
22478096,10420,16,2156,376.0,0.0,0.0,0.0,0.0,0.0
22488517,10420,16,2157,367.0,0.0,0.0,0.0,0.0,0.0
22498938,10420,16,2158,353.0,0.0,0.0,0.0,0.0,0.0


2160


Model parameters

In [19]:
# Tensor dimensions used for training.
n_features = 1    # input values per time step
n_targets = 5     # label values per time step
seq_len = 2160    # time steps per curve
batch_size = 64   # curves per training batch

Generating training batches

In [35]:
def generate_data(df, batch_size):
    """Endlessly yield shuffled training batches made of whole curves.

    Parameters
    ----------
    df : pandas.DataFrame
        Long-format table with an 'Index' column identifying each curve, a
        'watt' column, and the columns 'Washing Machine', 'Dishwasher',
        'Tumble Dryer', 'Microwave' and 'Kettle'. Every curve must have the
        same number of rows.
    batch_size : int
        Number of curves per batch.

    Yields
    ------
    tuple of numpy.ndarray
        ``(X_batch, y_batch)`` with shapes ``(batch_size, rows_per_curve, 1)``
        and ``(batch_size, rows_per_curve, 5)``. Row order inside a curve is
        the row order of ``df``.

    Raises
    ------
    ValueError
        When the first batch is requested, if ``df`` holds fewer than
        ``batch_size`` curves (no full batch could ever be produced) or if
        the curves do not all have the same number of rows.

    Notes
    -----
    Each pass reshuffles the curves and produces ``num_curves // batch_size``
    batches; curves left over after the last full batch are skipped for that
    pass.
    """
    feature_cols = ['watt']
    target_cols = ['Washing Machine', 'Dishwasher', 'Tumble Dryer', 'Microwave', 'Kettle']

    curve_ids = df['Index'].unique()
    num_batches_per_epoch = len(curve_ids) // batch_size
    if num_batches_per_epoch == 0:
        # Without this guard the loop below would spin forever without yielding.
        raise ValueError(
            f"batch_size={batch_size} exceeds the number of curves ({len(curve_ids)})"
        )

    # Positional row numbers of every curve, computed once. Looking a curve up
    # here replaces a boolean scan over the whole frame for each curve.
    rows_by_curve = df.groupby('Index').indices
    curve_lengths = {len(rows) for rows in rows_by_curve.values()}
    if len(curve_lengths) != 1:
        raise ValueError(
            f"all curves must have the same number of rows, found lengths {sorted(curve_lengths)}"
        )
    rows_per_curve = curve_lengths.pop()

    while True:
        # New random curve order for every pass over the data.
        shuffled_curves = np.random.permutation(curve_ids)

        for i in range(num_batches_per_epoch):
            batch_curves = shuffled_curves[i * batch_size:(i + 1) * batch_size]

            X_batch = np.zeros((batch_size, rows_per_curve, len(feature_cols)))
            y_batch = np.zeros((batch_size, rows_per_curve, len(target_cols)))

            for j, curve in enumerate(batch_curves):
                curve_data = df.iloc[rows_by_curve[curve]]
                X_batch[j, :, :] = curve_data[feature_cols].values
                y_batch[j, :, :] = curve_data[target_cols].values

            yield (X_batch, y_batch)

Creating the model

In [30]:
# 1D CNN that maps one consumption curve of shape (seq_len, n_features) to a
# sigmoid score for every time step and every appliance.
model = Sequential()

# Two convolution/pooling stages extract local patterns from the curve.
model.add(Conv1D(64, kernel_size=3, activation='relu', input_shape=(seq_len, n_features)))
model.add(Conv1D(64, kernel_size=3, activation='relu'))
model.add(MaxPooling1D(pool_size=2))
model.add(Conv1D(128, kernel_size=3, activation='relu'))
model.add(Conv1D(128, kernel_size=3, activation='relu'))
model.add(MaxPooling1D(pool_size=2))

# Dense bottleneck over the flattened feature maps.
model.add(Flatten())
model.add(Dense(128, activation='relu'))

# Output head: one score per (time step, appliance), reshaped to
# (seq_len, n_targets) so it has the same shape as the training targets.
# A (1, n_targets) output would still train, because the loss broadcasts it
# across the time axis, but it could only learn one time-invariant score per
# appliance.
model.add(Dense(seq_len * n_targets, activation='sigmoid'))
model.add(Reshape((seq_len, n_targets)))


Compiling the model

In [31]:
# Adam optimizer; binary cross-entropy is applied to the model's sigmoid outputs.
optimizer = Adam(learning_rate=1e-4)
model.compile(
    optimizer=optimizer,
    loss='binary_crossentropy',
    metrics=['accuracy'],
)

Training the model

In [36]:
# The generator never ends, so Keras needs steps_per_epoch to know where an
# epoch stops.
train_generator = generate_data(df_train, batch_size=batch_size)
model.fit(
    train_generator,
    epochs=5,
    steps_per_epoch=50,
)

Epoch 1/5
Epoch 2/5

Load the test data into a DataFrame

In [43]:
# Read the test curves and reshape them to the same long layout as the training data.
df_test = transform(pd.read_csv("InputTest.csv"), "watt")

def generate_test_data(df):
    """Yield the 'watt' values of each curve as a ``(1, n_rows, 1)`` array.

    Curves are the groups of rows sharing an 'Index' value and are produced
    in ascending 'Index' order (the default ordering of ``groupby``).
    """
    for _, curve_rows in df.groupby('Index'):
        watts = np.array(curve_rows[['watt']].to_numpy())  # shape (n_rows, 1)
        # Add a leading axis of size one: a batch containing this single curve.
        yield watts[np.newaxis, :, :]

Prediction

In [None]:
# Stack all test curves into one array and predict in a single batched call.
# Calling model.predict once per curve pays Keras' per-call setup cost for
# every curve and prints one progress bar per call.
test_curves = list(generate_test_data(df_test))
if test_curves:
    x_test_all = np.concatenate(test_curves, axis=0)
    # One array per curve, in the order the curves were generated.
    y_pred = list(model.predict(x_test_all, batch_size=batch_size))
else:
    y_pred = []

Creating the dataframe for the results

In [None]:
# Flatten all predictions to rows of five scores (one column per appliance)
# and round to four decimals.
result_df = pd.DataFrame(
    np.array(y_pred).reshape(-1, 5),
    columns=['Washing Machine', 'Dishwasher', 'Tumble Dryer', 'Microwave', 'Kettle'],
).round(4)

Exporting the results to a csv file

In [None]:
# Write the row number as the first CSV column, under the header "Index".
result_df.to_csv(
    'CNN_5epochs_50steps.csv',
    index_label="Index",
    index=True,
)