# Prism

In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
from sklearn.model_selection import train_test_split

In [2]:
# Read the per-sample summary statistics (mean/std/quantiles, hour, weekend)
# together with the appliance label, then display the frame.
DATA_PATH = './extra_variables_1_hour.csv'
df = pd.read_csv(DATA_PATH)
df

Unnamed: 0,mean,std,min,25%,50%,75%,max,hour,weekend,label
0,2.978333,0.144656,2.0,3.0,3.0,3.0,3.0,1,0,dishwasher
1,623.279931,612.686744,1.0,2.0,394.0,1269.0,1303.0,9,0,radiator
2,1.004722,0.068566,1.0,1.0,1.0,1.0,2.0,7,0,laptop
3,2.776667,0.416037,2.0,3.0,3.0,3.0,3.0,9,0,air_purifier
4,91.145556,463.427045,0.0,0.0,0.0,0.0,2495.0,8,0,boiler
...,...,...,...,...,...,...,...,...,...,...
20864,12.711111,41.720699,1.0,1.0,1.0,1.0,1354.0,8,0,coffee
20865,2.431806,1.454841,1.0,1.0,2.0,3.0,11.0,14,0,printer
20866,0.918472,0.290070,0.0,1.0,1.0,1.0,2.0,21,1,dryer
20867,0.302778,0.458351,0.0,0.0,0.0,1.0,1.0,14,1,coffee


In [3]:
# Turn the string labels into integer category codes for the classifiers.
# `cat` is kept around because cat.categories maps each code back to its name.
cat = pd.Categorical(df['label'])
df = df.assign(label=cat.codes)
df

Unnamed: 0,mean,std,min,25%,50%,75%,max,hour,weekend,label
0,2.978333,0.144656,2.0,3.0,3.0,3.0,3.0,1,0,7
1,623.279931,612.686744,1.0,2.0,394.0,1269.0,1303.0,9,0,17
2,1.004722,0.068566,1.0,1.0,1.0,1.0,2.0,7,0,13
3,2.776667,0.416037,2.0,3.0,3.0,3.0,3.0,9,0,2
4,91.145556,463.427045,0.0,0.0,0.0,0.0,2495.0,8,0,3
...,...,...,...,...,...,...,...,...,...,...
20864,12.711111,41.720699,1.0,1.0,1.0,1.0,1354.0,8,0,4
20865,2.431806,1.454841,1.0,1.0,2.0,3.0,11.0,14,0,16
20866,0.918472,0.290070,0.0,1.0,1.0,1.0,2.0,21,1,8
20867,0.302778,0.458351,0.0,0.0,0.0,1.0,1.0,14,1,4


In [4]:
# Hold out 20% of the rows for testing; the fixed seed keeps the split stable.
features = df.drop(columns='label')
target = df['label']
X_train, X_test, y_train, y_test = train_test_split(
    features, target, test_size=0.2, random_state=42
)

## Using Sklearn Models

In [5]:
import sklearn
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier, AdaBoostClassifier, ExtraTreesClassifier
from sklearn.metrics import accuracy_score, classification_report

In [6]:
# Random forest baseline. The seed is fixed (matching the split's
# random_state) so the reported accuracies are reproducible on re-run.
random_forest = RandomForestClassifier(n_estimators=100, random_state=42)
random_forest.fit(X_train, y_train)
y_pred = random_forest.predict(X_test)
print("Train Accuracy:", accuracy_score(y_train, random_forest.predict(X_train)))
print("Test Accuracy:", accuracy_score(y_test, y_pred))

Train Accuracy: 0.9514225816112608
Test Accuracy: 0.7992333493052228


In [7]:
# Gradient boosting baseline. The seed is fixed (matching the split's
# random_state) so the reported accuracies are reproducible on re-run.
gradient_boost = GradientBoostingClassifier(n_estimators=100, random_state=42)
gradient_boost.fit(X_train, y_train)
y_pred = gradient_boost.predict(X_test)
print("Train Accuracy:", accuracy_score(y_train, gradient_boost.predict(X_train)))
print("Test Accuracy:", accuracy_score(y_test, y_pred))

Train Accuracy: 0.8613956274333633
Test Accuracy: 0.802587446094873


In [19]:
# Extra-trees baseline. The seed is fixed (matching the split's
# random_state) so the reported accuracies are reproducible on re-run.
extra_trees = ExtraTreesClassifier(n_estimators=100, random_state=42)
extra_trees.fit(X_train, y_train)
y_pred = extra_trees.predict(X_test)
print("Train Accuracy:", accuracy_score(y_train, extra_trees.predict(X_train)))
print("Test Accuracy:", accuracy_score(y_test, y_pred))

Train Accuracy: 0.9514225816112608
Test Accuracy: 0.798754192620987


## Using a Keras MLP Classifier

In [9]:
# Save model summary and accuracy to a JSON file
import json

def save_model_stats(history, model_name, model):
    """Store a model's architecture, accuracies and training history in ./model_stats.json.

    The file holds one entry per model name; an existing entry with the same
    name is overwritten and all other entries are kept. If the file does not
    exist yet it is created.

    Args:
        history: Object returned by ``model.fit``; its ``history`` dict
            (metric name -> per-epoch values) is saved.
        model_name: Key under which the stats are stored.
        model: Trained Keras model. It is evaluated on the notebook-level
            ``X_train``/``y_train`` and ``X_test``/``y_test`` splits.
    """
    stats_path = './model_stats.json'

    # Start from the stats already on disk so earlier models are preserved;
    # a missing file simply means this is the first model being recorded.
    try:
        with open(stats_path) as stats_file:
            all_stats = json.load(stats_file)
    except FileNotFoundError:
        all_stats = {}

    stats = {}
    stats['summary'] = model.to_json()
    stats['train_accuracy'] = float(accuracy_score(y_train, np.argmax(model.predict(X_train), axis=1)))
    stats['test_accuracy'] = float(accuracy_score(y_test, np.argmax(model.predict(X_test), axis=1)))
    # The history can contain NumPy scalars (e.g. the learning rate logged by
    # ReduceLROnPlateau), which json cannot serialise; cast to plain floats.
    stats['history'] = {
        metric: [float(value) for value in values]
        for metric, values in history.history.items()
    }
    all_stats[model_name] = stats

    # Serialise before opening the file for writing, so a serialisation error
    # cannot leave a truncated stats file behind.
    payload = json.dumps(all_stats, indent=4)
    with open(stats_path, 'w') as stats_file:
        stats_file.write(payload)

# NOTE(review): `i` is not used in the visible cells; presumably a counter for naming saved models.
i = 0

In [36]:
# Reset Keras' global state and fix the seeds so that rebuilding the model
# starts from the same initial weights.
tf.keras.backend.clear_session()
tf.random.set_seed(42)
np.random.seed(42)

# Derive the layer sizes from the data instead of hard-coding them:
# one input per feature column, one softmax output per label category.
n_features = X_train.shape[1]
n_classes = len(cat.categories)

# NOTE(review): the features are fed in unscaled; consider standardising them.
model = keras.models.Sequential([
    # `shape` must be a tuple; `(n)` without the trailing comma is just an int.
    layers.Input(shape=(n_features,)),
    layers.Dense(32, activation='relu'),
    layers.Dense(64, activation='relu'),
    layers.Dense(128, activation='relu'),

    layers.Dense(128, activation='relu'),
    layers.Dense(64, activation='relu'),
    layers.Dense(32, activation='relu'),

    layers.Dense(n_classes, activation='softmax'),
])

# Labels are integer codes, hence the sparse variant of the cross-entropy loss.
model.compile(loss='sparse_categorical_crossentropy', optimizer=tf.keras.optimizers.Adamax(), metrics=['accuracy'])

model.summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 dense (Dense)               (None, 32)                320       
                                                                 
 dense_1 (Dense)             (None, 64)                2112      
                                                                 
 dense_2 (Dense)             (None, 128)               8320      
                                                                 
 dense_3 (Dense)             (None, 128)               16512     
                                                                 
 dense_4 (Dense)             (None, 64)                8256      
                                                                 
 dense_5 (Dense)             (None, 32)                2080      
                                                                 
 dense_6 (Dense)             (None, 24)                7

In [37]:
# Stop when val_loss has not improved for 20 epochs and roll the model back
# to its best weights (otherwise it would be evaluated 20 epochs past its
# best); lower the learning rate after 10 epochs without improvement.
callbacks = [
    keras.callbacks.EarlyStopping(patience=20, monitor='val_loss', restore_best_weights=True),
    keras.callbacks.ReduceLROnPlateau(patience=10, monitor='val_loss'),
]

In [None]:
# Train for at most 200 epochs, holding out 20% of the training data for
# validation; the callbacks monitor val_loss.
fit_options = dict(epochs=200, validation_split=0.2, callbacks=callbacks)
history = model.fit(X_train, y_train, **fit_options)

Epoch 1/200
Epoch 2/200
Epoch 3/200
Epoch 4/200
Epoch 5/200
Epoch 6/200
Epoch 7/200
Epoch 8/200
Epoch 9/200
Epoch 10/200
Epoch 11/200
Epoch 12/200
Epoch 13/200
Epoch 14/200
Epoch 15/200
Epoch 16/200
Epoch 17/200

In [None]:
# model.summary() prints the table itself and returns None, so wrapping it in
# print() would emit a stray "None" line.
model.summary()

# The softmax output holds one probability per class; argmax picks the predicted class code.
train_pred = np.argmax(model.predict(X_train), axis=1)
print("Train Accuracy:", accuracy_score(y_train, train_pred))

test_pred = np.argmax(model.predict(X_test), axis=1)
print("Test Accuracy:", accuracy_score(y_test, test_pred))

In [None]:
# Training vs. validation loss per epoch.
fig, ax = plt.subplots()
ax.plot(history.history['loss'], label='Train_loss', color='red')
ax.plot(history.history['val_loss'], label='Val_loss', color='blue')
# Fix the visible loss range to 0.1-1 (values outside it are cut off).
ax.set_ylim(0.1, 1)
ax.set(title='Loss per epoch', xlabel='Epoch', ylabel='Loss')
ax.legend();


In [None]:
# Training vs. validation accuracy per epoch.
fig, ax = plt.subplots()
ax.plot(history.history['accuracy'], label='Train_acc', color='green')
ax.plot(history.history['val_accuracy'], label='Val_acc', color='orange')
# Only the lower bound is set: the y axis starts at 0.7 and the upper limit
# is left to matplotlib's autoscaling.
ax.set_ylim(bottom=0.7)
ax.set(title='Accuracy per epoch', xlabel='Epoch', ylabel='Accuracy')
ax.legend();