In [61]:
import numpy as np
import pandas as pd
import tensorflow as tf
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Dense, Input
from sklearn.model_selection import train_test_split
from sklearn.compose import make_column_transformer
from sklearn.preprocessing import MinMaxScaler

In [62]:
def load_dataset(dataset: pd.DataFrame, predict_col: list,
                 columns_to_drop: list, minmax_columns: list) -> tuple:
    if len(columns_to_drop) > 0:
        dataset = dataset.drop(columns_to_drop, axis=1)
    dataset = dataset.astype(float)
    dataset = dataset.interpolate(method='linear', axis=0).ffill().bfill()
    transformer = make_column_transformer(
        (MinMaxScaler(), minmax_columns)
    )
    features = dataset.drop(predict_col, axis=1)
    labels = dataset[predict_col]
    train_features, test_features, train_labels, test_labels = train_test_split(
        features, labels, test_size=0.2, random_state=42
    )
    transformer.fit(train_features)
    train_features = transformer.transform(train_features)
    test_features = transformer.transform(test_features)
    return train_features, train_labels, test_features, test_labels

In [64]:
data = pd.read_csv("train_data_200k.csv")
rain_features, train_labels, test_features, test_labels = load_dataset(
    dataset=data,
    columns_to_drop=['Unnamed: 0'],
    minmax_columns=['tag1', 'tag2', 'tag3', 'tag4', 'tag5', 'tag6', 'tag7',
                    'tag8', 'tag9', 'tag10', 'tag11', 'tag12', 'tag13', 'tag14', 'tag15',
                    'tag16', 'tag17', 'tag18', 'tag19', 'tag20', 'tag21', 'tag22', 'tag23',
                    'tag24', 'tag25', 'tag26', 'tag27', 'tag28', 'tag29', 'tag30', 'tag31',
                    'tag32', 'tag33', 'tag34', 'tag35', 'tag36', 'tag37', 'tag38', 'tag39',
                    'tag40', 'tag41', 'tag42', 'tag43', 'tag44', 'tag45', 'tag46', 'tag47',
                    'tag48', 'tag49', 'tag50', 'tag51', 'tag52', 'tag53', 'tag54', 'tag55',
                    'tag56', 'tag57', 'tag58', 'tag59', 'tag60', 'tag61', 'tag62', 'tag63',
                    'tag64', 'tag65', 'tag66', 'tag67', 'tag68', 'tag69', 'tag70', 'tag71',
                    'tag72', 'tag73', 'tag74', 'tag75', 'tag76', 'tag77', 'tag78', 'tag79'],
    predict_col=['target1', 'target2', 'target3', 'target4']
)

In [78]:
def train(train_features: pd.DataFrame, train_labels: pd.Series,
          test_features: pd.DataFrame, test_labels: pd.Series,
          epochs: int):
    def base_model(inputs):
        x= Dense(512, activation='relu')(inputs)
        x= Dense(256, activation='relu')(x)
        x= Dense(128, activation='relu')(x)
        return x
    def final_model(inputs):
        x = base_model(inputs)
        target1 = Dense(units='1', name='target1')(x)
        target2 = Dense(units='1', name='target2')(x)
        target3 = Dense(units='1', name='target3')(x)
        target4 = Dense(units='1', name='target4')(x)
        model = Model(inputs=inputs, outputs = [target1, target2, target3, target4])
        return model

    inputs = tf.keras.layers.Input(shape=(79,))

    model = final_model(inputs)

    model.compile(optimizer=tf.keras.optimizers.SGD(learning_rate=0.001), 
             loss = {'target1': 'mse',
                     'target2': 'mse',
                     'target3': 'mse',
                     'target4': 'mse'},
             metrics={
                 'target1': tf.keras.metrics.RootMeanSquaredError(),
                 'target2': tf.keras.metrics.RootMeanSquaredError(),
                 'target3': tf.keras.metrics.RootMeanSquaredError(),
                 'target4': tf.keras.metrics.RootMeanSquaredError(),
                     })
    history = model.fit(train_features, train_labels, 
                   epochs=epochs, validation_data=(test_features, test_labels))
    
    model.save("mymodel.h5")

In [80]:
train(train_features=train_features, train_labels=train_labels,
      test_features=test_features, test_labels=test_labels, epochs=11)

Epoch 1/11
Epoch 2/11
Epoch 3/11
Epoch 4/11
Epoch 5/11
Epoch 6/11
Epoch 7/11
Epoch 8/11
Epoch 9/11
Epoch 10/11
Epoch 11/11


In [81]:
def load_dataset_test(dataset: pd.DataFrame, columns_to_drop: list, minmax_columns: list) -> tuple:
    if len(columns_to_drop) > 0:
        dataset = dataset.drop(columns_to_drop, axis=1)
    dataset = dataset.astype(float)
    dataset = dataset.interpolate(method='linear', axis=0).ffill().bfill()
    dataset = dataset.fillna(0)
    transformer = make_column_transformer(
        (MinMaxScaler(), minmax_columns)
    )
    transformer.fit(dataset)
    dataset = transformer.transform(dataset)
    return dataset

In [82]:
test_data = pd.read_csv("test_data_100k.csv")
test_dataset = load_dataset_test(test_data,
                                columns_to_drop=['Unnamed: 0'],
                                minmax_columns=['tag1', 'tag2', 'tag3', 'tag4', 'tag5', 'tag6', 'tag7',
                                              'tag8', 'tag9', 'tag10', 'tag11', 'tag12', 'tag13', 'tag14', 'tag15',
                                              'tag16', 'tag17', 'tag18', 'tag19', 'tag20', 'tag21', 'tag22', 'tag23',
                                              'tag24', 'tag25', 'tag26', 'tag27', 'tag28', 'tag29', 'tag30', 'tag31',
                                              'tag32', 'tag33', 'tag34', 'tag35', 'tag36', 'tag37', 'tag38', 'tag39',
                                              'tag40', 'tag41', 'tag42', 'tag43', 'tag44', 'tag45', 'tag46', 'tag47',
                                              'tag48', 'tag49', 'tag50', 'tag51', 'tag52', 'tag53', 'tag54', 'tag55',
                                              'tag56', 'tag57', 'tag58', 'tag59', 'tag60', 'tag61', 'tag62', 'tag63',
                                              'tag64', 'tag65', 'tag66', 'tag67', 'tag68', 'tag69', 'tag70', 'tag71',
                                              'tag72', 'tag73', 'tag74', 'tag75', 'tag76', 'tag77', 'tag78', 'tag79'])

In [83]:
model = tf.keras.models.load_model('mymodel.h5')
predictions = model.predict(test_dataset)
for i, n in enumerate(['target1', 'target2', 'target3', 'target4']):
  test_data[n] = predictions[i]



In [84]:
test_data

Unnamed: 0.1,Unnamed: 0,tag1,tag2,tag3,tag4,tag5,tag6,tag7,tag8,tag9,...,tag74,tag75,tag76,tag77,tag78,tag79,target1,target2,target3,target4
0,2017-01-03 21:21:00,12.59972,,,,,,45.27993,43.13644,,...,36.59567,64.47523,45.08920,,,,0.178173,0.209130,0.292200,0.155196
1,2017-01-03 21:22:00,12.65850,,,,,,,42.98455,,...,36.60715,64.48697,45.18127,,,,0.172568,0.203522,0.285352,0.155045
2,2017-01-03 21:23:00,12.69538,,,,,,,41.87401,,...,36.60194,64.48034,45.00529,75.84169,,,0.175588,0.206477,0.289088,0.153881
3,2017-01-03 21:24:00,12.84726,,,,,,,41.78006,,...,36.61879,64.50220,45.10987,,93.62097,,0.176306,0.206130,0.285340,0.154260
4,2017-01-03 21:25:00,12.98246,,,,,,,,,...,36.59875,64.48300,45.12246,,,56.14647,0.173153,0.207510,0.285098,0.153818
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
99995,2017-03-14 07:56:00,12.08330,,,,44.63140,,,,,...,42.65038,66.08608,53.43510,75.92390,,55.59630,0.205973,0.222776,0.290184,0.162609
99996,2017-03-14 07:57:00,12.13475,,,,,,,,,...,42.68734,66.09002,53.44431,,,55.64390,0.207422,0.221265,0.286231,0.161516
99997,2017-03-14 07:58:00,12.18710,,,,,,,,,...,42.67019,66.11372,53.40477,,103.04540,55.63939,0.210152,0.222101,0.285768,0.161306
99998,2017-03-14 07:59:00,12.25297,,,,,,,,,...,42.66780,66.10795,53.46239,,,55.59716,0.211064,0.221760,0.285819,0.163837
