In [15]:
from keras.models import Sequential
from keras.layers.core import Dense, Activation
import pandas as pd
import io
import requests
import numpy as np
from sklearn import metrics
import os
from sklearn.preprocessing import MinMaxScaler

In [16]:
# Impute the gaps of a single column with that column's median.
def missing_median(df, name):
    """Fill missing values of ``df[name]`` with the column median.

    Args:
        df: DataFrame to update; the column is reassigned on this object.
        name: Label of the column to impute.

    Returns:
        None. The caller's DataFrame is modified.
    """
    column = df[name]
    df[name] = column.fillna(column.median())
    
    
# Convert a Pandas dataframe to the x,y inputs that TensorFlow needs
def to_xy(df, target):
    """Split a DataFrame into float32 feature and target arrays.

    Every column except ``target`` becomes a feature. When the target column
    has dtype int64 or int32 it is treated as a class label and one-hot
    encoded; otherwise it is returned as a single regression column.

    Args:
        df: Source DataFrame containing the features and the target column.
        target: Label of the column to predict.

    Returns:
        Tuple ``(x, y)`` of 2-D ``numpy.float32`` arrays. ``x`` has one column
        per feature; ``y`` has one column per class (classification) or one
        column per selected target column (regression).
    """
    feature_cols = [col for col in df.columns if col != target]

    # df[target].dtypes is a single dtype for an ordinary column, but a Series
    # of dtypes when the label is duplicated; use the first one in that case.
    target_type = df[target].dtypes
    if isinstance(target_type, pd.Series):
        target_type = target_type.iloc[0]

    # DataFrame.as_matrix() no longer exists in current pandas; selecting the
    # columns and taking .values yields the same 2-D array on old and new
    # versions alike. TensorFlow likes 32 bits.
    features = df[feature_cols].values.astype(np.float32)

    if target_type in (np.int64, np.int32):
        # Classification: one indicator column per distinct class value.
        dummies = pd.get_dummies(df[target])
        return features, dummies.values.astype(np.float32)

    # Regression: keep the target 2-D by selecting with a list.
    return features, df[[target]].values.astype(np.float32)

In [17]:
# Directory the notebook joins file names onto for reading and writing.
path = "./data/"
# NOTE(review): presumably the intended output directory for saved networks,
# but no cell visible here references it — confirm which directory is meant.
save_path = "./dnn/"

In [18]:
# Build the CSV location under `path` and load it; the strings 'NA' and '?'
# are additionally parsed as missing values.
filename_read = os.path.join(path,"Train_Solar.csv")
df = pd.read_csv(filename_read,na_values=['NA','?'])

In [19]:
# Show the first row of the freshly loaded frame.
df.head(1)

Unnamed: 0,datetime,date,time,I,T,UV,WS,RH,P
0,1/1/2017 6:00,1/1/2017,6:00:00,0.0,24.6,0.0,1.2,59.8,0.0


In [20]:
# Hold on to the three timestamp columns as separate Series.
datetime_sr, date_sr, time_sr = df['datetime'], df['date'], df['time']

In [21]:
# Remove the three timestamp columns so only numeric columns remain.
# `axis` is passed by keyword: newer pandas releases no longer accept it
# positionally (the bare `1`), and one call replaces three separate drops.
df.drop(['datetime', 'date', 'time'], axis=1, inplace=True)
print(df.head(1))

     I     T   UV   WS    RH    P
0  0.0  24.6  0.0  1.2  59.8  0.0


In [22]:
# Median-impute each of the six remaining columns in turn.
for column_name in ('I', 'T', 'UV', 'WS', 'RH', 'P'):
    missing_median(df, column_name)

In [23]:
from sklearn.ensemble import IsolationForest

# Fit an isolation forest on all columns and label every row:
# 1 marks a row that is kept, -1 a row counted as an outlier.
clf = IsolationForest(max_samples=100, random_state=42)
clf.fit(df)
y_noano = pd.DataFrame(clf.predict(df), columns=['Top'])

# Keep only the rows labelled 1 and renumber them from zero.
inlier_positions = y_noano[y_noano['Top'] == 1].index.values
df = df.iloc[inlier_positions].reset_index(drop=True)

print("Number of Outliers:", y_noano[y_noano['Top'] == -1].shape[0])
print("Number of rows without outliers:", df.shape[0])

Number of Outliers: 13159
Number of rows without outliers: 118427


In [24]:
import warnings
# NOTE(review): this silences every warning for the rest of the session,
# including deprecation notices from pandas/scikit-learn — consider narrowing.
warnings.filterwarnings('ignore')

col_train = list(df.columns)

# Use a plain 2-D ndarray instead of np.matrix: NumPy discourages np.matrix
# and newer scikit-learn releases refuse it as estimator input.
mat_train = np.asarray(df)

# Rescale every column (features and the target P alike) with a scaler
# fitted on this same data.
prepro = MinMaxScaler()
prepro.fit(mat_train)

df = pd.DataFrame(prepro.transform(mat_train), columns=col_train)

In [25]:
# Show the first row again, now after scaling.
df.head(1)

Unnamed: 0,I,T,UV,WS,RH,P
0,0.0,0.251029,0.0,0.033149,0.65991,0.0


In [26]:
# Split the frame into feature array x and target array y, with column "P"
# as the target.
x,y = to_xy(df,"P")

In [27]:
# Report the container type and dimensions of the feature and target arrays.
for arr in (x, y):
    print(type(arr))
    print(arr.shape)

<class 'numpy.ndarray'>
(118427, 5)
<class 'numpy.ndarray'>
(118427, 1)


In [28]:
# Print the first five feature rows.
print(x[:5])

[[0.         0.2510288  0.         0.03314917 0.6599099 ]
 [0.         0.24691358 0.         0.03867403 0.6644144 ]
 [0.         0.24691358 0.         0.05248619 0.6689189 ]
 [0.         0.24691358 0.         0.0359116  0.6711712 ]
 [0.         0.2510288  0.         0.03867403 0.6672297 ]]


In [29]:
# Print the first five target rows.
print(y[:5])

[[0.]
 [0.]
 [0.]
 [0.]
 [0.]]


In [30]:
# Feed-forward network: two ReLU hidden layers (50 and 25 units) followed by
# a single output unit; the input width is taken from the feature array.
model = Sequential([
    Dense(50, input_dim=x.shape[1], activation='relu'),  # Hidden 1
    Dense(25, activation='relu'),  # Hidden 2
    Dense(1),  # Output
])

In [31]:
# Configure training with a mean-squared-error loss and the 'adam' optimizer.
model.compile(loss='mean_squared_error', optimizer='adam')

In [32]:
# Train on the full x/y arrays for 100 epochs with verbose=0.
# NOTE(review): no random seed is set in the cells visible here, so the
# fitted weights may differ between runs — confirm whether that matters.
model.fit(x,y,verbose=0,epochs=100)

<keras.callbacks.History at 0x7fd63f7f2e80>

In [33]:
# Predict on the same x the model was fitted on, so any score computed from
# `pred` is a training-set figure.
pred = model.predict(x)

In [34]:
# Measure RMSE error.  RMSE is common for regression.
# Arguments follow scikit-learn's (y_true, y_pred) order; the value is the
# same either way for MSE, but the call now matches the documented signature.
score = np.sqrt(metrics.mean_squared_error(y, pred))
print(f"Before save score (RMSE): {score}")

Before save score (RMSE): 0.05685115605592728


In [35]:
# save neural network structure to JSON (no weights)
# The file is written under `path`, the same directory the CSV is read from.
model_json = model.to_json()
with open(os.path.join(path,"R1_model.json"), "w") as json_file:
    json_file.write(model_json)

In [36]:
# save neural network structure to YAML (no weights)
# NOTE(review): Model.to_yaml() is reportedly unavailable in newer
# Keras/TensorFlow releases — confirm the installed version supports it.
model_yaml = model.to_yaml()
with open(os.path.join(path,"R1_model.yaml"), "w") as yaml_file:
    yaml_file.write(model_yaml)

In [37]:
# save entire network to HDF5 (save everything, suggested)
# Written under `path`; the reload step reads this same file name back.
model.save(os.path.join(path,"R1_model.h5"))

In [39]:
from keras.models import load_model

# Reload the HDF5 file written earlier and score it on the same x/y, so the
# printed RMSE can be compared with the pre-save figure.
model2 = load_model(os.path.join(path,"R1_model.h5"))
pred = model2.predict(x)
# Measure RMSE error.  RMSE is common for regression.
# Arguments follow scikit-learn's (y_true, y_pred) order; the value is
# unchanged for MSE.
score = np.sqrt(metrics.mean_squared_error(y, pred))
print(f"After load score (RMSE): {score}")

After load score (RMSE): 0.05685115605592728
