In [1]:
from sklearn.model_selection import train_test_split
import pandas as pd
import os
import numpy as np
from sklearn import metrics
from scipy.stats import zscore

In [9]:
# Convert all missing values in the specified column to the median
def missing_median(df, name):
    med = df[name].median()
    df[name] = df[name].fillna(med)
    
# Convert a Pandas dataframe to the x,y inputs that TensorFlow needs
def to_xy(df, target):
    result = []
    for x in df.columns:
        if x != target:
            result.append(x)
    # find out the type of the target column.  Is it really this hard? :(
    target_type = df[target].dtypes
    target_type = target_type[0] if hasattr(
        target_type, '__iter__') else target_type
    # Encode to int for classification, float otherwise. TensorFlow likes 32 bits.
    if target_type in (np.int64, np.int32):
        # Classification
        dummies = pd.get_dummies(df[target])
        return df.as_matrix(result).astype(np.float32), dummies.as_matrix().astype(np.float32)
    # Regression
    return df.as_matrix(result).astype(np.float32), df.as_matrix([target]).astype(np.float32)

In [2]:
path = "./data/"

In [3]:
filename_read = os.path.join(path,"Train_Solar.csv")
df = pd.read_csv(filename_read,na_values=['NA','?'])

In [4]:
df.head(1)

Unnamed: 0,datetime,date,time,I,T,UV,WS,RH,P
0,1/1/2017 6:00,1/1/2017,6:00:00,0.0,24.6,0.0,1.2,59.8,0.0


In [5]:
datetime_sr = df['datetime']
date_sr = df['date']
time_sr = df['time']

In [6]:
df.drop('datetime',1,inplace=True)
df.drop('date',1,inplace=True)
df.drop('time',1,inplace=True)

In [8]:
missing_median(df, 'I')
missing_median(df, 'T')
missing_median(df, 'UV')
missing_median(df, 'WS')
missing_median(df, 'RH')
missing_median(df, 'P')

In [10]:
x,y = to_xy(df,"P")



In [11]:
print(x[0:5])

[[ 0.    24.6    0.     1.2   59.8  ]
 [ 0.    24.5    0.     1.4   60.   ]
 [ 0.    24.5    0.     1.9   60.2  ]
 [ 0.    24.5    0.     1.3   60.3  ]
 [ 0.    24.6    0.     1.4   60.125]]


In [12]:
print(y[0:5])

[[0.]
 [0.]
 [0.]
 [0.]
 [0.]]


In [13]:
# Split into train/test
x_train, x_test, y_train, y_test = train_test_split(    
    x, y, test_size=0.25, random_state=45)

In [14]:
from keras.models import Sequential
from keras.layers.core import Dense, Activation
model = Sequential()
model.add(Dense(10, input_dim=x.shape[1], activation='relu'))
model.add(Dense(10))
model.add(Dense(10))
model.add(Dense(10))
model.add(Dense(1))

Using TensorFlow backend.


In [15]:
model.compile(loss='mean_squared_error', optimizer='adam')

In [16]:
from keras.callbacks import EarlyStopping
monitor = EarlyStopping(monitor='val_loss', min_delta=1e-3, patience=5, verbose=1, mode='auto')

In [None]:
model.fit(x,y,validation_data=(x_test,y_test),callbacks=[monitor],verbose=2,epochs=1000)

Train on 131586 samples, validate on 32897 samples
Epoch 1/1000
 - 6s - loss: 26.8542 - val_loss: 0.3150
Epoch 2/1000
 - 6s - loss: 0.3817 - val_loss: 0.7843
Epoch 3/1000
 - 6s - loss: 0.3868 - val_loss: 0.3553
Epoch 4/1000
 - 5s - loss: 0.3155 - val_loss: 0.2735
Epoch 5/1000
 - 5s - loss: 0.2795 - val_loss: 0.4526
Epoch 6/1000
 - 6s - loss: 0.2727 - val_loss: 0.2697
Epoch 7/1000
 - 5s - loss: 0.2646 - val_loss: 0.2857
Epoch 8/1000
 - 5s - loss: 0.2618 - val_loss: 0.2642
Epoch 9/1000
 - 5s - loss: 0.2591 - val_loss: 0.2639
Epoch 10/1000
 - 6s - loss: 0.2584 - val_loss: 0.2629
Epoch 11/1000
 - 5s - loss: 0.2571 - val_loss: 0.2684
Epoch 12/1000
