In [5]:
import numpy as np
import pandas as pd 
import tensorflow as tf 

from keras.models import Sequential
from keras.layers import Dense, Flatten, Input, Dropout  

## Data 

In [6]:
import xlrd
import csv

def csv_from_excel(path, sheet, csv_name):
    wb = xlrd.open_workbook(path)
    sh = wb.sheet_by_name(sheet)
    csv_file = open(csv_name, 'w')
    wr = csv.writer(csv_file, quoting=csv.QUOTE_ALL)

    for rownum in range(sh.nrows):
        if sh.row_values(rownum)[0] == '':
            return
        else:
            wr.writerow(sh.row_values(rownum))
    csv_file.close()

In [7]:
csv_from_excel("./mergeddata.xlsx", "Sheet1", "mergeddata_july9.csv")

In [8]:
# data = pd.read_csv("mergeddata_july9.csv")
# print(len(data))
# data[:5, :]

In [9]:
def read_data_as_matrix(path):
    """ return raw features and labels data as np.arrays  
    """
    data = pd.read_csv(path)
    mat = data.as_matrix()
    selected = mat[:, :13]
    # rescaling the data 
    selected[:,0] = mat[:,0]/1000000
    selected[:,3] = mat[:,3]/1000
    selected[:,4:] = mat[:,4:]/10
    return selected

def split_data(data, validation, test, shuffle=True):
    """ return the training, validatoin and test set
    """
    if shuffle:
        indices = np.arange(data.shape[0])
        np.random.shuffle(indices)
        temp = data[indices]
    else:
        temp = data
        
    num_validation = int(validation * data.shape[0])
    num_test = int(test * data.shape[0])
    num_train = data.shape[0] - num_validation - num_test
        
    test_data = data[-num_test:]
    validation_data = data[-num_test-num_validation:-num_test]
    train_data = data[:-num_validation-num_test]
    return train_data, validation_data, test_data
        
def split_features_labels(data, label_index):
    labels = data[:, label_index]
    features = np.concatenate((data[:, :label_index], data[:, label_index+1:]), axis=1)
    return features, labels 

In [10]:
mat = read_data_as_matrix('mergeddata_july9.csv')

In [11]:
(training, validation, test) = split_data(mat, 0.1, 0.1)

In [12]:
x_train, y_train = split_features_labels(training, 0)
x_val, y_val = split_features_labels(validation, 0)
x_test, y_test = split_features_labels(test, 0)

In [13]:
print(x_train.shape, y_train.shape)
print(x_val.shape, y_val.shape)
print(x_test.shape, y_test.shape)

(2833, 12) (2833,)
(354, 12) (354,)
(354, 12) (354,)


In [14]:
print(x_train[:5])
print()
print(y_train[:5])

[[ 2.     3.     2.249  0.63   9.01   9.01   4.58   4.15   5.35   6.9    8.31
   9.3  ]
 [ 3.     3.     2.749  0.07   4.51   7.96   2.11   7.32   9.86   7.68
   5.14   9.86 ]
 [ 4.     4.     3.249  0.42   9.65   9.72   1.41   7.96   5.85   3.17
   7.89   8.45 ]
 [ 2.     3.     2.249  0.63   9.01   9.01   4.58   4.15   5.35   6.9    8.31
   9.3  ]
 [ 2.     2.     1.999  0.63   9.01   9.01   4.58   4.15   5.35   6.9    8.31
   9.3  ]]

[ 3.075    2.99999  2.9999   2.875    2.595  ]


In [15]:
# x_train = np.hsplit(x_train, 12)
# x_val = np.hsplit(x_val, 12)
# x_test = np.hsplit(x_test, 12)

# x_train = list(x_train)
# x_val = list(x_val)
# x_test = list(x_test)

In [16]:
# y_train = y_train.tolist()
# y_val = y_val.tolist()
# y_test = y_test.tolist()

## Model 

In [32]:
input_dim = 12
batch_size = 64
epochs = 100
dropout_rate = 0.5 

In [41]:
model = Sequential()

In [42]:
model.add(Dense(128, activation='elu', input_dim=input_dim, kernel_initializer='normal'))
model.add(Dense(64, activation='elu', input_dim=input_dim, kernel_initializer='normal'))
model.add(Dense(16, activation='sigmoid', kernel_initializer='normal'))
model.add(Dropout(dropout_rate))
model.add(Dense(8, activation='sigmoid', kernel_initializer='normal'))
model.add(Dropout(dropout_rate))
model.add(Dense(1, kernel_initializer='normal'))

In [43]:
model.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_21 (Dense)             (None, 128)               1664      
_________________________________________________________________
dense_22 (Dense)             (None, 64)                8256      
_________________________________________________________________
dense_23 (Dense)             (None, 16)                1040      
_________________________________________________________________
dropout_9 (Dropout)          (None, 16)                0         
_________________________________________________________________
dense_24 (Dense)             (None, 8)                 136       
_________________________________________________________________
dropout_10 (Dropout)         (None, 8)                 0         
_________________________________________________________________
dense_25 (Dense)             (None, 1)                 9         
Total para

In [44]:
model.compile(optimizer='rmsprop', loss='mean_squared_error', metrics=['accuracy'])

In [45]:
model.fit(x_train, y_train, batch_size=batch_size, epochs=epochs, validation_data=(x_val, y_val))

Train on 2833 samples, validate on 354 samples
Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100
Epoch 72/100
Epoch 73/100
Epoch 74/100


<keras.callbacks.History at 0x1178b25f8>

## Evaluation

In [38]:
model.predict(x_train)

array([[ nan],
       [ nan],
       [ nan],
       ..., 
       [ nan],
       [ nan],
       [ nan]], dtype=float32)

In [39]:
first_layer = model.

SyntaxError: invalid syntax (<ipython-input-39-6d31bef89484>, line 1)