In [2]:
import matplotlib.pyplot as plt
import numpy as np
import pyodbc
import sklearn.preprocessing as pp
from IPython.display import HTML, display
from sklearn import linear_model
from sklearn.dummy import DummyRegressor
from sklearn.metrics import mean_absolute_percentage_error as mape
from sklearn.model_selection import LeavePOut
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsRegressor

from keras import backend
from keras import models
from keras.layers import Dense, Dropout

from meyerDB import cable_connection


# Open the ODBC connection from the project's connection string and keep one
# cursor on it; the query cells further down issue their SQL through `cursor`.
conn = pyodbc.connect(cable_connection)
cursor = conn.cursor()
print("Database connection ok")

def display_table(data):
    """Render rows of values as an HTML table in the notebook output.

    Every field for which ``round(100 * field, 1)`` works is shown as a
    percentage with a decimal comma (``0.123`` -> ``12,3%``). Any other field
    (for example a text label) is inserted unchanged via ``%s`` formatting.

    Args:
        data: Iterable of rows, each row an iterable of fields.

    Returns:
        None. The table is shown with ``IPython.display.display``.
    """
    parts = ["<table>"]
    for row in data:
        parts.append("<tr>")
        for field in row:
            try:
                value = str(round(100*field, 1)).replace('.', ',') + '%'
            except (TypeError, ValueError):
                # Non-numeric field (e.g. a label): show it as-is. Only the
                # conversion errors are caught so that KeyboardInterrupt and
                # similar are no longer swallowed.
                value = field
            # Close the cell with </td>; the one-element tuple keeps the
            # formatting working when the field itself is a tuple.
            parts.append("<td><h4>%s</h4></td>" % (value,))
        parts.append("</tr>")
    parts.append("</table>")
    display(HTML("".join(parts)))

Database connection ok


In [19]:
# Whole ship cable quantities
# Predictors:
# GT (gross tonnage) only; current total quantity and the past 10 week
# time series are not used by this cell
# Algorithms:
# Neural Network (a single sigmoid unit), scored with leave-p-out

# Get the data: one row per (project_id, gross_tonnage) group holding the
# gross tonnage and the summed routed amount.
cursor.execute("SELECT gross_tonnage, sum(amount) FROM routed as r"
    " LEFT JOIN projects as p ON p.project_id=r.project_id"
    " GROUP BY r.project_id, gross_tonnage")
data = np.array(cursor.fetchall()).astype('float32')
gt = data[:, 0].reshape((-1, 1))
y = data[:, -1]

# Scale predictor and target by their maxima; the largest target becomes 1,
# which is the upper limit of the sigmoid output used below.
# NOTE(review): the maxima are taken over all rows, including the ones held
# out in each split, so the scaling sees test data. Left unchanged here because
# other cells may rely on the scaled `gt` / `y`; confirm and move the scaling
# inside the split if not.
gtmax = gt.max()
ymax = y.max()
gt = gt/gtmax
y = y/ymax

# init lpo split
p = 2 #ships
lpo = LeavePOut(p)

# NOTE(review): no random seed is set, so weight initialisation and batch
# order differ between runs and the scores below are not reproducible.
scores = []
for train_i, test_i in lpo.split(y):

    X_train = gt[train_i]
    y_train = y[train_i]
    X_test = gt[test_i]
    y_test = y[test_i]

    # A fresh model is built for every split; drop the global Keras state left
    # by the previous one so it does not pile up over the loop.
    backend.clear_session()

    model = models.Sequential()
    model.add(Dense(1, activation='sigmoid', input_shape=(1,), kernel_initializer='normal'))

    # Compile model. No 'accuracy' metric: it is a classification metric and
    # says nothing about a regression fit.
    model.compile(optimizer='adam',
                loss='mean_squared_error')

    # Train model
    model.fit(X_train, y_train,
            batch_size=1,
            epochs=500,
            verbose=0)

    # Back to original units; ravel() turns the (n, 1) network output into the
    # same 1-D shape as y_true.
    preds = model.predict(X_test).ravel()*ymax
    y_true = y_test*ymax
    score = 100*(1-mape(y_true, preds))
    scores.append(score)
    print(score)

# One-line summary so the per-split values above do not have to be eyeballed.
print('mean over %d splits: %.2f (min %.2f, max %.2f)'
      % (len(scores), np.mean(scores), np.min(scores), np.max(scores)))

80.23249804973602
22.406470775604248
79.46552038192749
80.67543506622314
78.11053544282913
85.13503074645996
80.01330643892288
77.70494222640991
13.697671890258789
76.60808116197586
17.063426971435547
76.39818042516708
77.06307470798492
73.89930188655853
81.10024034976959
76.40611529350281
74.70195591449738
8.343243598937988
72.69564867019653
20.769202709197998
19.158244132995605
15.583431720733643
23.065578937530518
23.845267295837402
23.897933959960938
-59.874022006988525
13.439708948135376
76.78823918104172
74.69086050987244
80.69603443145752
60.98918318748474
57.3100745677948
14.974170923233032
73.2280820608139
74.67449307441711
81.14917576313019
76.37293338775635
74.07186627388
8.89045000076294
72.29564487934113
78.7000834941864
75.35884976387024
73.24945330619812
3.9971232414245605
70.35936713218689
80.81340491771698
78.40452194213867
15.667933225631714
77.42687165737152
58.01324546337128
16.496974229812622
73.43074381351471
13.123089075088501
71.17224633693695
4.4839441776275635