In [1]:
import pyodbc
from meyerDB import cable_connection
import matplotlib.pyplot as plt
import numpy as np
from IPython.display import HTML, display
from sklearn.model_selection import train_test_split
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import LeavePOut
from sklearn.metrics import mean_absolute_percentage_error as mape
from sklearn import linear_model
from sklearn.neighbors import KNeighborsRegressor
from sklearn.dummy import DummyRegressor
import sklearn.preprocessing as pp

from keras import models
from keras.layers import Dense, Dropout


# init db connection
conn = pyodbc.connect(cable_connection)
cursor = conn.cursor()
print('Database connection ok')

def display_table(data):
    html = "<table>"
    for row in data:
        html += "<tr>"
        for field in row:
            try:
                value = str(round(100*field, 1)).replace('.', ',') + '%'
            except:
                value = field
            html += "<td><h4>%s</h4><td>"%(value)
        html += "</tr>"
    html += "</table>"
    display(HTML(html))

Database connection ok


In [16]:
# Dynamic whole ship cable quantities
# Predictors:
# GT, current total quantity, past 10 week time series
# Algorithms:
# Neural Network

# Get the data
cursor.execute("SELECT r.project_id, gross_tonnage, sum(amount) FROM routed as r"
    " LEFT JOIN projects as p ON p.project_id=r.project_id"
    " GROUP BY r.project_id, gross_tonnage")
data = np.array(cursor.fetchall()).astype('float32')
ships = data[:, 0]
gt = data[:, 1]
y = data[:, -1]

gtmax = gt.max()
ymax = y.max()
gt = gt/gtmax
y = y/ymax

# init lpo split
p = 2 #ships
lpo = LeavePOut(p)

# update temporary cable counts
cursor.execute("UPDATE progress SET cables=0")
cursor.execute(
    "UPDATE progress SET progress.cables=t1.cables FROM"
    " (SELECT project_id, pw, sum(amount) as cables FROM ship_readiness GROUP BY project_id, pw) t1"
    " WHERE t1.project_id=progress.project_id AND t1.pw=progress.wk"
)
cursor.commit()

# train and evaluate
for train_i, test_i in lpo.split(y):

    X_train = []
    for i in train_i:
        print(i)
        ship = ships[i]
        step = 5
        window = 10
        for wk in range(-90, 1, step):
            cursor.execute(
                "SELECT cables FROM progress WHERE"
                " wk > {} AND wk <= {} AND project_id={}"
                .format(wk - window, wk, ship)
            )
            data = np.array(cursor.fetchall())/ymax
            features = list(data[:, 0])
            cursor.execute(
                "SELECT sum(cables) FROM progress WHERE"
                " wk <= {} AND project_id={}"
                .format(wk, ship)
            )
            current_sum = cursor.fetchall()[0][0]/ymax
            features.append(current_sum)
            features.append(gt[i]/gtmax)
            X_train.append(features)
    X_train = np.array(X_train)
    print(X_train.shape)
    print(X_train[100])
    break

    # define model
    model = models.Sequential()
    model.add(Dense(1, activation='sigmoid', input_shape=(1,), kernel_initializer='normal'))
    #model.add(Dense(1, activation='relu'))

    # Compile model
    model.compile(optimizer='adam',
                loss='mean_squared_error',
                metrics=['accuracy'])

    # Train model
    model.fit(X_train, y_train,
            batch_size=1,
            epochs=500,
            verbose=0)



    #evaluate
    preds = model.predict(X_test)*ymax
    y_true = y_test*ymax
    print(100*(1-mape(y_true, preds)))

2
3
4
5
6
7
8
9
10
(171, 12)
[1.78912116e-02 5.69949513e-02 5.48227078e-02 1.37523240e-01
 2.45002707e-02 1.34380285e-02 8.64868494e-03 2.39299309e-02
 3.74993180e-02 3.92514657e-02 5.24289593e-01 5.40511337e-06]
