In [1]:
import pyodbc
from meyerDB import cable_connection
import PyQt5
import matplotlib.pyplot as plt
%matplotlib qt
import numpy as np
from IPython.display import HTML, display, clear_output

from sklearn.model_selection import train_test_split
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import LeavePOut
from sklearn.metrics import mean_absolute_percentage_error as mape
from sklearn.metrics import mean_absolute_error as mae, mean_squared_error as mse
from sklearn import linear_model
from sklearn.neighbors import KNeighborsRegressor
from sklearn.dummy import DummyRegressor
import sklearn.preprocessing as pp
from scipy.optimize import curve_fit
from sklearn.gaussian_process import GaussianProcessRegressor
from sklearn.gaussian_process.kernels import RBF, RBF2, WhiteKernel, ConstantKernel, RationalQuadratic
from scipy.stats import norm

# init db connection
# Opens an ODBC connection using the `cable_connection` value imported from
# meyerDB and creates one module-level cursor. Later cells issue all of their
# queries through this same `cursor` object, so this cell must run first.
conn = pyodbc.connect(cable_connection)
cursor = conn.cursor()
# Only reached when connect() and cursor() did not raise.
print('Database connection ok')

def display_table(data):
    """Render a 2-D iterable as an HTML table in the notebook output.

    Every field that supports ``round(100*field, 1)`` is shown as a percentage
    with a decimal comma (``0.125`` -> ``12,5%``); any other field (strings,
    ``None``, ...) is inserted unchanged.

    Parameters
    ----------
    data : iterable of iterables
        Rows of the table; each row is an iterable of fields.
    """
    def _as_cell_text(field):
        # Non-numeric fields make round() raise TypeError; show those verbatim.
        try:
            return str(round(100*field, 1)).replace('.', ',') + '%'
        except (TypeError, ValueError):
            return field

    row_markup = []
    for row in data:
        # One-element tuple on the right of % so tuple-valued fields format safely.
        cells = "".join(
            "<td><h4>%s</h4></td>" % (_as_cell_text(field),) for field in row
        )
        row_markup.append("<tr>" + cells + "</tr>")
    display(HTML("<table>" + "".join(row_markup) + "</table>"))

def logifunc(x,x0,k,l, A):
    """Generalised logistic curve ``l / (1 + A*exp(-k*(x - x0)))``.

    ``x`` may be a scalar or a NumPy array; the result has the same shape.
    """
    exponent = -k*(x - x0)
    denominator = 1 + A*np.exp(exponent)
    return l / denominator

# Notebook-wide matplotlib defaults: larger bold text, legend in the top-right corner.
plt.rcParams.update({
    'font.size': 14,
    'font.weight': 'bold',
    "legend.loc": 'upper right',
})

Database connection ok


In [2]:
# get ensemble quantity

def get_ensemble_quantity(linear_pred, wk, ship):
    """Blend the linear quantity estimate with a progress-based estimate.

    Two per-ship lookup tables are read from the working directory on every
    call: ``pred_ends.npy`` and ``progress.npy``. Each file holds a pickled
    dict mapping a ship id to a 2-D array whose column 0 is the week and
    column 1 the value for that week.

    The progress-based estimate is ``progress value + pred_ends value``. Its
    weight is ``(progress / linear_pred) ** 2`` capped at 1; the linear
    estimate gets the remaining weight. When the progress/linear ratio is
    below 0.002 the linear estimate is returned unchanged.

    Parameters
    ----------
    linear_pred : float
        Quantity predicted by the linear model; must be non-zero because it
        is used as a divisor.
    wk : number
        Week to look up (compared against column 0 of the stored arrays).
    ship : hashable
        Key of the ship in the stored dicts.

    Returns
    -------
    float
        The weighted average of the two estimates.

    Raises
    ------
    KeyError
        If ``ship`` is missing from one of the files.
    IndexError
        If ``wk`` has no row for that ship in one of the files.
    """
    def _value_at_week(path):
        # NOTE(review): allow_pickle=True unpickles arbitrary objects and can
        # execute code from the file -- only load files produced by a trusted
        # source.
        per_ship = np.load(path, allow_pickle=True).item()
        rows = per_ship.get(ship)
        if rows is None:
            raise KeyError("ship {!r} not found in {}".format(ship, path))
        matches = rows[rows[:, 0] == wk]
        if matches.shape[0] == 0:
            raise IndexError(
                "week {!r} not found for ship {!r} in {}".format(wk, ship, path))
        # First matching row, value column.
        return matches[0, 1]

    pred_end = _value_at_week('pred_ends.npy')
    cables = _value_at_week('progress.npy')

    completeness_ratio = cables/linear_pred
    # Too little progress to say anything: trust the linear model alone.
    if completeness_ratio < 0.002: return linear_pred

    pred = cables + pred_end

    # Squared ratio, capped at 1, is the weight of the progress-based estimate.
    estimated_completeness = min(np.square(completeness_ratio), 1.0)
    w1, w2 = 1-estimated_completeness, estimated_completeness
    # w1 + w2 is 1 up to rounding; the division is kept so results match the
    # previous implementation exactly.
    avpred = (w1*linear_pred + w2*pred)/(w1+w2)
    return avpred


In [3]:
# Demo

def meanfunc(x, fx, fy):
    """Fit ``logifunc`` to the points ``(fx, fy)`` and evaluate the fit at ``x``.

    The fit is box-constrained; the bounds are listed in ``logifunc``'s
    parameter order ``(x0, k, l, A)`` and pin ``l`` to roughly 1.
    """
    lower_bounds = [-100, 1e-3, 0.9999, 1e-4]
    upper_bounds = [-10, 2, 1.0001, 100]
    fit_result = curve_fit(logifunc, fx, fy, bounds=(lower_bounds, upper_bounds))
    fitted_params = fit_result[0]  # second element (covariance) is not used
    return logifunc(x, *fitted_params)

# Load one row per (project, gross tonnage) with the summed routed amount.
cursor.execute(
    "SELECT r.project_id, gross_tonnage, sum(amount) FROM routed as r"
    " LEFT JOIN projects as p ON p.project_id=r.project_id"
    " GROUP BY r.project_id, gross_tonnage"
)
data = np.array(cursor.fetchall())
# Columns: project id (cast to int32), gross tonnage, summed amount.
ships, gts, cables = data[:, 0].astype('int32'), data[:, 1], data[:, -1]

# Integer grid -100..0 shared by the fitting and plotting code below.
x = np.arange(-100, 1)

# Linear model: summed amount as a function of gross tonnage.
model = linear_model.LinearRegression().fit(gts.reshape(-1, 1), cables)
print(model.coef_, model.intercept_)

# update progress table
def set_progress_data(wk, n_prior_ships=11):
    """Refill ``progress.cables`` from one week column and fit the mean prior.

    The ``progress`` table is first zeroed, then filled with the per-project,
    per-week sum of ``ship_readiness.amount`` grouped by the column named
    ``wk``. The change is committed, so the table content after the call
    reflects the most recent ``wk`` passed in.

    Parameters
    ----------
    wk : str
        Name of the week column in ``ship_readiness`` to group by (the
        notebook calls this with ``'pw'`` and ``'rw'``).
    n_prior_ships : int, optional
        How many leading entries of the module-level ``ships`` array are used
        to fit the mean prior curve. Defaults to 11, the previously hard-coded
        value; fewer ships are used if fewer are available.

    Returns
    -------
    ship_data : dict
        Maps each id in ``ships`` to an array of ``(wk, cables)`` rows ordered
        by week.
    prior_mean : ndarray
        ``meanfunc`` fitted on the pooled cumulative fractions and evaluated
        on the module-level grid ``x``.

    Raises
    ------
    ValueError
        If ``wk`` is not a plain identifier, or no ship is available for the
        prior fit.
    """
    # Column names cannot be bound as query parameters, so refuse anything
    # that is not a plain identifier before formatting it into the SQL text.
    if not isinstance(wk, str) or not wk.isidentifier():
        raise ValueError("wk must be a plain column name, got {!r}".format(wk))

    cursor.execute("UPDATE progress SET cables=0")
    cursor.execute(

        "UPDATE progress SET progress.cables=t1.cables FROM"
        " (SELECT project_id, {}, sum(amount) as cables FROM ship_readiness GROUP BY project_id, {}) t1"
        " WHERE t1.project_id=progress.project_id AND t1.{}=progress.wk".format(wk, wk, wk)
    )
    cursor.commit()

    # get progress planned data
    ship_data = {}
    for ship in ships:
        # int() because the ids are NumPy integers, which the driver may not
        # accept as bound parameters.
        cursor.execute(
            "SELECT wk, cables FROM progress WHERE project_id=? ORDER BY wk",
            int(ship))
        ship_data[ship] = np.array(cursor.fetchall())

    # set mean prior function
    fx = []
    fy = []
    for ship, total in zip(ships[:n_prior_ships], cables[:n_prior_ships]):
        weekly = ship_data[ship]
        fx.append(weekly[:, 0])
        # Cumulative amount as a fraction of the ship's total.
        fy.append(weekly[:, 1].cumsum()/total)
    if not fx:
        raise ValueError("no ships available to fit the mean prior function")
    fx = np.concatenate(fx, axis=0)
    fy = np.concatenate(fy, axis=0)

    # Pool all ships and order the samples by week before fitting.
    order = np.argsort(fx)
    return ship_data, meanfunc(x, fx[order], fy[order])


# Build the per-ship progress series and mean prior for both week columns.
# Each call rewrites progress.cables, so the table ends up holding the 'rw' data.
ship_data_p, mf_p = set_progress_data('pw')
ship_data_r, mf_r = set_progress_data('rw')

# set ship
ship = 1394

# init gaussian process regressor (fixed hyper-parameters, no optimisation)
(l, c, noise) = (10.0, 0.01, 0.001)#best_params
kernel = ConstantKernel(constant_value=c, constant_value_bounds='fixed') * RBF(
    length_scale=l, length_scale_bounds='fixed')
gp = GaussianProcessRegressor(kernel=kernel,
                            alpha=noise)
gt = gts[np.argwhere(ships==ship)][0]
linear_pred = model.predict(gt.reshape(1, -1))[0]


def _plot_progress_forecast(weekly, prior_mean, total, seen, unseen, colour):
    """Fit the GP on the observed part of one progress series and draw it.

    ``weekly`` is one entry of the dict returned by ``set_progress_data``,
    ``prior_mean`` the matching mean prior, ``total`` the quantity used to
    normalise the series, ``seen``/``unseen`` index arrays into ``x`` and
    ``colour`` a one-letter matplotlib colour code.
    Assumes ``weekly`` has one row per entry of ``x`` -- TODO confirm.
    """
    y = weekly[:, 1].cumsum()/total
    # The GP models the residual between the observed fraction and the prior.
    gp.fit(x[seen].reshape(-1, 1), y[seen] - prior_mean[seen])
    y_mean, y_std = gp.predict(x.reshape(-1, 1), return_std=True)
    # 1.96 standard deviations (plus the noise term), scaled back to quantity.
    y_std = (1.96 * (y_std + noise)) * total
    y_mean = (y_mean + prior_mean) * total
    y = y*total
    # Only the first line keeps a legend entry; the rest are '_nolegend_'.
    plt.plot(x[seen], y[seen], colour)
    plt.plot(x[unseen], y[unseen], colour + ':', alpha = 0.6, label='_nolegend_')
    plt.plot(x[unseen], y_mean[unseen], colour + '--', label='_nolegend_')
    plt.fill_between(x[unseen], (y_std[unseen] + y_mean[unseen]), (y_mean[unseen] - y_std[unseen]), alpha=0.2, color='k', label='_nolegend_')


# Replay the week-by-week animation ten times.
for iii in range(10):
    for wk in np.arange(-70, 1):
        xidxs = np.flatnonzero(x <= wk)
        xidxs_inv = np.flatnonzero(x > wk)
        # Nothing observed yet, or nothing left to forecast: skip the frame.
        if xidxs.shape[0] == 0 or xidxs_inv.shape[0] == 0: continue
        pred = get_ensemble_quantity(linear_pred, wk, ship)
        # pred and plot planned progress
        _plot_progress_forecast(ship_data_p[ship], mf_p, pred, xidxs, xidxs_inv, 'r')
        # pred and plot ready progress
        _plot_progress_forecast(ship_data_r[ship], mf_r, pred, xidxs, xidxs_inv, 'b')
        # finalize plotting
        plt.ylim([0, 3000000])
        plt.xlim([-100, 0])
        plt.legend(['Design progress', 'Cabling progress'], loc='upper left')
        plt.xlabel('Weeks to delivery')
        plt.ylabel('Cable quantity (m)')
        #plt.savefig('progress/{}.png'.format(100+wk))
        plt.pause(0.2)
        plt.cla()


[0.21598242] -121.70172793910751


NameError: name 'get_ensemble_quantity' is not defined