In [1]:
import numpy as np
import pandas as pd
import time
import heapq
import json
import psycopg2
import psycopg2.extras
import warnings
import sys
import uuid
import cvxpy as cp
import datetime
import argparse
from scipy.sparse import csr_matrix
from pandasql import sqldf

INF = float('inf')
threadLimit = 4
queryID = "8THeTENpxjSGCNYcnU6NS8"

In [2]:
def ReportStatus(msg, flag, queryID, output=None):
    """
    Print message and update status in biz_model.biz_fir_query_parameter_definition.
    """
    sql = "update fll_t_dw.biz_fir_third_parameter_definition set python_info_data='{0}', success_flag='{1}', update_time='{2}', python_result_json='{3}', version= version + 1 where query_id='{4}' and query_version = 2".format(msg, flag, datetime.datetime.now(), output, queryID)
    print("============================================================================================================================")
    print("Reporting issue:", msg)
    conn = psycopg2.connect(host = "10.18.35.245", port = "5432", dbname = "iflorensgp", user = "fluser", password = "13$vHU7e")
    conn.autocommit = True
    cur = conn.cursor()
    cur.execute(sql)
    conn.commit()
    conn.close()

def ConnectDatabase(queryID):
    """
    Load parameters in JSON from biz_model.biz_fir_query_parameter_definition and load data from biz_model.biz_ads_fir_pkg_data.
    """
    try:
        print('Parameters reading...')
        sqlParameter = "select python_json from fll_t_dw.biz_fir_query_parameter_definition where id='{0}'".format(queryID)
        conn = psycopg2.connect(host = "10.18.35.245", port = "5432", dbname = "iflorensgp", user = "fluser", password = "13$vHU7e")
        paramInput = pd.read_sql(sqlParameter, conn)
        if paramInput.shape[0] == 0:
            raise Exception("No Valid Query Request is Found!")
        elif paramInput.shape[0] > 1:
            raise Exception("More than One Valid Query Requests are Found!")
        param = json.loads(paramInput['python_json'][0])
        print(param)
    except Exception as e:
        print("Loading Parameters from GreenPlum Failed!\n", e)
        exit(1)
    try:
        print('Data loading...')
        sqlInput = \
        """
        select billing_status_fz as billing, unit_id_fz as unit_id, p1.product, fleet_year_fz as fleet_year, contract_cust_id as customer, p1.contract_num,
        contract_lease_type as contract, cost, nbv, age_x_ceu as weighted_age, ceu_fz as ceu, teu_fz as teu, rent as rent, rml_x_ceu_c as rml, cust_country
        from fll_t_dw.biz_ads_fir_pkg_data p1
        inner join 
        (select contract_num, product
        from(
        select contract_num, product, count(*) num
        from fll_t_dw.biz_ads_fir_pkg_data
        WHERE query_id='{1}'
        group by 1, 2
        ) p1 
        where num >= {0}) p2
        on p1.contract_num=p2.contract_num and p1.product=p2.product
        WHERE query_id='{1}'
        """.format(param["numContractProductLimit"], queryID)
        data = pd.read_sql(sqlInput, conn)
        if data.shape[0] == 0:
            raise Exception("No Data Available!")
        print('Input data shape:', data.shape)
        conn.close()
    except Exception as e:
        print(e)
        ReportStatus("Loading Data from GreenPlum Failed!", 'F', queryID)
        exit(1)

    return param, data

def OutputPackage(data, result, queryID):
    """
    Output final package to biz_model.biz_fir_asset_package.
    """
    sqlOutput = "insert into fll_t_dw.biz_fir_asset_package (unit_id, query_id, id, is_void, version, query_version) values %s"
    try:
        conn = psycopg2.connect(host = "10.18.35.245", port = "5432", dbname = "iflorensgp", user = "fluser", password = "13$vHU7e")
        conn.autocommit = True
        cur = conn.cursor()
        print('Writing data...')
        values_list = []
        for i in range(len(result)):
            if result[i]:
                values_list.append((data['unit_id'][i], queryID, uuid.uuid1().hex, 0, 0, 2))
        psycopg2.extras.execute_values(cur, sqlOutput, values_list)
        conn.commit()
        conn.close()
    except Exception as e:
        print(e) 
        ReportStatus("Writing data to GreenPlum Failed!", 'F', queryID)
        exit(1)


In [4]:
param, data = ConnectDatabase(queryID)

Parameters reading...
{'prefer': {'maxOrMin': 0, 'nbvorCost': 1}, 'objective': None, 'timeLimit': 400, 'totalNBVFrom': 100000000.0, 'totalNBVTo': 1024358002260000.0, 'totalCostFrom': None, 'totalCostTo': 1111333808000000.0, 'containersAge': {'basis': None, 'average': {'symbol': None, 'averageContainersAge': None}, 'list': []}, 'weightedAge': {'basis': None, 'average': {'symbol': None, 'averageWeighedAge': None}, 'list': []}, 'lessee': {'basis': None, 'others': {'lessee': [], 'symbol': 0, 'percent': 0.0}, 'list': [], 'allList': ['Top_Lessee', 'Top2_Total', 'Top3_Total', 'Others', 'MSC', 'CN1I', 'New Factory Container', 'COSMR', 'CMA', 'CN5I', 'YANG', 'SITC', 'WHS', 'ONE', 'ESSC', 'WH', 'MSK', 'SINOKOR', 'CKLINE', 'RCL', 'EGREN', 'SSCI', 'HTHK', 'DYOUNG', 'KMTC', 'JINJIANG', 'SMLINE', 'CUL', 'SINOHK', 'SINOCL', 'TSLINE', 'HALINE', 'GOODRICH', 'MATSN', 'SRC', 'HARBOUR', 'CHINAV', 'CSSC', 'MSCL', 'YZHF', 'CH', 'LIANYUN'], 'topLessee': {'top1': {'symbol': 0, 'percent': 0.0}, 'top2': {'symbo

In [35]:
df = pd.DataFrame([[np.nan, 2, np.nan, 0],
                    [3, 4, np.nan, 1],
                    [np.nan, np.nan, np.nan, np.nan],
                    [np.nan, 3, np.nan, 4]],
                   columns=list("ABCD"))


In [36]:
df

Unnamed: 0,A,B,C,D
0,,2.0,,0.0
1,3.0,4.0,,1.0
2,,,,
3,,3.0,,4.0


In [52]:
a = data['customer'][:10]

In [54]:
a = a.fillna(value='None')
a.value_counts()

YANG    4
MSC     3
None    2
CMA     1
Name: customer, dtype: int64

In [7]:
print("==============================================================")
print('Data processing...')
start_time = time.time()
numData = data.shape[0]

# One hot all lessees
lesseeIndex = {k: v for v, k in enumerate(data['customer'].value_counts().index)}
row = [lesseeIndex[data['customer'][i]] for i in range(numData)]
lesseeOneHot = csr_matrix(([1 for _ in range(numData)], (row, col)), shape=(len(data['customer'].value_counts()), numData))


Data processing...


KeyError: None

In [34]:
dict(data['customer'].value_counts())


{'MSC': 35745,
 'CN1I': 29505,
 'COSMR': 12395,
 'CMA': 10948,
 'CN5I': 8627,
 'YANG': 5866,
 'SITC': 3094,
 'WHS': 2994,
 'ONE': 2497,
 'ESSC': 1851,
 'WH': 1598,
 'MSK': 1296,
 'SINOKOR': 1000,
 'RCL': 950,
 'CKLINE': 925,
 'EGREN': 896,
 'SSCI': 660,
 'HTHK': 600,
 'DYOUNG': 500,
 'KMTC': 500,
 'SINOHK': 300,
 'CUL': 300,
 'SMLINE': 250,
 'JINJIANG': 250,
 'SINOCL': 249,
 'TSLINE': 200,
 'HALINE': 199,
 'MATSN': 150,
 'SRC': 125,
 'GOODRICH': 100,
 'HARBOUR': 100}

In [12]:
lesseeIndex

{'MSC': 0,
 'CN1I': 1,
 'COSMR': 2,
 'CMA': 3,
 'CN5I': 4,
 'YANG': 5,
 'SITC': 6,
 'WHS': 7,
 'ONE': 8,
 'ESSC': 9,
 'WH': 10,
 'MSK': 11,
 'SINOKOR': 12,
 'RCL': 13,
 'CKLINE': 14,
 'EGREN': 15,
 'SSCI': 16,
 'HTHK': 17,
 'KMTC': 18,
 'DYOUNG': 19,
 'CUL': 20,
 'SINOHK': 21,
 'JINJIANG': 22,
 'SMLINE': 23,
 'SINOCL': 24,
 'TSLINE': 25,
 'HALINE': 26,
 'MATSN': 27,
 'SRC': 28,
 'GOODRICH': 29,
 'HARBOUR': 30}

In [4]:
param, data = ConnectDatabase(queryID)

Parameters reading...
{'prefer': {'maxOrMin': 0, 'nbvorCost': 1}, 'objective': None, 'timeLimit': 400, 'totalNBVFrom': 100000000.0, 'totalNBVTo': 1024358002260000.0, 'totalCostFrom': None, 'totalCostTo': 1111333808000000.0, 'containersAge': {'basis': None, 'average': {'symbol': None, 'averageContainersAge': None}, 'list': []}, 'weightedAge': {'basis': None, 'average': {'symbol': None, 'averageWeighedAge': None}, 'list': []}, 'lessee': {'basis': None, 'others': {'lessee': [], 'symbol': 0, 'percent': 0.0}, 'list': [], 'allList': ['Top_Lessee', 'Top2_Total', 'Top3_Total', 'Others', 'MSC', 'CN1I', 'New Factory Container', 'COSMR', 'CMA', 'CN5I', 'YANG', 'SITC', 'WHS', 'ONE', 'ESSC', 'WH', 'MSK', 'SINOKOR', 'CKLINE', 'RCL', 'EGREN', 'SSCI', 'HTHK', 'DYOUNG', 'KMTC', 'JINJIANG', 'SMLINE', 'CUL', 'SINOHK', 'SINOCL', 'TSLINE', 'HALINE', 'GOODRICH', 'MATSN', 'SRC', 'HARBOUR', 'CHINAV', 'CSSC', 'MSCL', 'YZHF', 'CH', 'LIANYUN'], 'topLessee': {'top1': {'symbol': 0, 'percent': 0.0}, 'top2': {'symbo

In [5]:
statusOneHot, \
lesseeIndex, lesseeOneHot, \
contractIndex, contractOneHot, \
contractTypeIndex, contractTypeOneHot, \
productIndex, productOneHot, \
containerAgeOneHot, weightedAgeOneHot, rmlOneHot, countryOneHot \
= DataProcessing(data)

Data processing...


KeyError: None