In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from gurobipy import *
import time
import datetime
import scipy.stats as st
from fitter import Fitter

import warnings
warnings.filterwarnings("ignore")

In [2]:
# Load the raw demand table (actual volumes and crew-bid forecasts per lane and date).
data = pd.read_csv(filepath_or_buffer='demand_mask.csv')

In [3]:
# Preview the first rows (head() shows 5 rows by default).
data.head()

Unnamed: 0,District,Lane,Date,Express,WEPS,WWEF,Crew Bid Express Forecast,Crew Bid WEPS Forecast,Crew Bid WWEF Forecast
0,CN_SOUTH_CHINA,SCN-SZX HUB (US),1-Jan-19,,,,6664.0,1501.0,82.0
1,CN_SOUTH_CHINA,SCN-SZX HUB (US),2-Jan-19,,,,106442.0,23503.0,1260.0
2,CN_SOUTH_CHINA,SCN-SZX HUB (US),3-Jan-19,506414.0,80078.0,3318.0,271224.0,59997.0,3230.0
3,CN_SOUTH_CHINA,SCN-SZX HUB (US),4-Jan-19,429955.0,68851.0,3032.0,255762.0,56477.0,3064.0
4,CN_SOUTH_CHINA,SCN-SZX HUB (US),5-Jan-19,549524.0,65329.0,1458.0,326884.0,71435.0,3873.0


In [4]:
# Show the column labels of the loaded frame as a NumPy array.
data.columns.values

array(['District', 'Lane', 'Date', 'Express', 'WEPS', 'WWEF',
       'Crew Bid Express Forecast', 'Crew Bid WEPS Forecast',
       'Crew Bid WWEF Forecast'], dtype=object)

In [5]:
# Replace every missing value in the frame with 0.
# NOTE(review): rows with no recorded actuals (blank Express/WEPS/WWEF) become
# indistinguishable from genuine zero demand after this step.
data = data.fillna(value=0)

In [6]:
# Total actual demand: sum of the three product volumes.
data['real_d'] = data['Express'].add(data['WEPS']).add(data['WWEF'])

# Total forecast demand: sum of the three crew-bid forecasts.
data['predict_d'] = (
    data['Crew Bid Express Forecast']
    .add(data['Crew Bid WEPS Forecast'])
    .add(data['Crew Bid WWEF Forecast'])
)

In [7]:
# Parse the textual 'Date' column (day-abbreviated month-2 digit year, e.g. '3-Jan-19')
# into timestamps stored in a new 'date' column.
data = data.assign(date=pd.to_datetime(data['Date'], format='%d-%b-%y'))
def match_quarter(x):
    """Map a month number to its calendar quarter.

    Parameters
    ----------
    x : int or float
        Month number. 1-12 are the calendar months; 0 is also accepted and
        mapped to quarter 1 (unchanged from the previous behaviour).

    Returns
    -------
    int
        1 for 0 <= x <= 3, 2 for 4 <= x <= 6, 3 for 7 <= x <= 9,
        4 for 10 <= x <= 12.

    Raises
    ------
    ValueError
        If ``x`` falls in none of the ranges above (e.g. 13, -1, 3.5 or NaN).
        Previously this surfaced as an incidental UnboundLocalError.
    """
    if 0 <= x <= 3:
        return 1
    if 4 <= x <= 6:
        # Was `23` (typo), which mislabelled April-June as quarter 23.
        return 2
    if 7 <= x <= 9:
        return 3
    if 10 <= x <= 12:
        return 4
    raise ValueError("month out of range for match_quarter: {!r}".format(x))

def three_period(x):
    """Map a month number to one of three coarse periods of the year.

    Parameters
    ----------
    x : int or float
        Month number. 1-12 are the calendar months; 0 is also accepted and
        mapped to period 1 (unchanged from the previous behaviour).

    Returns
    -------
    int
        1 for 0 <= x <= 3, 2 for 4 <= x <= 9, 3 for 10 <= x <= 12.

    Raises
    ------
    ValueError
        If ``x`` falls in none of the ranges above (e.g. 13, -1, 3.5 or NaN).
        Previously this surfaced as an incidental UnboundLocalError.
    """
    if 0 <= x <= 3:
        return 1
    if 4 <= x <= 9:
        return 2
    if 10 <= x <= 12:
        return 3
    raise ValueError("month out of range for three_period: {!r}".format(x))

In [8]:
# Derive calendar features from the already-parsed 'date' column in one
# vectorised pass. This replaces a per-row loop that re-parsed the 'Date'
# string, went through time.mktime()/fromtimestamp() (i.e. the machine's local
# timezone) and wrote five scalars per row via .loc, which also silently
# required the index labels to be exactly 0..n-1.
_date_parts = data['date'].dt
_month_numbers = _date_parts.month

# The row-wise assignment used to produce float columns (7.0, 12.0, 2020.0 in
# the displayed tables), so cast to float to keep the saved CSV and the
# downstream comparisons unchanged.
# dayofweek is Monday=0..Sunday=6; +1 yields the ISO weekday (Monday=1..Sunday=7)
# that datetime.isoweekday() returned before.
data['weekday'] = (_date_parts.dayofweek + 1).astype(float)
data['month'] = _month_numbers.astype(float)
data['year'] = _date_parts.year.astype(float)
data['quarter'] = _month_numbers.map(match_quarter).astype(float)
data['period'] = _month_numbers.map(three_period).astype(float)

In [9]:
# Keep only the lane, parsed date, the two demand totals and the calendar
# features, in this order.
column = [
    'Lane', 'date',
    'real_d', 'predict_d',
    'weekday', 'month', 'year', 'quarter', 'period',
]
data = data.loc[:, column]

In [10]:
# Inspect the last rows (tail() shows 5 rows by default).
data.tail()

Unnamed: 0,Lane,date,real_d,predict_d,weekday,month,year,quarter,period
4381,HK HUB (EU),2020-12-27,0.0,0.0,7.0,12.0,2020.0,4.0,3.0
4382,HK HUB (EU),2020-12-28,0.0,305708.0,1.0,12.0,2020.0,4.0,3.0
4383,HK HUB (EU),2020-12-29,0.0,585492.0,2.0,12.0,2020.0,4.0,3.0
4384,HK HUB (EU),2020-12-30,0.0,578380.0,3.0,12.0,2020.0,4.0,3.0
4385,HK HUB (EU),2020-12-31,0.0,548991.0,4.0,12.0,2020.0,4.0,3.0


In [11]:
# Keep rows dated on or before 2020-10-28; the tail() output above shows the
# later rows carry real_d == 0.0.
# pd.Timestamp replaces pd.datetime, which was deprecated in pandas 1.0 and
# removed in pandas 2.0.
data = data[data['date'] <= pd.Timestamp('2020-10-28')]

In [12]:
# Confirm the frame now ends at the cutoff date (tail() shows 5 rows by default).
data.tail()

Unnamed: 0,Lane,date,real_d,predict_d,weekday,month,year,quarter,period
4317,HK HUB (EU),2020-10-24,607855.0,803961.0,6.0,10.0,2020.0,4.0,3.0
4318,HK HUB (EU),2020-10-25,0.0,0.0,7.0,10.0,2020.0,4.0,3.0
4319,HK HUB (EU),2020-10-26,572106.0,416954.0,1.0,10.0,2020.0,4.0,3.0
4320,HK HUB (EU),2020-10-27,463869.0,230336.0,2.0,10.0,2020.0,4.0,3.0
4321,HK HUB (EU),2020-10-28,516886.0,642719.0,3.0,10.0,2020.0,4.0,3.0


In [13]:
# Renumber the rows 0..n-1 after filtering, discarding the old index labels.
data = data.reset_index(drop=True)

In [16]:
# Persist the cleaned table. The next cell reloads 'demand_ups.csv' by relative
# path, so write to that same relative location instead of a user-specific
# absolute Desktop path that only exists on one machine.
data.to_csv('demand_ups.csv')

In [17]:
# Reload the cleaned table; the first CSV column holds the saved row index.
# NOTE(review): no parse_dates is given, so 'date' comes back as plain strings
# rather than timestamps — confirm downstream code expects that.
data = pd.read_csv('demand_ups.csv', header=0, index_col=0)

In [18]:
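# Only two lanes are selectable: SZX-US (key 'SZX') and HK-US (key 'HK').
# Inputs: weekday (1-7), period (1-3), year (e.g. 2019; required, no default), lane ('SZX' or 'HK'), datatype ('predict' or 'real'; default 'predict')

# e.g. df=data_select(1,3,2019,'SZX','predict')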

# Short lane key -> full value of the 'Lane' column.
lane_dict = dict(
    SZX='SCN-SZX HUB (US)',
    HK='HK HUB (US)',
)

# Data kind -> name of the demand column that holds it.
type_dict = dict(
    predict='predict_d',
    real='real_d',
)

def data_select(weekday,period,year,lane,datatype='predict',min_demand=1000):
    """Return the demand rows of one lane for a given weekday, period and year.

    Reads the module-level ``data`` frame and the ``lane_dict`` / ``type_dict``
    lookup tables.

    Parameters
    ----------
    weekday : int
        ISO weekday to keep (1 = Monday ... 7 = Sunday).
    period : int
        Period of the year to keep, as produced by ``three_period``
        (1 = months up to 3, 2 = months 4-9, 3 = months 10-12).
    year : int
        Calendar year to keep.
    lane : str
        Short lane key; must be a key of ``lane_dict`` ('SZX' or 'HK').
    datatype : str, optional
        'predict' (default) to use the forecast column, 'real' to use the
        actual-demand column; must be a key of ``type_dict``.
    min_demand : float, optional
        Rows whose demand is below this value are dropped. Defaults to 1000,
        the threshold that was previously hard-coded.

    Returns
    -------
    pandas.DataFrame
        Columns 'Lane', 'date', 'demand', 'weekday', 'month', 'year',
        'quarter', 'period', with a fresh 0..n-1 index. 'demand' is the
        selected forecast/actual column renamed.

    Raises
    ------
    KeyError
        If ``lane`` or ``datatype`` is not a known key.
    """
    # Same exception type as the plain dict lookup raised before, but with a
    # message that says which argument was wrong and what is allowed.
    if lane not in lane_dict:
        raise KeyError("unknown lane {!r}; expected one of {}".format(
            lane, sorted(lane_dict)))
    if datatype not in type_dict:
        raise KeyError("unknown datatype {!r}; expected one of {}".format(
            datatype, sorted(type_dict)))

    demand_col = type_dict[datatype]
    row_mask = (
        (data['Lane'] == lane_dict[lane])
        & (data['weekday'] == weekday)
        & (data['period'] == period)
        & (data['year'] == year)
        & (data[demand_col] >= min_demand)
    )
    keep_cols = ['Lane', 'date', demand_col, 'weekday', 'month', 'year',
                 'quarter', 'period']
    return (
        data.loc[row_mask, keep_cols]
        .reset_index(drop=True)
        .rename(columns={demand_col: 'demand'})
    )

In [20]:
# Example: forecast demand on Mondays in period 3 of 2020 for the SZX lane.
df = data_select(weekday=1, period=3, year=2020, lane='SZX', datatype='predict')
df.head()

Unnamed: 0,Lane,date,demand,weekday,month,year,quarter,period
0,SCN-SZX HUB (US),2020-10-12,645493.0,1.0,10.0,2020.0,4.0,3.0
1,SCN-SZX HUB (US),2020-10-19,343228.0,1.0,10.0,2020.0,4.0,3.0
2,SCN-SZX HUB (US),2020-10-26,327317.0,1.0,10.0,2020.0,4.0,3.0
