In [1]:
import pandas as pd
import numpy as np
from gurobipy import *
from sklearn.cluster import KMeans

import warnings
# Silence every warning for the rest of the session.
# NOTE(review): this blanket filter also hides deprecation notices and other
# diagnostics from the libraries used below; consider narrowing it.
warnings.filterwarnings("ignore")

# Raw demand table. The cells below read its columns 'Lane', 'date',
# 'predict_d', 'real_d', 'weekday', 'month', 'year', 'quarter' and 'period'.
data = pd.read_csv('demand_ups.csv')

In [2]:
# Only two lanes are modelled: SZX -> US and HK -> US.
# lane_dict maps the short lane code to the value stored in the 'Lane' column;
# type_dict maps the data type to the column holding that demand figure.
#
# Example: df = data_select(1, 3, 2019, 'SZX', 'predict')
#          i.e. weekday=1, period=3, year=2019, lane='SZX', datatype='predict'

lane_dict={'SZX':'SCN-SZX HUB (US)','HK':'HK HUB (US)'}
type_dict={'predict':'predict_d','real':'real_d'}

def data_select(weekday,period,year,lane,datatype='predict',min_demand=1000):
    """Return the demand rows of one lane for a weekday / period / year.

    Args:
        weekday: value matched against the 'weekday' column.
        period: value matched against the 'period' column.
        year: value matched against the 'year' column.
        lane: key of ``lane_dict`` ('SZX' or 'HK').
        datatype: key of ``type_dict`` ('predict' or 'real'); selects which
            demand column is returned.
        min_demand: rows whose demand is below this value are dropped.
            Defaults to 1000, the cutoff that used to be hard-coded.

    Returns:
        A new DataFrame with a fresh 0..n-1 index and the columns
        'Lane', 'date', 'demand', 'weekday', 'month', 'year', 'quarter',
        'period', where 'demand' is the selected demand column renamed.

    Raises:
        KeyError: if ``lane`` or ``datatype`` is not a known key.
    """
    value_col=type_dict[datatype]
    # A single boolean mask; rows with a missing demand compare False and are
    # dropped, exactly as with the former two-step filter.
    keep=((data['Lane']==lane_dict[lane])
          &(data['weekday']==weekday)
          &(data['period']==period)
          &(data['year']==year)
          &(data[value_col]>=min_demand))
    columns=['Lane','date',value_col,'weekday','month','year','quarter','period']
    # Chained, non-mutating calls: nothing is modified in place on a slice.
    return (data.loc[keep,columns]
            .rename(columns={value_col:'demand'})
            .reset_index(drop=True))

In [3]:
# Predicted SZX demand for weekday 1, period 2 of 2019.
df = data_select(weekday=1, period=2, year=2019, lane='SZX', datatype='predict')

In [4]:
# Preview the first rows of the selected slice.
df.head()

Unnamed: 0,Lane,date,demand,weekday,month,year,quarter,period
0,SCN-SZX HUB (US),2019-04-01,239847.0,1.0,4.0,2019.0,23.0,2.0
1,SCN-SZX HUB (US),2019-04-08,153697.0,1.0,4.0,2019.0,23.0,2.0
2,SCN-SZX HUB (US),2019-04-15,305550.0,1.0,4.0,2019.0,23.0,2.0
3,SCN-SZX HUB (US),2019-04-22,316132.0,1.0,4.0,2019.0,23.0,2.0
4,SCN-SZX HUB (US),2019-04-29,304857.0,1.0,4.0,2019.0,23.0,2.0


In [5]:
def weighted_cluster_model(demand,weight,cost_over,cost_under):
    """Pick one capacity level minimising the weighted over/under-capacity cost.

    Solves, with Gurobi,

        min_c  sum_i weight[i] * ( cost_over  * max(c - demand[i], 0)
                                 + cost_under * max(demand[i] - c, 0) )

    where ``c`` is a continuous variable with Gurobi's default lower bound 0.

    The earlier formulation pinned ``y = max(., 0)`` from above with binary
    indicators and a big-M of 1e20. That constant is numerically unusable:
    adjacent doubles near 1e20 are 16384 apart, so the demand term folded into
    the big-M right-hand side was corrupted, and ``M * (1 - z)`` dwarfs the
    solver's integrality tolerance. The upper bounds are also unnecessary:
    with non-negative costs and weights the minimisation itself pushes every
    ``y`` down to ``max(., 0)``. The model is therefore a plain LP with the
    same optimal objective.

    Args:
        demand: sequence of demand levels (one per scenario / cluster).
        weight: sequence of non-negative scenario weights, same length.
        cost_over: unit cost of capacity above demand (must be >= 0).
        cost_under: unit cost of demand above capacity (must be >= 0).

    Returns:
        ``(m, c)``: the optimised Gurobi model and its capacity variable
        (read the solution from ``c.x``).

    Raises:
        ValueError: if a cost or a weight is negative (the LP would be
            unbounded).
    """
    # Plain Python floats keep the Gurobi expressions free of numpy scalars.
    demand=[float(d) for d in demand]
    weight=[float(w) for w in weight]
    if cost_over<0 or cost_under<0:
        raise ValueError("cost_over and cost_under must be non-negative")
    if any(w<0 for w in weight):
        raise ValueError("weights must be non-negative")
    n=len(demand)

    ######### Model Set-up ############
    m = Model()
    # Suppress the solver log.
    m.setParam( 'OutputFlag', False )

    c = m.addVar(vtype=GRB.CONTINUOUS,name = "capacity")
    # y1[i]: capacity in excess of demand i; y2[i]: demand i in excess of
    # capacity. Both are >= 0 through the default variable lower bound.
    y1= m.addVars(n,vtype=GRB.CONTINUOUS,name = "y1")
    y2= m.addVars(n,vtype=GRB.CONTINUOUS,name = "y2")

    m.setObjective( quicksum((cost_over*y1[i]+cost_under*y2[i])*weight[i] for i in range(n)), GRB.MINIMIZE)

    m.addConstrs( (y1[i]>=c-demand[i] for i in range(n)), name="over")
    m.addConstrs( (y2[i]>=demand[i]-c for i in range(n)), name="under")

    m.optimize()

    return m,c

In [6]:
def cluster(df,k,random_state=0):
    """Summarise the 'demand' column of ``df`` as ``k`` k-means clusters.

    Args:
        df: DataFrame with a numeric 'demand' column.
        k: number of clusters.
        random_state: seed forwarded to KMeans so repeated runs give the same
            clusters; pass None for a fresh random initialisation each call.

    Returns:
        DataFrame with one row per cluster label 0..k-1 and the columns
        'group' (label), 'prob' (share of rows assigned to the cluster) and
        'demand_pred' (mean demand of the cluster). A label that received no
        rows gets NaN in 'prob' and 'demand_pred'.
    """
    values=df['demand'].values
    # n_init is pinned to 10 so the result does not depend on the
    # scikit-learn version's default.
    kmeans = KMeans(n_clusters = k, init = 'k-means++', n_init = 10,
                    random_state = random_state)
    # fit_predict already fits the model; a separate fit() call beforehand
    # only repeated the work.
    labels = kmeans.fit_predict(values.reshape(-1,1))

    assigned = pd.DataFrame({'demand':values,'group':labels})
    by_group = assigned.groupby('group')['demand']

    summary = pd.DataFrame({'group':range(k)})
    # Both Series are indexed by cluster label and align on summary's 0..k-1 index.
    summary['prob'] = by_group.size()/len(assigned)
    summary['demand_pred'] = by_group.mean()
    return summary

In [7]:
def flexibility_model(demand,c,cost_over,cost_under):
    """Choose how much capacity to shift from lane 1 to lane 0.

    With a shift ``x`` the effective capacities are ``c[0] + x`` and
    ``c[1] - x``. The model minimises

        sum_t  cost_over[t]  * max(capacity_t - demand[t], 0)
             + cost_under[t] * max(demand[t] - capacity_t, 0)

    subject to ``0 <= x <= max(demand[0] - c[0], 0)``, i.e. at most lane 0's
    shortfall may be shifted.

    The earlier formulation enforced ``y = max(., 0)`` from above with binary
    indicators and a big-M of 1e20. At that magnitude double precision cannot
    represent the demand/capacity terms added to M, and the big-M rows defeat
    the solver's tolerances. Because all costs are non-negative, the
    minimisation already drives each ``y`` to ``max(., 0)``, so the model is
    a plain LP with the same optimal objective.

    Args:
        demand: two demand values, indexed [lane 0, lane 1].
        c: two base capacities, same indexing.
        cost_over: two unit costs of unused capacity (each >= 0).
        cost_under: two unit costs of unmet demand (each >= 0).

    Returns:
        ``(m, x)``: the optimised Gurobi model (``m.objVal`` is the cost) and
        the shift variable (``x.x`` is the chosen shift).

    Raises:
        ValueError: if any cost is negative (the LP would be unbounded).
    """
    lanes=range(2)
    # Plain Python floats keep the Gurobi expressions free of numpy scalars.
    dem=[float(demand[t]) for t in lanes]
    cap=[float(c[t]) for t in lanes]
    over=[float(cost_over[t]) for t in lanes]
    under=[float(cost_under[t]) for t in lanes]
    if min(over)<0 or min(under)<0:
        raise ValueError("cost_over and cost_under must be non-negative")
    # +x on lane 0, -x on lane 1 (the original (-1)**t factor).
    sign=(1.0,-1.0)

    ######### Model Set-up ############
    m = Model()
    # Suppress the solver log.
    m.setParam( 'OutputFlag', False )

    x = m.addVar(lb=0.0,vtype=GRB.CONTINUOUS,name = "flexibility")
    # y1[t]: unused capacity on lane t; y2[t]: unmet demand on lane t.
    # Both are >= 0 through the default variable lower bound.
    y1= m.addVars(2,vtype=GRB.CONTINUOUS,name = "y1")
    y2= m.addVars(2,vtype=GRB.CONTINUOUS,name = "y2")

    m.setObjective( quicksum((over[t]*y1[t]+under[t]*y2[t]) for t in lanes), GRB.MINIMIZE)

    m.addConstrs( (y1[t]>=cap[t]-dem[t]+sign[t]*x for t in lanes), name="over")
    m.addConstrs( (y2[t]>=dem[t]-cap[t]-sign[t]*x for t in lanes), name="under")

    # Never shift more than lane 0 is short of.
    m.addConstr( x<=max(dem[0]-cap[0],0.0), name="shortfall_cap")

    m.optimize()

    return m,x

In [8]:
def flexibility_mode_actual(demand,c,cost_over,cost_under,x_predict):
    """Re-optimise the capacity shift against realised demand.

    Same model as the planning step: a shift ``x`` gives effective capacities
    ``c[0] + x`` and ``c[1] - x`` and the objective is

        sum_t  cost_over[t]  * max(capacity_t - demand[t], 0)
             + cost_under[t] * max(demand[t] - capacity_t, 0)

    but ``x`` is additionally capped by the shift that was planned:
    ``0 <= x <= min(max(demand[0] - c[0], 0), x_predict)``.

    The earlier formulation enforced ``y = max(., 0)`` from above with binary
    indicators and a big-M of 1e20. At that magnitude double precision cannot
    represent the demand/capacity terms added to M, and the big-M rows defeat
    the solver's tolerances. Because all costs are non-negative, the
    minimisation already drives each ``y`` to ``max(., 0)``, so the model is
    a plain LP with the same optimal objective.

    Args:
        demand: two realised demand values, indexed [lane 0, lane 1].
        c: two base capacities, same indexing.
        cost_over: two unit costs of unused capacity (each >= 0).
        cost_under: two unit costs of unmet demand (each >= 0).
        x_predict: upper limit on the shift (the planned shift).

    Returns:
        ``(m, x)``: the optimised Gurobi model (``m.objVal`` is the cost) and
        the shift variable (``x.x`` is the realised shift).

    Raises:
        ValueError: if any cost is negative (the LP would be unbounded).
    """
    lanes=range(2)
    # Plain Python floats keep the Gurobi expressions free of numpy scalars.
    dem=[float(demand[t]) for t in lanes]
    cap=[float(c[t]) for t in lanes]
    over=[float(cost_over[t]) for t in lanes]
    under=[float(cost_under[t]) for t in lanes]
    if min(over)<0 or min(under)<0:
        raise ValueError("cost_over and cost_under must be non-negative")
    # +x on lane 0, -x on lane 1 (the original (-1)**t factor).
    sign=(1.0,-1.0)

    ######### Model Set-up ############
    m = Model()
    # Suppress the solver log.
    m.setParam( 'OutputFlag', False )

    x = m.addVar(lb=0.0,vtype=GRB.CONTINUOUS,name = "flexibility")
    # y1[t]: unused capacity on lane t; y2[t]: unmet demand on lane t.
    # Both are >= 0 through the default variable lower bound.
    y1= m.addVars(2,vtype=GRB.CONTINUOUS,name = "y1")
    y2= m.addVars(2,vtype=GRB.CONTINUOUS,name = "y2")

    m.setObjective( quicksum((over[t]*y1[t]+under[t]*y2[t]) for t in lanes), GRB.MINIMIZE)

    m.addConstrs( (y1[t]>=cap[t]-dem[t]+sign[t]*x for t in lanes), name="over")
    m.addConstrs( (y2[t]>=dem[t]-cap[t]-sign[t]*x for t in lanes), name="under")

    # Never shift more than lane 0 is short of, nor more than was planned.
    m.addConstr( x<=max(dem[0]-cap[0],0.0), name="shortfall_cap")
    m.addConstr( x<=float(x_predict), name="planned_cap")

    m.optimize()

    return m,x

In [9]:
def outcome_k(weekday,period,year,k=4):

    df1=data_select(weekday,period,year,'SZX','predict')
    df2=data_select(weekday,period,year,'HK','predict')


    c=[]
    
    df1_1 = cluster(df1,k)
    df2_1 = cluster(df2,k)
    demand1=np.array(df1_1['demand_pred'])
    n1=len(demand1)
    weight1=np.array(df1_1['prob'])
    m1,c1=weighted_cluster_model(demand1,weight1,7.5,9)
    c.append(c1.x)
    
    demand2=np.array(df2_1['demand_pred'])
    n2=len(demand2)
    weight2=np.array(df2_1['prob'])
    m2,c2=weighted_cluster_model(demand2,weight2,5.7,7.2)
    c.append(c2.x)
    
    df21=df1[['date','demand']]
    df21.rename(columns={'demand':'SZX_demand'},inplace=True)
    df22=df2[['date','demand']]
    df22.rename(columns={'demand':'HK_demand'},inplace=True)
    df_flex=pd.merge(df21,df22,how='outer',on='date')
    df_flex=df_flex.fillna(0)
    demand=np.array(df_flex[['SZX_demand','HK_demand']])
    
    cost_over=np.array([7.5,5.7])
    cost_under=np.array([9,7.2])
    
    for i in range(len(demand)):
        m,x=flexibility_model(demand[i],c,cost_over,cost_under)
        df_flex.loc[i,'x_predict']=x.x
        if df_flex.loc[i,'SZX_demand']==0:
            df_flex.loc[i,'pred_cost']=cost_over[1]*max(c[1]-df_flex.loc[i,'HK_demand'],0)+cost_under[1]*max(df_flex.loc[i,'HK_demand']-c[1],0)
        else:
            df_flex.loc[i,'pred_cost']=m.objVal
            
    df_flex['capacity_szx']=c1.x
    df_flex['capacity_hk']=c2.x
    
    # Actual flexibility
    d11=data_select(weekday,period,year,'SZX','real')
    d12=data_select(weekday,period,year,'HK','real')
    
    df_a=d11[['date','demand']]
    df_a.rename(columns={'demand':'SZX_demand_real'},inplace=True)
    df_b=d12[['date','demand']]
    df_b.rename(columns={'demand':'HK_demand_real'},inplace=True)
    df_flex=pd.merge(df_flex,df_a,how='outer',on='date')
    df_flex=pd.merge(df_flex,df_b,how='outer',on='date')
    df_flex=df_flex.fillna(0)
    
    for i in range(len(df_flex)):
        x_predict=df_flex.loc[i,'x_predict']
        demand1=df_flex.loc[i,'SZX_demand_real']
        demand2=df_flex.loc[i,'HK_demand_real']
        demand=[demand1,demand2]
        m,x=flexibility_mode_actual(demand,c,cost_over,cost_under,x_predict)
        df_flex.loc[i,'x_real']=x.x
        if df_flex.loc[i,'SZX_demand_real']==0:
            if df_flex.loc[i,'HK_demand_real']==0:
                df_flex.loc[i,'real_cost']=0
            else:
                df_flex.loc[i,'real_cost']=cost_over[1]*max(c[1]-df_flex.loc[i,'HK_demand_real'],0)+cost_under[1]*max(df_flex.loc[i,'HK_demand_real']-c[1],0)
        else :
            df_flex.loc[i,'real_cost']=m.objVal 
            
    obj_cost=df_flex['pred_cost'].sum()  
    print('predicted flexible obj cost:',obj_cost)
    print('='*50)
    
    real_cost=df_flex['real_cost'].sum()
    print('actual flexible obj cost:',real_cost)
    print('='*50)
    
    print(df_flex)

In [10]:
# Run the default (k=4) analysis for periods 1 and 2, weekdays 1 and 6, per year.
for y in (2019,2020):
    for j in (1,2):
        for i in (1,6):
            print('year',y,',period',j,',day',i)
            # outcome_k prints its own report and returns None, so it is
            # called directly; wrapping it in print() emitted a stray "None".
            outcome_k(i,j,y)
            print("="*30,"*"*15,"="*30)
            print("   "*20)
            print("="*30,"*"*15,"="*30)

year 2019 ,period 1 ,day 1
Using license file /Users/beijiale/gurobi.lic
Academic license - for non-commercial use only
predicted flexible obj cost: 15353490.9
actual flexible obj cost: 20297112.75
          date  SZX_demand  HK_demand  x_predict    pred_cost  capacity_szx  \
0   2019-01-07    335161.0   476872.0    71419.0   279577.800      263742.0   
1   2019-01-14    344049.0   510309.0    80307.0   584317.800      263742.0   
2   2019-01-21    350900.0   546986.0    87158.0   897719.400      263742.0   
3   2019-01-28     10390.0   719708.0        0.0  3413920.200      263742.0   
4   2019-02-04    145532.0   276723.0        0.0  2213180.175      263742.0   
5   2019-02-18    190868.0   358772.0        0.0  1405480.875      263742.0   
6   2019-02-25    235198.0   503676.0        0.0   247053.075      263742.0   
7   2019-03-04    249073.0   598026.0        0.0   747687.300      263742.0   
8   2019-03-11    262101.0   617137.0        0.0   787576.500      263742.0   
9   2019-03-

In [11]:
# Run the analysis for period 3 with k=3 clusters, weekdays 1 and 6, per year.
for y in (2019,2020):
    for i in (1,6):
        print('year',y,',period 3',',day',i)
        # outcome_k prints its own report and returns None, so it is called
        # directly; wrapping it in print() emitted a stray "None".
        outcome_k(i,3,y,3)
        print("="*30,"*"*15,"="*30)
        print("   "*20)
        print("="*30,"*"*15,"="*30)

year 2019 ,period 3 ,day 1
predicted flexible obj cost: 17749345.67142857
actual flexible obj cost: 23140195.31428571
          date  SZX_demand  HK_demand     x_predict     pred_cost  \
0   2019-10-14    374052.0   643353.0      0.000000  2.730140e+06   
1   2019-10-21    382281.0   717718.0      0.000000  2.244542e+06   
2   2019-10-28    378191.0   758882.0      0.000000  2.040582e+06   
3   2019-11-04    377295.0   820709.0      0.000000  1.694888e+06   
4   2019-11-11    554677.0   871050.0      0.000000  7.757934e+04   
5   2019-11-18    531543.0   876667.0      0.000000  2.190674e+05   
6   2019-11-25    560232.0   886683.0   2007.333333  6.262491e+04   
7   2019-12-02    577296.0   938560.0  19071.333333  5.590001e+05   
8   2019-12-09    576920.0   913690.0  18695.333333  3.772289e+05   
9   2019-12-16    548680.0   852588.0      0.000000  2.277902e+05   
10  2019-12-23    409006.0   763663.0      0.000000  1.782218e+06   
11  2019-12-30    474814.0   507478.0      0.000000  2