# Model:

$$\max  \quad \sum_{k=1}^{K}{{w_k(R\sum_{j=1}^{N_C}{y_{jk}} -  \sum_{i=1}^{N_S}{[z_iFC_i + c_iVC_i]} + \sum_{i=1}^{N_S}{\sum_{j=1}^{N_C}{s_{ijk}t_{ij}}}})}$$
$$s.t.$$
$$y_{jk} \leq \sum_{i=1}^{N_S}{x_{ijk}}  \quad \forall{j}\quad\quad(1)$$
$$y_{jk} \leq \hat{D}_{jk}  \quad \forall{j}\quad\quad(2)$$
$$\sum_{j=1}^{N_C}{x_{ijk}} \leq c_i \quad \forall{i}\quad\quad(3)$$  
$$x_{ijk} \leq B_is_{ijk} \quad \forall{i} \forall{j}\quad\quad(4)$$
$$c_i \leq B_iz_i \quad \forall{i}\quad\quad(5)$$
<center>$s_{ijk} \leq z_i \quad \forall{i}\forall{j}\quad\quad(6)$

## Inputs:

- $N_S$: Total number of potential suppliers
- $N_C$: Total number of customers
- $FC_i$: Fixed cost for contracting supplier $i$
- $VC_i$: Amount ordered from contracted supplier $i$
- $T_{i,j}$: Distance from supplier $i$ to customer $j$ in miles
- $B_i$: Maximum inventory of supplier $i$
- $\hat{D}_{jk}$: Uncerstain demand of county $j$ for observation $k$
- $R$: revenue per product sold

## Decision Variables:

- $z_i$: Indicates whether supplier $i$ is contracted (= 1 if opened, = 0 otherwise)
- $c_i$: Inventory ordered from supplier $i$
- $s_{ijk}$: Indicates whether customer $j$ is supplied by supplier $i$ for observation $k$
- $x_{ijk}$: Total number of goods shipped from supplier $i$ to customer $j$ for observation $k$
- $y_{jk}$: Total number of goods bought by customer $j$ for observation $k$

In [1]:
using Distances, JuMP, Gurobi, CSV, DataFrames, Formatting, Plots, Statistics, Distributions;
gurobi_env = Gurobi.Env();
ENV["COLUMNS"]=120;

Academic license - for non-commercial use only - expires 2022-08-19


In [2]:
alldata_train = CSV.read("data/alldata_train.csv", DataFrame);
alldata_test = CSV.read("data/alldata_test.csv", DataFrame);
supplierInfo = CSV.read("data/supplierInfo.csv", DataFrame)[4:end,:];

# TAKES >2X AS LONG
#alldata_train[:,:Sales] = alldata_train[:,:Sales]./10
#alldata_test[:,:Sales] = alldata_test[:,:Sales]./10;

# Functions

In [3]:
# function to get test point from test set

function getTestPoint(testSet, WeekNum)
    
    testWks = filter(row -> row.Week_Num == WeekNum, alldata_test)
    
    sort!(testWks, :custID)
    
    return testWks
end    

function getTestRange(testSet, WeekNum)
    
    testWks = filter(row -> row.Week_Num <= WeekNum, alldata_test)
    
    sort!(testWks, :custID)
    
    return testWks
end  

# function to find distances between test point and suppliers
function getDistances(testPoint, supInfo)
    
    NS = size(supInfo)[1]
    NC = size(testPoint)[1]
    
    distances = zeros(NS, NC);
    for i=1:NS
        for j=1:NC
            distances[i,j] = haversine((supInfo[i,3],supInfo[i,4]), (testPoint[j,3],testPoint[j,4]), 3958.8)
        end
    end
    
    return distances
    
end

getDistances (generic function with 1 method)

In [4]:
# function for finding the demand = average demand for nearest 5 neighbors for item in last 100 days
function findKNNDemand(customer, testWeek, trainSet, KNN)
    
    holder = zeros(KNN)
    oneCust = filter(row->row.custID==customer, trainSet)
    oneCust_comps = Matrix(select(oneCust, [:Income, :Population, :Season]))

    mean1=mean(oneCust_comps[:,1])
    std1=std(oneCust_comps[:,1])
    mean2=mean(oneCust_comps[:,2])
    std2=std(oneCust_comps[:,2])
    mean3=mean(oneCust_comps[:,3])
    std3=std(oneCust_comps[:,3])

    oneCust_comps[:,1] = (oneCust_comps[:,1].-mean1)./std1
    oneCust_comps[:,2] = (oneCust_comps[:,2].-mean2)./std2
    oneCust_comps[:,3] = (oneCust_comps[:,3].-mean3)./std3

    testpoint = filter(row->row.custID==customer, testWeek)
    testpoint_comps = Matrix(select(testpoint, [:Income, :Population, :Season]))
    testpoint_comps[:,1] = (testpoint_comps[:,1].-mean1)./std1
    testpoint_comps[:,2] = (testpoint_comps[:,2].-mean2)./std2
    testpoint_comps[:,3] = (testpoint_comps[:,3].-mean3)./std3

    diffs = zeros(size(oneCust)[1])

    # find distance
    for i=1:size(oneCust)[1]
        diffs[i] = sum(((testpoint_comps[1,1]-oneCust_comps[i,1])^2)
        +((testpoint_comps[1,2]-oneCust_comps[i,2])^2)
        +((testpoint_comps[1,3]-oneCust_comps[i,3])^2))
    end

    # append demand
    df = DataFrame(diff = Vector(diffs[:,1]), sales = Vector(oneCust[:,10]))
    sort!(df)

    for i=1:KNN
        holder[i] = df[i,2]
    end

    return holder
    
end

findKNNDemand (generic function with 1 method)

In [5]:
# Optimization Model - With Prescription
function optPres(Dem)
    
    NS = size(supplierInfo)[1]
    NC = size(Dem)[1]
    
    # GET K FROM SIZE OF Dem
    if isa(Dem, Vector) == true
        K = 1
    else
        K = size(Dem)[2]
    end
    
    weight = zeros(K)
    weight .= 1/K
    # set weights

    model = Model(with_optimizer(Gurobi.Optimizer,TimeLimit=60, gurobi_env));

    @variable(model, z[i=1:NS], Bin)
    @variable(model, c[i=1:NS]>=0)
    @variable(model, s[i=1:NS,j=1:NC,k=1:K],Bin)
    @variable(model, x[i=1:NS,j=1:NC,k=1:K]>=0)
    @variable(model, y[j=1:NC,k=1:K]>=0)

    @constraint(model, [j=1:NC,k=1:K], y[j,k] <= sum(x[i,j,k] for i=1:NS))
    @constraint(model, [j=1:NC,k=1:K], y[j,k] <= Dem[j,k])
    @constraint(model, [i=1:NS,k=1:K], sum(x[i,j,k] for j=1:NC) <= c[i])
    @constraint(model, [i=1:NS,j=1:NC,k=1:K], x[i,j,k] <= B[i]*s[i,j,k])
    @constraint(model, [i=1:NS], c[i] <= B[i]*z[i])
    @constraint(model, [i=1:NS,j=1:NC,k=1:K], s[i,j,k] <= z[i])

    @objective(model, Max, sum(weight[k]*(R*sum(y[j,k] for j=1:NC) - sum(z[i]*FC[i] + c[i]*VC[i] for i=1:NS) - sum(shipCostperMile*s[i,j,k]*T[i,j] for i=1:NS, j=1:NC)) for k=1:K))

    set_optimizer_attribute(model, "OutputFlag", 0)
    optimize!(model)
    
    return objective_value(model), value.(c)
    
end

optPres (generic function with 1 method)

In [6]:
# Optimization Model - With Prescription
function optPresRes(Dem)
    
    NS = size(supplierInfo)[1]
    NC = size(Dem)[1]
    
    # GET K FROM SIZE OF Dem
    if isa(Dem, Vector) == true
        K = 1
    else
        K = size(Dem)[2]
    end
    
    weight = [0.1,0.2,0.4,0.2,0.1]
    # set weights

    model = Model(with_optimizer(Gurobi.Optimizer,TimeLimit=60, gurobi_env));

    @variable(model, z[i=1:NS], Bin)
    @variable(model, c[i=1:NS]>=0)
    @variable(model, s[i=1:NS,j=1:NC,k=1:K],Bin)
    @variable(model, x[i=1:NS,j=1:NC,k=1:K]>=0)
    @variable(model, y[j=1:NC,k=1:K]>=0)

    @constraint(model, [j=1:NC,k=1:K], y[j,k] <= sum(x[i,j,k] for i=1:NS))
    @constraint(model, [j=1:NC,k=1:K], y[j,k] <= Dem[j,k])
    @constraint(model, [i=1:NS,k=1:K], sum(x[i,j,k] for j=1:NC) <= c[i])
    @constraint(model, [i=1:NS,j=1:NC,k=1:K], x[i,j,k] <= B[i]*s[i,j,k])
    @constraint(model, [i=1:NS], c[i] <= B[i]*z[i])
    @constraint(model, [i=1:NS,j=1:NC,k=1:K], s[i,j,k] <= z[i])

    @objective(model, Max, sum(weight[k]*(R*sum(y[j,k] for j=1:NC) - sum(z[i]*FC[i] + c[i]*VC[i] for i=1:NS) - sum(shipCostperMile*s[i,j,k]*T[i,j] for i=1:NS, j=1:NC)) for k=1:K))

    set_optimizer_attribute(model, "OutputFlag", 0)
    optimize!(model)
    
    return objective_value(model), value.(c)
    
end

optPresRes (generic function with 1 method)

In [7]:
# Optimization Model - No Prescription Yet
function checkReality(contr, trueDem)
    
    NS = size(supplierInfo)[1]
    NC = size(trueDem)[1]

    model = Model(with_optimizer(Gurobi.Optimizer,TimeLimit=60, gurobi_env));

    @variable(model, z[i=1:NS], Bin)
    @variable(model, c[i=1:NS]>=0)
    @variable(model, k[i=1:NS,j=1:NC], Bin)
    @variable(model, x[i=1:NS,j=1:NC]>=0)
    @variable(model, y[j=1:NC]>=0)

    @constraint(model, [i=1:NS], c[i] == contr[i])
    
    @constraint(model, [j=1:NC], y[j] <= sum(x[i,j] for i=1:NS))
    @constraint(model, [j=1:NC], y[j] <= trueDem[j])
    @constraint(model, [i=1:NS], sum(x[i,j] for j=1:NC) <= c[i])
    @constraint(model, [i=1:NS,j=1:NC], x[i,j] <= B[i]*k[i,j])
    @constraint(model, [i=1:NS], c[i] <= B[i]*z[i])
    @constraint(model, [i=1:NS,j=1:NC], k[i,j] <= z[i])

    @objective(model, Max, R*sum(y[j] for j=1:NC) - sum(z[i]*FC[i] + c[i]*VC[i] for i=1:NS) - sum(shipCostperMile*k[i,j]*T[i,j] for i=1:NS, j=1:NC))

    set_optimizer_attribute(model, "OutputFlag", 0)
    optimize!(model)
    
    return objective_value(model), value.(c)
    
end

checkReality (generic function with 1 method)

In [8]:
function testApproaches(testWkNum, KNN_K)
    
    # get test week and true demand
    testWk = getTestPoint(alldata_test, testWkNum)
    trueD = testWk[:,:Sales]
    
    # get num customers
    NC = size(testWk)[1]
    
    trainSet = deepcopy(alldata_train)

    if testWkNum != 147
        addToTrain = getTestRange(alldata_test, testWkNum-1)
        trainSet = [trainSet;addToTrain]
    end
    
    # get neightbors
    knnD = zeros(NC,KNN_K)
    for i=1:NC
        d_holder = findKNNDemand(i, testWk, trainSet, KNN_K)
        for k=1:KNN_K
            knnD[i,k] = d_holder[k]
        end
    end
    
    # get average sales of neighbors
    avgD = mean(knnD, dims=2);
    
    # Oracle Approach
    oracle_profEst, oracle_contr = optPres(trueD)
    oracle_profReal, oracle_realContr = checkReality(oracle_contr, trueD)
    
    # KNN Approach
    knn_profEst, knn_contr = optPres(knnD)
    knn_profReal, knn_realContr = checkReality(knn_contr, trueD)
    
    # Avg KNN Sales Approach
    avg_profEst, avg_contr = optPres(avgD)
    avg_profReal, avg_realContr = checkReality(avg_contr, trueD)
    
    return oracle_profReal, knn_profReal, avg_profReal, sum(oracle_contr), sum(knn_contr), sum(avg_contr)
    
#     println("Estimated Profit:")
#     println(" - Oracle Approach:         \$", oracle_profEst)
#     println(" - KNN Prescript. Approach: \$", knn_profEst)
#     println(" - KNN Average Approach:    \$", avg_profEst)
#     println("")
#     println("Realized Profit:")
#     println(" - Oracle Approach:         \$", oracle_profReal)
#     println(" - KNN Prescript. Approach: \$", knn_profReal)
#     println(" - KNN Average Approach:    \$", avg_profReal)
    
end

testApproaches (generic function with 1 method)

In [9]:
function getResidualsD(residCustID, testPrediction)
    
    D = zeros(size(testPrediction)[1],5);
    for i=1:size(testPrediction)[1]
        residCust = filter(row -> row.custID == i, residCustID)
        res_std = std(residCust[:,1])
        D[i,1] = max(testPrediction[i] - 2*res_std,0)
        D[i,2] = max(testPrediction[i] - res_std,0)
        D[i,3] = testPrediction[i] 
        D[i,4] = testPrediction[i] + res_std
        D[i,5] = testPrediction[i] + 2*res_std
    end
    
    return D
    
end

getResidualsD (generic function with 1 method)

# Get Constant Parameters

In [15]:
#OLD STUFF

# choose random tst day for distances - they're all the same # of customers
testCust = getTestPoint(alldata_test, 147)
T = getDistances(testCust, supplierInfo)

FC = supplierInfo[:,5].*0
#VC = supplierInfo[:,6]
VC = zeros(17)
VC .= 100
B = supplierInfo[:,end]./100
R = 500 # can change this
shipCostperMile = 1;

In [139]:
# Accurate Prices

# testCust = getTestPoint(alldata_test, 147)
# T = getDistances(testCust, supplierInfo)

# R = 25.99

# B = supplierInfo[:,end]./10
# VC = zeros(size(supplierInfo)[1])
# VC .= 10.99
# FC = zeros(size(supplierInfo)[1])
# FC .= 0  # maybe change to 1.99
# shipCostperMile = 2.9;


## "Tuning" Parameters

In [12]:
## just take wk147 as demand
D = testCust[:,:Sales];
testProfit, testContr = optPres(D);
testProfit

5.757357426239999e7

In [13]:
# get test week and true demand
testWk = getTestPoint(alldata_test, 147)
trueD = testWk[:,:Sales]

# get num customers
NC = size(testWk)[1]

trainSet = deepcopy(alldata_train)

if 148 != 147
    addToTrain = getTestRange(alldata_test, 148-1)
    trainSet = [trainSet;addToTrain]
end

# get neightbors
knnD = zeros(NC,5)
for i=1:NC
    d_holder = findKNNDemand(i, testWk, trainSet, 5)
    for k=1:5
        knnD[i,k] = d_holder[k]
    end
end

# get average sales of neighbors
avgD = mean(knnD, dims=2);

# Oracle Approach
oracle_profEst, oracle_contr = optPres(trueD)
oracle_profReal, oracle_realContr = checkReality(oracle_contr, trueD)

# KNN Approach
knn_profEst, knn_contr = optPres(knnD)
knn_profReal, knn_realContr = checkReality(knn_contr, trueD);

In [14]:
knn_profReal

5.7445748751323335e7

# KNN Prescription vs. KNN Average

In [15]:
testWeekNums = [147:156;];
knnResults = zeros(size(testWeekNums)[1],7)

for i=1:size(testWeekNums)[1]
    
    orac, knnpres, knnavg, totalD, knnpres_contr, knnavg_contr = testApproaches(testWeekNums[i], 5)
    
    knnResults[i,1] = testWeekNums[i]
    knnResults[i,2] = orac
    knnResults[i,3] = knnpres
    knnResults[i,4] = knnavg
    knnResults[i,5] = totalD
    knnResults[i,6] = knnpres_contr
    knnResults[i,7] = knnavg_contr  
    
end

knnResults

10×7 Matrix{Float64}:
 147.0  5.75743e7  5.745e7    5.73089e7       1.4404e5   145276.0             1.43366e5
 148.0  5.70369e7  5.67753e7  5.69925e7  142697.0             1.45311e5       1.43138e5
 149.0  6.68157e7  5.6912e7   5.68824e7  167149.0             1.42319e5       1.42243e5
 150.0  6.62138e7  5.69337e7  5.83449e7       1.65645e5       1.42374e5       1.45907e5
 151.0  6.81266e7  6.53677e7  6.02345e7       1.70427e5       1.63486e5  150633.0
 152.0  6.84365e7  6.58703e7  6.23615e7  171201.0             1.64741e5       1.5595e5
 153.0  6.89556e7  6.72413e7  6.44835e7       1.72502e5       1.68178e5       1.61266e5
 154.0  6.79322e7  6.75547e7  6.6811e7        1.69941e5       1.68982e5       1.67111e5
 155.0  6.91152e7  6.75876e7  6.70324e7       1.72899e5       1.69046e5       1.67653e5
 156.0  6.88541e7  6.80211e7  6.75829e7       1.72246e5       1.70142e5       1.69039e5

In [16]:
knnResultsDF = DataFrame(knnResults, [:WeekNum, :OracleProf, :KnnPresProf, :KnnAvgProf, :OracleOrdered, :KnnPresOrdered, :KnnAvgOrdered]);

In [17]:
knnResultsDF

Unnamed: 0_level_0,WeekNum,OracleProf,KnnPresProf,KnnAvgProf,OracleOrdered,KnnPresOrdered,KnnAvgOrdered
Unnamed: 0_level_1,Float64,Float64,Float64,Float64,Float64,Float64,Float64
1,147.0,57574300.0,57450000.0,57308900.0,144040.0,145276.0,143366.0
2,148.0,57036900.0,56775300.0,56992500.0,142697.0,145311.0,143138.0
3,149.0,66815700.0,56912000.0,56882400.0,167149.0,142319.0,142243.0
4,150.0,66213800.0,56933700.0,58344900.0,165645.0,142374.0,145907.0
5,151.0,68126600.0,65367700.0,60234500.0,170427.0,163486.0,150633.0
6,152.0,68436500.0,65870300.0,62361500.0,171201.0,164741.0,155950.0
7,153.0,68955600.0,67241300.0,64483500.0,172502.0,168178.0,161266.0
8,154.0,67932200.0,67554700.0,66811000.0,169941.0,168982.0,167111.0
9,155.0,69115200.0,67587600.0,67032400.0,172899.0,169046.0,167653.0
10,156.0,68854100.0,68021100.0,67582900.0,172246.0,170142.0,169039.0


In [18]:
CSV.write("data/knn_results.csv", knnResultsDF)

"data/knn_results.csv"

In [None]:
#custMet = findall(x->x>0.01, shipments);
#contrSup = findall(x->x>0.01, contracts);
#notContrSup = findall(x->x<0.01, contracts);

In [None]:
#scatter(test147[:,3],test147[:,2], markersize=3,format=:png)
#scatter!(supplierInfo[contrSup,4], supplierInfo[contrSup,3],markersize=5,format=:png)
#scatter!(supplierInfo[notContrSup,4], supplierInfo[notContrSup,3],markersize=5,format=:png)

# Regression Model: Getting "neighbors" from residuals 

### Ridge Regression

In [217]:
# LR_res = CSV.read("data/LR_output.csv", DataFrame)
# LR_resCustID = LR_res[:,[:res,:custID]]
# LR_preds = CSV.read("data/LR_output_147to156.csv", DataFrame);
# LR_pred147 = filter(row -> row.Week_Num == 147, LR_preds)
# sort!(LR_pred147, :custID)
# LR_pred147Sales = LR_pred147[:,:Sales]

# LR_res_D = getResidualsD(LR_resCustID, LR_pred147Sales)

765×5 Matrix{Float64}:
  56.2011   63.2342   70.2672   77.3002   84.3333
  41.7676   51.6324   61.4972   71.362    81.2268
  63.9996   72.5049   81.0103   89.5157   98.0211
  57.2221   64.6353   72.0485   79.4618   86.875
 146.7     155.462   164.224   172.986   181.748
  70.3447   76.9636   83.5826   90.2016   96.8206
 327.768   355.942   384.116   412.29    440.464
  30.71     40.7093   50.7085   60.7078   70.7071
 251.357   264.447   277.536   290.625   303.715
  29.272    39.6177   49.9634   60.3091   70.6548
 167.997   176.938   185.88    194.822   203.764
 101.512   107.919   114.325   120.732   127.139
  78.7818   84.223    89.6642   95.1054  100.547
   ⋮                                     
 121.245   128.485   135.725   142.966   150.206
  56.5733   69.4178   82.2624   95.1069  107.951
  99.427   106.645   113.864   121.082   128.3
 729.934   794.417   858.901   923.385   987.869
  46.8792   55.9541   65.0291   74.1041   83.1791
  71.8228   78.4376   85.0524   91.6671   98.281

In [218]:
# # Ridge Residual Approach
# LR_res_profEst, LR_res_contr = optPresRes(LR_res_D)
# LR_res_profReal, LR_res_realContr = checkReality(LR_res_contr, trueD);
# LR_res_profReal

2.0289158475659243e6

### Random Forest

In [215]:
# RF_res = CSV.read("data/RF_output.csv", DataFrame)
# RF_resCustID = RF_res[:,[:res,:custID]]
# RF_preds = CSV.read("data/RF_output_147to156.csv", DataFrame);
# RF_pred147 = filter(row -> row.Week_Num == 147, RF_preds)
# sort!(RF_pred147, :custID)
# RF_pred147Sales = RF_pred147[:,:Sales]

# RF_res_D = getResidualsD(RF_resCustID, RF_pred147Sales)

765×5 Matrix{Float64}:
  23.4337   28.6388   33.844    39.0491   44.2542
  27.5976   30.7208   33.844    36.9671   40.0903
  22.1186   27.9813   33.844    39.7066   45.5693
  23.2092   28.5266   33.844    39.1613   44.4787
  65.9597   80.7754   95.5911  110.407   125.223
  20.8867   27.3653   33.844    40.3226   46.8012
 257.585   300.525   343.465   386.405   429.344
  29.6305   31.7372   33.844    35.9507   38.0574
 145.073   169.1     193.126   217.153   241.179
  30.2726   32.0583   33.844    35.6296   37.4153
  65.6921   80.6416   95.5911  110.541   125.49
  71.7222   83.6567   95.5911  107.526   119.46
  18.5298   26.1869   33.844    41.501    49.1581
   ⋮                                     
  70.9324   83.2617   95.5911  107.92    120.25
  23.0385   28.4412   33.844    39.2467   44.6494
  73.5708   82.3627   91.1547   99.9466  108.739
 587.821   670.466   753.111   835.756   918.401
  25.5675   29.7057   33.844    37.9822   42.1204
  20.1723   27.0081   33.844    40.6798   47.5

In [216]:
# # RF Residual Approach
# RF_res_profEst, RF_res_contr = optPresRes(RF_res_D)
# RF_res_profReal, RF_res_realContr = checkReality(RF_res_contr, trueD);
# RF_res_profReal

1.6154596671162997e6

### XGBoost

In [213]:
# XGB_res = CSV.read("data/XGB_output.csv", DataFrame)
# XGB_resCustID = XGB_res[:,[:res,:custID]]
# XGB_preds = CSV.read("data/XGB_output_147to156.csv", DataFrame);
# XGB_pred147 = filter(row -> row.Week_Num == 147, XGB_preds)
# sort!(XGB_pred147, :custID)
# XGB_pred147Sales = XGB_pred147[:,:Sales]

# XGB_res_D = getResidualsD(XGB_resCustID, XGB_pred147Sales)

765×5 Matrix{Float64}:
  60.7322   63.6763   66.6205    69.5646    72.5088
  32.652    35.0431   37.4342    39.8253    42.2165
  59.6446   63.0674   66.4903    69.9131    73.336
  46.1683   49.0019   51.8356    54.6692    57.5028
 144.764   153.239   161.713    170.188    178.662
  60.6071   64.6535   68.6998    72.7462    76.7926
 420.079   443.652   467.225    490.798    514.37
  16.6269   18.4742   20.3216    22.169     24.0164
 244.619   257.798   270.976    284.155    297.334
  15.5626   17.2158   18.8691    20.5223    22.1755
 158.302   165.834   173.366    180.898    188.43
 111.681   118.288   124.895    131.502    138.108
  64.0048   68.3314   72.658     76.9846    81.3112
   ⋮                                      
 109.179   116.95    124.721    132.491    140.262
  44.6358   47.8932   51.1506    54.408     57.6654
  98.9717  104.285   109.599    114.912    120.225
 867.849   917.435   967.02    1016.61    1066.19
  41.2523   43.8289   46.4056    48.9822    51.5589
  65.5297 

In [214]:
# # XGBoost Residual Approach
# XGB_res_profEst, XGB_res_contr = optPresRes(XGB_res_D)
# XGB_res_profReal, XGB_res_realContr = checkReality(XGB_res_contr, trueD);
# XGB_res_profReal

2.0480639891275358e6

# XGBoost

In [19]:
# load data
XGB_res = CSV.read("data/XGB_output.csv", DataFrame)
XGB_resCustID = XGB_res[:,[:res,:custID]]
XGB_preds = CSV.read("data/XGB_output_147to156.csv", DataFrame);

In [25]:
testWeekNums = [147:156;];
resApproachResults = zeros(size(testWeekNums)[1],2)

for i=1:size(testWeekNums)[1]
    
    XGB_pred = filter(row -> row.Week_Num == testWeekNums[i], XGB_preds)
    sort!(XGB_pred, :custID)
    XGB_predSales = XGB_pred[:,:Sales]
    
    XGB_res_D = getResidualsD(XGB_resCustID, XGB_predSales)  
    
    XGB_res_profEst, XGB_res_contr = optPresRes(XGB_res_D)
    
    testWk = getTestPoint(alldata_test, testWeekNums[i])
    trueD = testWk[:,:Sales]
    
    XGB_res_profReal, XGB_res_realContr = checkReality(XGB_res_contr, trueD);
    
    resApproachResults[i,1] = XGB_res_profReal
    resApproachResults[i,2] = sum(XGB_res_contr)
    
end

resApproachResults

10×2 Matrix{Float64}:
 5.60509e7       1.59274e5
 5.53758e7       1.5931e5
 6.59283e7       1.64905e5
 6.59192e7       1.64895e5
 6.59345e7       1.64907e5
 6.59371e7       1.64908e5
 6.60312e7       1.65143e5
 6.60281e7       1.65144e5
 6.60346e7       1.6515e5
 6.60324e7  165147.0

In [32]:
XGB_results = DataFrame(resApproachResults, [:XBG_profit, :XGBtotalcontr])

Unnamed: 0_level_0,profit,totalcontr
Unnamed: 0_level_1,Float64,Float64
1,56050900.0,159274.0
2,55375800.0,159310.0
3,65928300.0,164905.0
4,65919200.0,164895.0
5,65934500.0,164907.0
6,65937100.0,164908.0
7,66031200.0,165143.0
8,66028100.0,165144.0
9,66034600.0,165150.0
10,66032400.0,165147.0


In [35]:
CSV.write("data/XGB_results.csv", XGB_results)

"data/XGB_results.csv"

# Ridge Regression

In [36]:
# load data
LR_res = CSV.read("data/LR_output.csv", DataFrame)
LR_resCustID = LR_res[:,[:res,:custID]]
LR_preds = CSV.read("data/LR_output_147to156.csv", DataFrame);

In [37]:
testWeekNums = [147:156;];
LR_resApproachResults = zeros(size(testWeekNums)[1],2)

for i=1:size(testWeekNums)[1]
    
    LR_pred = filter(row -> row.Week_Num == testWeekNums[i], LR_preds)
    sort!(LR_pred, :custID)
    LR_predSales = LR_pred[:,:Sales]
    
    LR_res_D = getResidualsD(LR_resCustID, LR_predSales)  
    
    LR_res_profEst, LR_res_contr = optPresRes(LR_res_D)
    
    testWk = getTestPoint(alldata_test, testWeekNums[i])
    trueD = testWk[:,:Sales]
    
    LR_res_profReal, LR_res_realContr = checkReality(LR_res_contr, trueD);
    
    LR_resApproachResults[i,1] = LR_res_profReal
    LR_resApproachResults[i,2] = sum(LR_res_contr)
    
end

LR_resApproachResults

10×2 Matrix{Float64}:
 5.54619e7  165164.0
 5.47418e7       1.65649e5
 6.54671e7  180638.0
 6.46668e7  181123.0
 6.69937e7       1.81759e5
 6.73322e7       1.82245e5
 6.79808e7       1.8225e5
 6.66529e7       1.82736e5
 6.80832e7       1.83222e5
 6.76931e7       1.83859e5

In [38]:
LR_results = DataFrame(LR_resApproachResults, [:LR_Profit, :LRtotalcontr])

Unnamed: 0_level_0,LR_Profit,totalcontr
Unnamed: 0_level_1,Float64,Float64
1,55461900.0,165164.0
2,54741800.0,165649.0
3,65467100.0,180638.0
4,64666800.0,181123.0
5,66993700.0,181759.0
6,67332200.0,182245.0
7,67980800.0,182250.0
8,66652900.0,182736.0
9,68083200.0,183222.0
10,67693100.0,183859.0


In [39]:
CSV.write("data/LR_results.csv", LR_results)

"data/LR_results.csv"

# Testing Ridge and XGBoost Point Predictors

# XGB

In [40]:
testWeekNums = [147:156;];
XGBPointResults = zeros(size(testWeekNums)[1],2)

for i=1:size(testWeekNums)[1]
    
    XGB_pred = filter(row -> row.Week_Num == testWeekNums[i], XGB_preds)
    sort!(XGB_pred, :custID)
    XGB_predSales = XGB_pred[:,:Sales]
     
    XGBPoint_profEst, XGBPoint_contr = optPresRes(XGB_predSales)
    
    testWk = getTestPoint(alldata_test, testWeekNums[i])
    trueD = testWk[:,:Sales]
    
    XGBPoint_profReal, XGBPoint_realcontr = checkReality(XGBPoint_contr, trueD);
    
    XGBPointResults[i,1] = XGBPoint_profReal
    XGBPointResults[i,2] = sum(XGBPoint_contr)
    
end

XGBPointResults

10×2 Matrix{Float64}:
 5.74589e7       1.43746e5
 5.69278e7       1.43782e5
 5.97326e7       1.49377e5
 5.97277e7       1.49367e5
 5.97344e7       1.49378e5
 5.97359e7  149380.0
 5.98296e7       1.49615e5
 5.9828e7        1.49616e5
 5.98327e7       1.49622e5
 5.98313e7       1.49619e5

In [41]:
XGB_pt_results = DataFrame(XGBPointResults, [:XBG_pt_profit, :XGB_pt_totalcontr])

Unnamed: 0_level_0,profit,XGBtotalcontr
Unnamed: 0_level_1,Float64,Float64
1,57458900.0,143746.0
2,56927800.0,143782.0
3,59732600.0,149377.0
4,59727700.0,149367.0
5,59734400.0,149378.0
6,59735900.0,149380.0
7,59829600.0,149615.0
8,59828000.0,149616.0
9,59832700.0,149622.0
10,59831300.0,149619.0


In [42]:
CSV.write("data/XBG_pt_results.csv", XGB_pt_results)

"data/XBG_pt_results.csv"

# Ridge

In [43]:
testWeekNums = [147:156;];
LRPointResults = zeros(size(testWeekNums)[1],2)

for i=1:size(testWeekNums)[1]
    
    LR_pred = filter(row -> row.Week_Num == testWeekNums[i], LR_preds)
    sort!(LR_pred, :custID)
    LR_predSales = LR_pred[:,:Sales]
     
    LRPoint_profEst, LRPoint_contr = optPresRes(LR_predSales)
    
    testWk = getTestPoint(alldata_test, testWeekNums[i])
    trueD = testWk[:,:Sales]
    
    LRPoint_profReal, LRPoint_realcontr = checkReality(LRPoint_contr, trueD);
    
    LRPointResults[i,1] = LRPoint_profReal
    LRPointResults[i,2] = sum(LRPoint_contr)
    
end

LRPointResults

10×2 Matrix{Float64}:
 5.63795e7  141029.0
 5.65684e7       1.41514e5
 6.25767e7  156503.0
 6.27692e7       1.56988e5
 6.30268e7       1.57625e5
 6.32225e7       1.5811e5
 6.32248e7       1.58116e5
 6.34164e7       1.58601e5
 6.36128e7  159087.0
 6.3867e7        1.59724e5

In [45]:
LR_pt_results = DataFrame(LRPointResults, [:LR_pt_profit, :LR_pt_totalcontr])

Unnamed: 0_level_0,LR_pt_profit,LR_pt_totalcontr
Unnamed: 0_level_1,Float64,Float64
1,56379500.0,141029.0
2,56568400.0,141514.0
3,62576700.0,156503.0
4,62769200.0,156988.0
5,63026800.0,157625.0
6,63222500.0,158110.0
7,63224800.0,158116.0
8,63416400.0,158601.0
9,63612800.0,159087.0
10,63867000.0,159724.0


In [46]:
CSV.write("data/LR_pt_results.csv", LR_pt_results)

"data/LR_pt_results.csv"

# SAA

In [19]:
trainY_CustID = alldata_train[:,[:custID, :Week_Num, :Sales]];

In [12]:
saa_D = zeros(756,146);
for i=1:756
    custtrainsales = filter(row->row.custID==i,trainY_CustID)
    sort!(custtrainsales, :Week_Num)
    for j=1:146
        saa_D[i,j] = custtrainsales[j,3]
    end
end

In [22]:
SAA_estProf, SAA_contr = optPres(saa_D);

In [23]:
SAA_estProf

-0.0

In [17]:
alldata_test

Unnamed: 0_level_0,Column1,UniqueCustomer,Latitude,Longitude,Week,Income,Population,Week_Num,Season,Sales,custID
Unnamed: 0_level_1,Int64,String,Float64,Float64,Int64,Float64,Float64,Int64,Int64,Float64,Int64
1,1,"Fairfield County, Connecticut",41.244,-73.363,43,1.03677e5,9.4706e5,147,11,1430.0,210
2,2,"Fairfield County, Connecticut",41.244,-73.363,44,1.03711e5,9.4709e5,148,11,1240.0,210
3,3,"Fairfield County, Connecticut",41.244,-73.363,45,1.03746e5,947120.0,149,12,1830.0,210
4,4,"Fairfield County, Connecticut",41.244,-73.363,46,1.0378e5,9.4715e5,150,12,1210.0,210
5,5,"Fairfield County, Connecticut",41.244,-73.363,47,103815.0,9.4718e5,151,12,1660.0,210
6,6,"Fairfield County, Connecticut",41.244,-73.363,48,1.0385e5,9.4721e5,152,12,1610.0,210
7,7,"Fairfield County, Connecticut",41.244,-73.363,49,1.03884e5,9.47241e5,153,13,1690.0,210
8,8,"Fairfield County, Connecticut",41.244,-73.363,50,1.03919e5,9.47271e5,154,13,2110.0,210
9,9,"Fairfield County, Connecticut",41.244,-73.363,51,1.03953e5,947301.0,155,13,1850.0,210
10,10,"Fairfield County, Connecticut",41.244,-73.363,52,1.03988e5,9.47331e5,156,13,1580.0,210
