# Submission Preparation

In [272]:
import pandas as pd
import os
import math
import numpy as np

SYNTH_DATA_PATH = '../data'
OUT_SYNTH_DATA_PATH = '../output'
SOLUTION_FILE = 'clustering/refineries/solution_20_08_2023_21_04_19.csv'

In [273]:
df_submission = pd.read_csv(os.path.join(SYNTH_DATA_PATH, 'sample_submission.csv'))
df_submission.head()

Unnamed: 0,year,data_type,source_index,destination_index,value
0,20182019,depot_location,1256,,
1,20182019,depot_location,1595,,
2,20182019,depot_location,1271,,
3,20182019,depot_location,2001,,
4,20182019,depot_location,2201,,


In [274]:
df_submission.dtypes

year                   int64
data_type             object
source_index           int64
destination_index    float64
value                float64
dtype: object

In [275]:
df_submission['data_type'].value_counts()

data_type
biomass_demand_supply    21646
biomass_forecast          4836
pellet_demand_supply       152
depot_location              21
refinery_location            4
Name: count, dtype: int64

In [276]:
df_sol = pd.read_csv(os.path.join(OUT_SYNTH_DATA_PATH, SOLUTION_FILE))
df_sol.columns = ["data_type", "solution"]
df_sol.head()

Unnamed: 0,data_type,solution
0,b_2018_0_252,0.057442
1,b_2018_1_252,13.510995
2,b_2018_2_252,5.321604
3,b_2018_3_252,0.120626
4,b_2018_4_252,0.038879


In [277]:
df_fc = pd.read_csv(os.path.join(SYNTH_DATA_PATH, 'Biomass_History_Forecast.csv'))
df_fc.head()

Unnamed: 0,Index,Latitude,Longitude,2018,2019
0,0,20.15456,72.68507,0.057442,0.057442
1,1,20.15456,72.76469,13.510995,13.510995
2,2,20.15456,72.84432,5.321604,5.321604
3,3,20.15456,72.92394,0.120626,0.120626
4,4,20.15456,73.00357,0.038879,0.038879


In [278]:
df_fc_sol = df_fc.copy()
df_fc_sol = df_fc_sol.melt(value_vars=['2018', '2019'], var_name='year', value_name='biomass', id_vars=['Index'])
df_fc_sol['data_type'] = 'biomass_forecast'
df_fc_sol['destination_index'] = None
df_fc_sol = df_fc_sol[['data_type', 'year', 'Index', 'destination_index', 'biomass']]
df_fc_sol.columns = ['data_type', 'year', 'source_index', 'destination_index', 'value']
df_fc_sol.head()

Unnamed: 0,data_type,year,source_index,destination_index,value
0,biomass_forecast,2018,0,,0.057442
1,biomass_forecast,2018,1,,13.510995
2,biomass_forecast,2018,2,,5.321604
3,biomass_forecast,2018,3,,0.120626
4,biomass_forecast,2018,4,,0.038879


In [279]:
df_fc_sol.loc[(df_fc_sol['data_type'] == 'biomass_forecast')&(df_fc_sol['year'] == '2019'), :]

Unnamed: 0,data_type,year,source_index,destination_index,value
2418,biomass_forecast,2019,0,,0.057442
2419,biomass_forecast,2019,1,,13.510995
2420,biomass_forecast,2019,2,,5.321604
2421,biomass_forecast,2019,3,,0.120626
2422,biomass_forecast,2019,4,,0.038879
...,...,...,...,...,...
4831,biomass_forecast,2019,2413,,52.549976
4832,biomass_forecast,2019,2414,,9.861409
4833,biomass_forecast,2019,2415,,7.909709
4834,biomass_forecast,2019,2416,,7.005911


## Biomass

In [280]:
df_sol

Unnamed: 0,data_type,solution
0,b_2018_0_252,0.057442
1,b_2018_1_252,13.510995
2,b_2018_2_252,5.321604
3,b_2018_3_252,0.120626
4,b_2018_4_252,0.038879
...,...,...
4937,x_2040,1.000000
4938,x_1009,1.000000
4939,x_1230,1.000000
4940,x_1440,1.000000


In [281]:
df_sol_proc = df_sol.copy()
df_sol_proc['data_type'] = df_sol_proc['data_type'].str.replace('x_', 'x_20182019_').str.replace('r_', 'r_20182019_')
df_sol_proc = df_sol_proc['data_type'].str.split("_", expand=True)
df_sol_proc.columns = ['data_type', 'year', 'source_index', 'destination_index']

df_sol_proc['value'] = df_sol['solution']
df_sol_proc['data_type'] = df_sol_proc['data_type'].map({'b': 'biomass_demand_supply', 
                                                         'p': 'pellet_demand_supply', 
                                                         'x': 'depot_location', 
                                                         'r': 'refinery_location'})
df_sol_proc = df_sol_proc[df_sol_proc['value'] != 0]

df_sol_proc = pd.concat([df_sol_proc, df_fc_sol])
# df_sol_proc['value'] = df_sol_proc['value'].apply(lambda x: math.ceil(x * 10000) / 10000)
# df_sol_proc.loc[df_sol_proc['data_type'].isin(['biomass_demand_supply']), 'value'] -= 1e-8
# df_sol_proc.loc[df_sol_proc['data_type'].isin(['pellet_demand_supply']), 'value'] -= 1e-8
df_sol_proc.to_csv(os.path.join(OUT_SYNTH_DATA_PATH, 'subm_cluster_and_refs_round.csv'), index=False)
df_sol_proc.head()

Unnamed: 0,data_type,year,source_index,destination_index,value
0,biomass_demand_supply,2018,0,252,0.057442
1,biomass_demand_supply,2018,1,252,13.510995
2,biomass_demand_supply,2018,2,252,5.321604
3,biomass_demand_supply,2018,3,252,0.120626
4,biomass_demand_supply,2018,4,252,0.038879


In [261]:
df_sol_proc.data_type.value_counts()

data_type
biomass_demand_supply    4868
biomass_forecast         4836
pellet_demand_supply       44
depot_location             22
refinery_location           4
Name: count, dtype: int64

# Solution Checker

## Constraint 2: The processed biomass from each HS <= FC Biomass

In [None]:
forecasted_bio_18[provided_bio_18 <= forecasted_bio_18.iloc[provided_bio_18.index.astype(int), :]].dropna()
(provided_bio_18 - forecasted_bio_18.iloc[provided_bio_18.index.astype(int), :]).values.sum()


-2.4180006974764627e-05

In [263]:
provided_bio_18 = df_sol_proc[(df_sol_proc['year'] == '2018')&(df_sol_proc['data_type'] == 'biomass_demand_supply')].\
    groupby('source_index')[['value']].sum()
provided_bio_18.index = provided_bio_18.index.astype(int)
forecasted_bio_18 = df_sol_proc[(df_sol_proc['year'] == '2018')&(df_sol_proc['data_type'] == 'biomass_forecast')].\
    groupby('source_index')[['value']].sum().reset_index(drop=True)
# provided_bio_18[provided_bio_18 <= forecasted_bio_18.iloc[provided_bio_18.index.astype(int), :]].dropna()
forecasted_bio_18[provided_bio_18 <= forecasted_bio_18.iloc[provided_bio_18.index.astype(int), :]].dropna()

assert np.all(provided_bio_18.values <= forecasted_bio_18.iloc[provided_bio_18.index.astype(int), :].values), "RESTRICTION 2 NOT SATISFIED 2018: {}"

In [264]:
provided_bio_19 = df_sol_proc[(df_sol_proc['year'] == '2098')&(df_sol_proc['data_type'] == 'biomass_demand_supply')].\
    groupby('source_index')[['value']].sum()

forecasted_bio_19 = df_sol_proc[(df_sol_proc['year'] == '2019')&(df_sol_proc['data_type'] == 'biomass_forecast')].\
    groupby('source_index')[['value']].sum().reset_index(drop=True)

assert np.all(provided_bio_19.values <= forecasted_bio_19.iloc[provided_bio_19.index.astype(int), :].values), "RESTRICTION 2 NOT SATISFIED 2018"

## 3-4. Can't transport more than storage limit

In [265]:
idx_max_stocked_18 = stocked_bio_18.idxmax()[0]
idx_max_stocked_18

'747'

In [266]:
df_sol_proc[(df_sol_proc['year'] == '2018')&(df_sol_proc['data_type'] == 'biomass_demand_supply')].\
    groupby('destination_index')[['value']].sum()#.max()

Unnamed: 0_level_0,value
destination_index,Unnamed: 1_level_1
1106,19999.99
1119,19999.99
1128,19999.99
1147,19999.99
122,19999.99
1323,14105.779879
1409,17299.548108
1605,19999.99
1631,5691.068356
1642,19999.99


In [267]:
df_sol_proc[(df_sol_proc['year'] == '2018')&(df_sol_proc['data_type'] == 'pellet_demand_supply')].\
    groupby('source_index')[['value']].sum()#.max()

Unnamed: 0_level_0,value
source_index,Unnamed: 1_level_1
1106,19999.9891
1119,19999.9891
1128,19999.9891
1147,19999.9891
122,19999.9891
1323,14105.778979
1409,17299.547208
1605,19999.9909
1631,5691.067456
1642,19999.9909


In [268]:
stocked_bio_18 = df_sol_proc[(df_sol_proc['year'] == '2018')&(df_sol_proc['data_type'] == 'biomass_demand_supply')].\
    groupby('destination_index')[['value']].sum()

stocked_bio_19 = df_sol_proc[(df_sol_proc['year'] == '2019')&(df_sol_proc['data_type'] == 'biomass_demand_supply')].\
    groupby('destination_index')[['value']].sum()

# idx_max_stocked_18 = stocked_bio_18.idxmax()[0]
# df_sol_proc.loc[idx_max_stocked_18, 'value'] = 20000.

max_stocked_18 = stocked_bio_18.max()[0]
max_stocked_19 = stocked_bio_19.max()[0]
print(f"Max stocked biomass in 2018: {max_stocked_18}")
print(f"Max stocked biomass in 2019: {max_stocked_19}")
assert (stocked_bio_18 <= 20000.).values.all(), f"Constraint 3 violated for 2018: {max_stocked_18}"
assert (stocked_bio_19 <= 20000.).values.all(), f"Constraint 3 violated for 2019: {max_stocked_19}"

Max stocked biomass in 2018: 19999.990000000013
Max stocked biomass in 2019: 19999.99000000001


In [269]:
processed_bio_18 = df_sol_proc[(df_sol_proc['year'] == '2018')&(df_sol_proc['data_type'] == 'pellet_demand_supply')].\
    groupby('destination_index')[['value']].sum()

processed_bio_19 = df_sol_proc[(df_sol_proc['year'] == '2019')&(df_sol_proc['data_type'] == 'pellet_demand_supply')].\
    groupby('destination_index')[['value']].sum()

# idx_max_processed_18 = processed_bio_18.idxmax()[0]
# df_sol_proc.loc[idx_max_processed_18, 'value'] = 20000.

max_processed_18 = processed_bio_18.max()[0]
max_processed_19 = processed_bio_19.max()[0]
print(f"Max processed biomass in 2018: {max_processed_18}")
print(f"Max processed biomass in 2019: {max_processed_19}")
assert (processed_bio_18 <= 100000.).values.all(), f"Constraint 3 violated for 2018: {max_processed_18}"
assert (processed_bio_19 <= 100000.).values.all(), f"Constraint 3 violated for 2019: {max_processed_18}"

Max processed biomass in 2018: 99999.94812417994
Max processed biomass in 2019: 99999.94812418002


In [244]:
# bm_cond_18 = (df_sol_proc['year'] == '2018')&(df_sol_proc['data_type'] == 'biomass_demand_supply')
# if max_stocked_18 > 20000:
#     # increment = 20000. - max_stocked_18
#     df_sol_proc.loc[bm_cond, 'value'] = df_sol_proc.loc[bm_cond, 'value'] - 1e-6

# bm_cond_19 = (df_sol_proc['year'] == '2019')&(df_sol_proc['data_type'] == 'biomass_demand_supply')
# if max_stocked_19 > 20000:
#     increment = 20000. - max_stocked_19
#     df_sol_proc.loc[bm_cond_19, 'value'] = df_sol_proc.loc[bm_cond_19, 'value'] - 1e-6


# assert (df_sol_proc.loc[bm_cond, 'value'] <= 20000.).values.all(), f"Constraint 3 violated for 2018: {max_stocked_18}"
# assert (df_sol_proc.loc[bm_cond, 'value'] <= 20000.).values.all(), f"Constraint 3 violated for 2019: {max_stocked_19}"


In [224]:
pm_cond_18 = (df_sol_proc['year'] == '2018')&(df_sol_proc['data_type'] == 'pellet_demand_supply')
pm_cond_19 = (df_sol_proc['year'] == '2019')&(df_sol_proc['data_type'] == 'pellet_demand_supply')

proc_pellet_18 = df_sol_proc[pm_cond_18].\
    groupby('source_index')[['value']].sum()

proc_pellet_19 = df_sol_proc[pm_cond_19].\
    groupby('source_index')[['value']].sum()

max_processed_18 = proc_pellet_18.max()[0]
max_processed_19 = proc_pellet_19.max()[0]


if max_processed_18 > 20000:
    # increment = 20000. - max_stocked_18
    df_sol_proc.loc[pm_cond_18, 'value'] = df_sol_proc.loc[pm_cond_18, 'value'] - 1e-2

if max_processed_19 > 20000:
    # increment = 20000. - max_stocked_18
    df_sol_proc.loc[pm_cond_19, 'value'] = df_sol_proc.loc[pm_cond_19, 'value'] - 1e-2

proc_pellet_18 = df_sol_proc[pm_cond_18].\
    groupby('source_index')[['value']].sum()

proc_pellet_19 = df_sol_proc[pm_cond_19].\
    groupby('source_index')[['value']].sum()

max_processed_18 = proc_pellet_18.max()[0]
max_processed_19 = proc_pellet_19.max()[0]

assert (proc_pellet_18 <= 20000).values.all(), f"Constraint 3 violated for 2018: {max_processed_18}"
assert (proc_pellet_19 <= 20000).values.all(), f"Constraint 3 violated for 2019: {max_processed_19}"

## Constraint 8: Pellets in = Pellets out

In [245]:
pellets_in_18 = stocked_bio_18.copy()
pellets_out_18 = df_sol_proc[(df_sol_proc['year'] == '2018')&(df_sol_proc['data_type'] == 'pellet_demand_supply')].\
    groupby('source_index')[['value']].sum()

max_dif = (pellets_out_18 - pellets_in_18).abs().max().values[0]
print(f'Pellets in and out of 2018 do not match: {max_dif}')
assert max_dif <= .001, f'Pellets in and out of 2018 do not match: {max_dif}'


Pellets in and out of 2018 do not match: 0.0009000000136438757


## 7. 80%

In [270]:
processed_18 = df_sol_proc[(df_sol_proc['year'] == '2018')&(df_sol_proc['data_type'] == 'pellet_demand_supply')]['value'].sum()
forecast_18 = df_sol_proc[(df_sol_proc['year'] == '2018')&(df_sol_proc['data_type'] == 'biomass_forecast')]['value'].sum()

print('Processed 2018: ', processed_18)
print('Forecast 2018: ', forecast_18)
print("Ratio: ", processed_18/forecast_18)

Processed 2018:  384857.0110760374
Forecast 2018:  384857.02107603743
Ratio:  0.9999999740163243


In [271]:
processed_19 = df_sol_proc[(df_sol_proc['year'] == '2019')&(df_sol_proc['data_type'] == 'pellet_demand_supply')]['value'].sum()
forecast_19 = df_sol_proc[(df_sol_proc['year'] == '2019')&(df_sol_proc['data_type'] == 'biomass_forecast')]['value'].sum()

print('Processed 2019: ', processed_19)
print('Forecast 2019: ', forecast_19)
print("Ratio: ", processed_19/forecast_19)

Processed 2019:  384857.01107603766
Forecast 2019:  384857.02107603766
Ratio:  0.9999999740163243


In [248]:
print("N Depots: ", len(df_sol_proc[df_sol_proc['data_type'] == 'depot_location']))
df_sol_proc[df_sol_proc['data_type'] == 'depot_location']#.head()

N Depots:  25


Unnamed: 0,data_type,year,source_index,destination_index,value
4916,depot_location,20182019,1888,,1.0
4917,depot_location,20182019,690,,1.0
4918,depot_location,20182019,265,,1.0
4919,depot_location,20182019,1937,,1.0
4920,depot_location,20182019,901,,1.0
4921,depot_location,20182019,955,,1.0
4922,depot_location,20182019,420,,1.0
4923,depot_location,20182019,1053,,1.0
4924,depot_location,20182019,1473,,1.0
4925,depot_location,20182019,589,,1.0


In [249]:
print("N Refineries: ", len(df_sol_proc[df_sol_proc['data_type'] == 'refinery_location']))

df_sol_proc[df_sol_proc['data_type'] == 'refinery_location']#.head()

N Refineries:  4


Unnamed: 0,data_type,year,source_index,destination_index,value
4912,refinery_location,20182019,589,,1.0
4913,refinery_location,20182019,690,,1.0
4914,refinery_location,20182019,1937,,1.0
4915,refinery_location,20182019,2170,,1.0


In [38]:
df_sol_proc.to_csv(os.path.join(OUT_SYNTH_DATA_PATH, 'subm_cluster_and_refs_round.csv'), index=False)


In [250]:
df_sol_proc

Unnamed: 0,data_type,year,source_index,destination_index,value
0,biomass_demand_supply,2018,0,252,0.057442
1,biomass_demand_supply,2018,1,252,13.510995
2,biomass_demand_supply,2018,2,252,5.321604
3,biomass_demand_supply,2018,3,252,0.120626
4,biomass_demand_supply,2018,4,252,0.038879
...,...,...,...,...,...
4831,biomass_forecast,2019,2413,,52.549976
4832,biomass_forecast,2019,2414,,9.861409
4833,biomass_forecast,2019,2415,,7.909709
4834,biomass_forecast,2019,2416,,7.005911


In [None]:
from mip import Model
m = Model()
m.read()