# Submission Preparation

In [43]:
import pandas as pd
import os

# Folder holding the competition inputs (sample submission, biomass history).
SYNTH_DATA_PATH = "../data"
# Folder the solution file is read from and the submission file is written to.
OUT_SYNTH_DATA_PATH = "../output"
# Solution file (variable name / value pairs) that gets converted into a submission.
SOLUTION_FILE = "solution_27_07_2023_18_22_13.csv"

- We have provided a sample solution.csv file for your reference. Your submission must
follow the same format.
- If you don’t provide values for all valid indices, a default value, i.e. zero, will be used
as the value for those indices. This may result in a constraint violation.
- For data_type depot_location and refinery_location, entries under destination_index and
value are not required and will be disregarded. You may choose to keep these entries blank.
- For data_type biomass_forecast, entries under destination_index are not required and will be
disregarded. You may choose to keep these entries blank.
- Optimized supply chain infrastructure proposed in your solution must be the same for both
year 2018 and 2019. Consequently, the entries for data_type depot_location and
refinery_location will be agnostic to year. Use year = 20182019 for such common entries.
- You can only place one depot per grid block/location. Similarly, you can only place one
biorefinery per grid block/location.
- Your solution will be eligible for ranking only if it satisfies all the constraints for 2018 and 2019.
- You do not need to submit your source code files. When you submit your solution, you can
ignore the "Upload source code" field.
- We will keep the first year (2018) of your solution for the public leaderboard. You can test
your solution any time and see how it ranks.
- We will keep the second year (2019) of your solution for the private leaderboard and it will
be used to determine the finalists.

In [44]:
# Load the sample submission shipped with the data; it is the format reference.
sample_submission_path = os.path.join(SYNTH_DATA_PATH, 'sample_submission.csv')
df_submission = pd.read_csv(sample_submission_path)
df_submission.head()

Unnamed: 0,year,data_type,source_index,destination_index,value
0,20182019,depot_location,1256,,
1,20182019,depot_location,1595,,
2,20182019,depot_location,1271,,
3,20182019,depot_location,2001,,
4,20182019,depot_location,2201,,


In [45]:
# Number of rows per data_type in the sample submission.
df_submission.data_type.value_counts()

data_type
biomass_demand_supply    21646
biomass_forecast          4836
pellet_demand_supply       152
depot_location              21
refinery_location            4
Name: count, dtype: int64

In [46]:
# Read the solution file and give its two columns working names:
# the variable name goes to "data_type", its value to "solution".
solution_path = os.path.join(OUT_SYNTH_DATA_PATH, SOLUTION_FILE)
df_sol = pd.read_csv(solution_path).set_axis(["data_type", "solution"], axis=1)
df_sol.head()

Unnamed: 0,data_type,solution
0,b_2018_0_0,0.0
1,b_2018_0_1,0.0
2,b_2018_0_2,0.0
3,b_2018_0_3,0.0
4,b_2018_0_4,0.0


In [47]:
# Per-location biomass values with one column per year (2018, 2019).
biomass_history_path = os.path.join(SYNTH_DATA_PATH, 'Biomass_History_Synthetic.csv')
df_fc = pd.read_csv(biomass_history_path)
df_fc.head()

Unnamed: 0,Index,Latitude,Longitude,2018,2019
0,0,24.66818,71.33144,7.387925,5.180296
1,1,24.66818,71.41106,40.431847,42.126945
2,2,24.66818,71.49069,59.181629,73.203232
3,3,24.66818,71.57031,74.53672,101.067352
4,4,24.66818,71.64994,16.531315,26.086885


In [48]:
# Reshape the wide biomass table (one column per year) into long submission rows:
# one 'biomass_forecast' row per (year, location) with an empty destination_index.
df_fc_sol = (
    df_fc
    .melt(id_vars=['Index'], value_vars=['2018', '2019'], var_name='year', value_name='biomass')
    .assign(data_type='biomass_forecast', destination_index=None)
    .rename(columns={'Index': 'source_index', 'biomass': 'value'})
    .loc[:, ['data_type', 'year', 'source_index', 'destination_index', 'value']]
)
df_fc_sol.head()

Unnamed: 0,data_type,year,source_index,destination_index,value
0,biomass_forecast,2018,0,,7.387925
1,biomass_forecast,2018,1,,40.431847
2,biomass_forecast,2018,2,,59.181629
3,biomass_forecast,2018,3,,74.53672
4,biomass_forecast,2018,4,,16.531315


## Biomass

In [49]:
# Turn the solution variables into submission rows.
#
# Flow variables are named '<prefix>_<year>_<source>_<destination>' (e.g. 'b_2018_0_0').
# Depot ('x_...') and refinery ('r_...') variables carry no year, so the common-year marker
# 20182019 is inserted right after their prefix; every name then splits into the same fields.
# The pattern is anchored (^) so only a leading 'x_' / 'r_' is rewritten, never an 'x_' or
# 'r_' that happens to occur later in a name, and regex=True is explicit because the default
# of Series.str.replace differs between pandas versions.
variable_names = df_sol['data_type'].str.replace(r'^([xr])_', r'\g<1>_20182019_', regex=True)

df_sol_proc = variable_names.str.split("_", expand=True)
df_sol_proc.columns = ['data_type', 'year', 'source_index', 'destination_index']

# The split frame keeps df_sol's index, so the values align row by row.
df_sol_proc['value'] = df_sol['solution']
# Translate the one-letter variable prefix into the data_type label used in the submission.
df_sol_proc['data_type'] = df_sol_proc['data_type'].map({'b': 'biomass_demand_supply',
                                                         'p': 'pellet_demand_supply',
                                                         'x': 'depot_location',
                                                         'r': 'refinery_location'})
# Keep only non-zero entries: zero flows and unselected locations are dropped.
df_sol_proc = df_sol_proc[df_sol_proc['value'] != 0]

# Append the biomass_forecast rows to complete the submission.
df_sol_proc = pd.concat([df_sol_proc, df_fc_sol])
df_sol_proc.head()

Unnamed: 0,data_type,year,source_index,destination_index,value
35,biomass_demand_supply,2018,1,5,40.431847
65,biomass_demand_supply,2018,2,5,59.181629
95,biomass_demand_supply,2018,3,5,74.53672
125,biomass_demand_supply,2018,4,5,16.531315
155,biomass_demand_supply,2018,5,5,23.592617


In [50]:
# Depot location rows in the prepared submission.
is_depot_row = df_sol_proc['data_type'].eq('depot_location')
df_sol_proc.loc[is_depot_row].head()

Unnamed: 0,data_type,year,source_index,destination_index,value
3605,depot_location,20182019,5,,1.0


In [51]:
# Refinery location rows in the prepared submission.
is_refinery_row = df_sol_proc['data_type'].eq('refinery_location')
df_sol_proc.loc[is_refinery_row].head()

Unnamed: 0,data_type,year,source_index,destination_index,value
3658,refinery_location,20182019,28,,1.0


In [52]:
# Number of rows per data_type in the prepared submission.
df_sol_proc['data_type'].value_counts()

data_type
biomass_forecast         4836
biomass_demand_supply      34
pellet_demand_supply        2
depot_location              1
refinery_location           1
Name: count, dtype: int64

In [53]:
# Biomass flow rows (biomass_demand_supply) in the prepared submission.
is_biomass_flow = df_sol_proc['data_type'].eq('biomass_demand_supply')
df_sol_proc.loc[is_biomass_flow].head()

Unnamed: 0,data_type,year,source_index,destination_index,value
35,biomass_demand_supply,2018,1,5,40.431847
65,biomass_demand_supply,2018,2,5,59.181629
95,biomass_demand_supply,2018,3,5,74.53672
125,biomass_demand_supply,2018,4,5,16.531315
155,biomass_demand_supply,2018,5,5,23.592617


In [54]:
# Pellet flow rows (pellet_demand_supply) in the prepared submission.
is_pellet_flow = df_sol_proc['data_type'].eq('pellet_demand_supply')
df_sol_proc.loc[is_pellet_flow].head()

Unnamed: 0,data_type,year,source_index,destination_index,value
2645,pellet_demand_supply,2018,5,28,1190.413179
3545,pellet_demand_supply,2019,5,28,1413.580827


In [55]:
# NOTE(review): this repeats the pellet_demand_supply check from the previous cell;
# one of the two cells can be removed.
df_sol_proc.loc[lambda frame: frame['data_type'] == 'pellet_demand_supply'].head()

Unnamed: 0,data_type,year,source_index,destination_index,value
2645,pellet_demand_supply,2018,5,28,1190.413179
3545,pellet_demand_supply,2019,5,28,1413.580827


In [56]:
# Write the submission file with its columns in the same order as the sample submission
# (year, data_type, source_index, destination_index, value). df_sol_proc keeps data_type
# first, which does not match the required format, so the order is fixed on export.
submission_path = os.path.join(OUT_SYNTH_DATA_PATH, 'subm_yearly_test.csv')
df_sol_proc.to_csv(
    submission_path,
    index=False,
    columns=['year', 'data_type', 'source_index', 'destination_index', 'value'],
)