# Submission Preparation

In [64]:
import pandas as pd
import os

# Folder with the competition inputs read by this notebook
# (sample submission and biomass table).
SYNTH_DATA_PATH = "../data"
# Folder with the solver output; the generated submission is written here too.
OUT_SYNTH_DATA_PATH = "../output"
# Solver result file, located inside OUT_SYNTH_DATA_PATH, to convert.
SOLUTION_FILE = "solution_17_08_2023_01_57_09.csv"

- We have provided a sample solution.csv file for your reference. Your submission must
follow the same format.
- If you don’t provide values for all valid indices, a default value of zero will be used
for the missing indices. This may result in a constraint violation.
- For data_type depot_location and refinery_location, entries under destination_index and
value are not required and will be disregarded. You may choose to keep these entries blank.
- For data_type biomass_forecast, entries under destination_index are not required and will be
disregarded. You may choose to keep these entries blank.
- Optimized supply chain infrastructure proposed in your solution must be the same for both
year 2018 and 2019. Consequently, the entries for data_type depot_location and
refinery_location will be agnostic to year. Use year = 20182019 for such common entries.
- You can only place one depot per grid block/location. Similarly, you can only place one
biorefinery per grid block/location.
- Your solution will be eligible for ranking only if it satisfies all the constraints for 2018 and 2019.
- You do not need to submit your source code files. When you submit your solution, you can
ignore the "Upload source code" field.
- We will keep the first year (2018) of your solution for the public leaderboard. You can test
your solution any time and see how it ranks.
- We will keep the second year (2019) of your solution for the private leaderboard and it will
be used to determine the finalists.

In [65]:
# Load the provided sample submission to inspect the expected layout.
df_submission = pd.read_csv(
    os.path.join(SYNTH_DATA_PATH, 'sample_submission.csv')
)
df_submission.head(5)

Unnamed: 0,year,data_type,source_index,destination_index,value
0,20182019,depot_location,1256,,
1,20182019,depot_location,1595,,
2,20182019,depot_location,1271,,
3,20182019,depot_location,2001,,
4,20182019,depot_location,2201,,


In [66]:
# Row count per data_type in the sample submission.
df_submission.data_type.value_counts()

data_type
biomass_demand_supply    21646
biomass_forecast          4836
pellet_demand_supply       152
depot_location              21
refinery_location            4
Name: count, dtype: int64

In [67]:
# Load the solver output and give its two columns working names:
# the variable name ("data_type") and its solved value ("solution").
df_sol = (
    pd.read_csv(os.path.join(OUT_SYNTH_DATA_PATH, SOLUTION_FILE))
    .set_axis(["data_type", "solution"], axis=1)
)
df_sol.head(5)

Unnamed: 0,data_type,solution
0,b_2018_858_858,0.0
1,b_2018_858_921,0.0
2,b_2018_858_922,0.0
3,b_2018_858_793,0.0
4,b_2018_858_1147,0.0


In [68]:
# Load the per-site biomass table (one row per Index, one column per year).
df_fc = pd.read_csv(
    os.path.join(SYNTH_DATA_PATH, 'Biomass_History_Synthetic.csv')
)
df_fc.head(5)

Unnamed: 0,Index,Latitude,Longitude,2018,2019
0,0,24.66818,71.33144,7.387925,5.180296
1,1,24.66818,71.41106,40.431847,42.126945
2,2,24.66818,71.49069,59.181629,73.203232
3,3,24.66818,71.57031,74.53672,101.067352
4,4,24.66818,71.64994,16.531315,26.086885


In [69]:
# Reshape the biomass table from wide (one column per year) to long
# (one row per site and year) and lay it out as the biomass_forecast
# section of the submission; destination_index stays empty for these rows.
df_fc_sol = (
    df_fc
    .melt(id_vars=['Index'], value_vars=['2018', '2019'],
          var_name='year', value_name='biomass')
    .assign(data_type='biomass_forecast', destination_index=None)
    .rename(columns={'Index': 'source_index', 'biomass': 'value'})
    .loc[:, ['data_type', 'year', 'source_index', 'destination_index', 'value']]
)
df_fc_sol.head()

Unnamed: 0,data_type,year,source_index,destination_index,value
0,biomass_forecast,2018,0,,7.387925
1,biomass_forecast,2018,1,,40.431847
2,biomass_forecast,2018,2,,59.181629
3,biomass_forecast,2018,3,,74.53672
4,biomass_forecast,2018,4,,16.531315


## Biomass

In [70]:
# Convert the flat solver output into the long submission layout.
# Flow variables are named "<kind>_<year>_<source>_<destination>" (kinds b, p);
# judging from the outputs below, site variables (kinds x, r) carry only
# "<kind>_<index>" and therefore need a year token injected.
df_sol_proc = df_sol.copy()

# Insert the shared-year marker 20182019 right after a leading x_/r_ prefix.
# The pattern is anchored to the start of the name, so an "x_" or "r_" that
# appears anywhere else in a name is left alone. regex=True is spelled out
# because the default of `regex` in Series.str.replace changed in pandas 2.0.
df_sol_proc['data_type'] = df_sol_proc['data_type'].str.replace(
    r'^([xr])_', r'\1_20182019_', regex=True
)
df_sol_proc = df_sol_proc['data_type'].str.split("_", expand=True)
df_sol_proc.columns = ['data_type', 'year', 'source_index', 'destination_index']

# The split keeps df_sol's row labels, so this assignment is index-aligned.
df_sol_proc['value'] = df_sol['solution']
# Expand the one-letter prefixes to the submission's data_type names;
# any prefix missing from this mapping becomes NaN.
df_sol_proc['data_type'] = df_sol_proc['data_type'].map({'b': 'biomass_demand_supply',
                                                         'p': 'pellet_demand_supply',
                                                         'x': 'depot_location',
                                                         'r': 'refinery_location'})
# Keep only non-zero entries.
df_sol_proc = df_sol_proc[df_sol_proc['value'] != 0]

# Append the biomass_forecast rows to complete the submission table.
df_sol_proc = pd.concat([df_sol_proc, df_fc_sol])
df_sol_proc.head()

Unnamed: 0,data_type,year,source_index,destination_index,value
120,biomass_demand_supply,2018,858,1263,514.148987
348,biomass_demand_supply,2018,921,1263,511.325287
587,biomass_demand_supply,2018,922,1263,465.86322
803,biomass_demand_supply,2018,793,1263,450.37442
1059,biomass_demand_supply,2018,1147,1263,407.832275


In [71]:
# Show every depot_location row of the assembled table.
df_sol_proc.loc[df_sol_proc['data_type'].eq('depot_location')]

Unnamed: 0,data_type,year,source_index,destination_index,value
460403,depot_location,20182019,939,,1.0
460455,depot_location,20182019,1172,,1.0
460478,depot_location,20182019,1229,,1.0
460492,depot_location,20182019,1263,,1.0
460541,depot_location,20182019,1408,,1.0
460602,depot_location,20182019,1570,,1.0
460607,depot_location,20182019,1581,,1.0
460662,depot_location,20182019,1700,,1.0
460679,depot_location,20182019,1743,,1.0
460734,depot_location,20182019,1868,,1.0


In [72]:
# Show every refinery_location row of the assembled table.
df_sol_proc.loc[df_sol_proc['data_type'].eq('refinery_location')]

Unnamed: 0,data_type,year,source_index,destination_index,value
460978,refinery_location,20182019,1229,,1.0
460992,refinery_location,20182019,1263,,1.0
461067,refinery_location,20182019,1484,,1.0


In [73]:
# Row count per data_type in the assembled table.
df_sol_proc['data_type'].value_counts()

data_type
biomass_forecast         4836
pellet_demand_supply      851
biomass_demand_supply     800
depot_location             12
refinery_location           3
Name: count, dtype: int64

In [74]:
# Show the biomass_demand_supply rows (pandas truncates the long listing).
df_sol_proc.loc[df_sol_proc['data_type'].eq('biomass_demand_supply')]

Unnamed: 0,data_type,year,source_index,destination_index,value
120,biomass_demand_supply,2018,858,1263,514.148987
348,biomass_demand_supply,2018,921,1263,511.325287
587,biomass_demand_supply,2018,922,1263,465.863220
803,biomass_demand_supply,2018,793,1263,450.374420
1059,biomass_demand_supply,2018,1147,1263,407.832275
...,...,...,...,...,...
228341,biomass_demand_supply,2019,1462,1263,426.573761
229149,biomass_demand_supply,2019,1339,1263,311.931122
229415,biomass_demand_supply,2019,615,1172,165.909622
229685,biomass_demand_supply,2019,1346,1229,234.120773


In [75]:
# Number of biomass_demand_supply rows pointing at each destination_index.
df_sol_proc.loc[
    df_sol_proc['data_type'].eq('biomass_demand_supply')
].destination_index.value_counts()

destination_index
1263    398
1229    294
1172     34
1700     29
1581     27
1743      8
1570      3
1973      2
1868      2
1408      1
2028      1
939       1
Name: count, dtype: int64

In [76]:
# Spot-check the biomass record whose row label is 1020.
df_fc.loc[1020]

Index        1020.000000
Latitude       22.814370
Longitude      73.640570
2018           95.019615
2019          149.782852
Name: 1020, dtype: float64

In [77]:
# Sum of biomass_demand_supply values per (year, destination_index).
rest_2 = (
    df_sol_proc
    .loc[df_sol_proc['data_type'].eq('biomass_demand_supply')]
    .groupby(['year', 'destination_index'])['value']
    .sum()
)
rest_2

year  destination_index
2018  1172                  4762.383819
      1229                 43315.433063
      1263                 45474.178040
      1570                    72.311478
      1581                   175.753792
      1700                   260.709961
      1868                     0.001000
      1973                   383.598602
2019  1172                  4464.618179
      1229                 47149.658309
      1263                 80000.000000
      1408                   444.953033
      1570                  1215.219940
      1581                 12176.345459
      1700                 11033.398499
      1743                  4524.158783
      1868                     0.001000
      1973                   539.139099
      2028                     0.001000
      939                    254.020981
Name: value, dtype: float64

In [78]:
# Sum of biomass_demand_supply values per (year, source_index).
# Note: this rebinds rest_2, replacing the per-destination totals.
rest_2 = (
    df_sol_proc
    .loc[df_sol_proc['data_type'].eq('biomass_demand_supply')]
    .groupby(['year', 'source_index'])['value']
    .sum()
)
rest_2

year  source_index
2018  1002            300.422607
      1010            263.269318
      1011            266.378845
      1035             85.215050
      1036            110.556366
                         ...    
2019  983             657.363647
      984             427.431610
      991             377.508606
      996             316.854645
      998             375.864471
Name: value, Length: 799, dtype: float64

In [79]:
# Spot-check the biomass record at row position 1063.
df_fc.iloc[1063]

Index        1063.000000
Latitude       22.733770
Longitude      72.685070
2018          247.525650
2019          151.901977
Name: 1063, dtype: float64

In [80]:
# Preview the first pellet_demand_supply rows.
df_sol_proc.loc[df_sol_proc['data_type'].eq('pellet_demand_supply')].head(5)

Unnamed: 0,data_type,year,source_index,destination_index,value
230280,pellet_demand_supply,2018,858,1263,0.001
230508,pellet_demand_supply,2018,921,1263,0.001
230747,pellet_demand_supply,2018,922,1263,0.001
230963,pellet_demand_supply,2018,793,1263,0.001
231219,pellet_demand_supply,2018,1147,1263,0.001


In [81]:
# Same pellet_demand_supply preview as the preceding cell (duplicate cell).
df_sol_proc.loc[df_sol_proc['data_type'].eq('pellet_demand_supply')].head(5)

Unnamed: 0,data_type,year,source_index,destination_index,value
230280,pellet_demand_supply,2018,858,1263,0.001
230508,pellet_demand_supply,2018,921,1263,0.001
230747,pellet_demand_supply,2018,922,1263,0.001
230963,pellet_demand_supply,2018,793,1263,0.001
231219,pellet_demand_supply,2018,1147,1263,0.001


In [82]:
# Sum of pellet_demand_supply values per year.
(
    df_sol_proc
    .loc[df_sol_proc['data_type'].eq('pellet_demand_supply')]
    .groupby(['year'])['value']
    .sum()
)

year
2018     94444.804755
2019    161801.927282
Name: value, dtype: float64

In [83]:
# 80% of the summed biomass_forecast values per year.
# NOTE(review): presumably a utilisation threshold to compare with the totals
# above; confirm the 0.8 factor against the problem statement.
0.8 * (
    df_sol_proc
    .loc[df_sol_proc['data_type'].eq('biomass_forecast')]
    .groupby(['year'])['value']
    .sum()
)

year
2018    218759.985496
2019    307885.616861
Name: value, dtype: float64

In [52]:
# Write the submission file. The columns are emitted in the same order as
# sample_submission.csv (year, data_type, source_index, destination_index,
# value) so the file matches the required format even for a reader that relies
# on column position rather than header names.
df_sol_proc.to_csv(
    os.path.join(OUT_SYNTH_DATA_PATH, 'subm_yearly_test_def.csv'),
    columns=['year', 'data_type', 'source_index', 'destination_index', 'value'],
    index=False,
)

In [53]:
# Display the assembled submission table (pandas truncates the long listing).
df_sol_proc

Unnamed: 0,data_type,year,source_index,destination_index,value
71,biomass_demand_supply,2018,0,343,7.387925
485,biomass_demand_supply,2018,4,343,16.531315
895,biomass_demand_supply,2018,9,343,36.008152
1317,biomass_demand_supply,2018,14,343,0.407948
1733,biomass_demand_supply,2018,19,343,60.418457
...,...,...,...,...,...
4831,biomass_forecast,2019,2413,,5.321604
4832,biomass_forecast,2019,2414,,0.120626
4833,biomass_forecast,2019,2415,,0.038879
4834,biomass_forecast,2019,2416,,1.304297
