# Submission Preparation

In [1]:
import pandas as pd
import os

SYNTH_DATA_PATH = '../data'
OUT_SYNTH_DATA_PATH = '../output'
SOLUTION_FILE = 'clustering/refineries/solution_19_08_2023_16_30_46.csv'

- We have provided a sample solution.csv file for your reference. Your submission must
follow the same format.
- If you don’t provide values for all valid indices, a default value i.e. zero will be considered
as the value for those indices. This may result into constraint violation.
- For data_type depot_location and refinery_location, entries under destination_index and
value are not required and will be disregarded. You may choose to keep these entries blank.
- For data_type biomass_forecast, entries under destination_index are not required and will be
disregarded. You may choose to keep these entries blank.
- Optimized supply chain infrastructure proposed in your solution must be the same for both
year 2018 and 2019. Consequently, the entries for data_type depot_location and
refinery_location will be agnostic to year. Use year = 20182019 for such common entries.
- You can only place one depot per grid block/location. Similarly, You can only place one
biorefinery per grid block/location.
- Your solution will be eligible for ranking only if it satisfies all the constraints for 2018 and 2019.
- You do not need to submit your source code files. When you submit your solution, you can
ignore the "Upload source code" field.
- We will keep the first year (2018) of your solution for the public leaderboard. You can test
your solution any time and see how it ranks.
- We will keep the second year (2019) of your solution for the private leaderboard and it will
be used to determine the finalists.

In [2]:
df_submission = pd.read_csv(os.path.join(SYNTH_DATA_PATH, 'sample_submission.csv'))
df_submission.head()

Unnamed: 0,year,data_type,source_index,destination_index,value
0,20182019,depot_location,1256,,
1,20182019,depot_location,1595,,
2,20182019,depot_location,1271,,
3,20182019,depot_location,2001,,
4,20182019,depot_location,2201,,


In [3]:
df_submission['data_type'].value_counts()

data_type
biomass_demand_supply    21646
biomass_forecast          4836
pellet_demand_supply       152
depot_location              21
refinery_location            4
Name: count, dtype: int64

In [4]:
df_sol = pd.read_csv(os.path.join(OUT_SYNTH_DATA_PATH, SOLUTION_FILE))
df_sol.columns = ["data_type", "solution"]
df_sol.head()

Unnamed: 0,data_type,solution
0,b_2018_0_122,7.387925
1,b_2018_1_122,40.431847
2,b_2018_2_122,59.181629
3,b_2018_3_122,74.53672
4,b_2018_4_122,16.531315


In [5]:
df_fc = pd.read_csv(os.path.join(SYNTH_DATA_PATH, 'Biomass_History_Synthetic.csv'))
df_fc.head()

Unnamed: 0,Index,Latitude,Longitude,2018,2019
0,0,24.66818,71.33144,7.387925,5.180296
1,1,24.66818,71.41106,40.431847,42.126945
2,2,24.66818,71.49069,59.181629,73.203232
3,3,24.66818,71.57031,74.53672,101.067352
4,4,24.66818,71.64994,16.531315,26.086885


In [6]:
df_fc_sol = df_fc.copy()
df_fc_sol = df_fc_sol.melt(value_vars=['2018', '2019'], var_name='year', value_name='biomass', id_vars=['Index'])
df_fc_sol['data_type'] = 'biomass_forecast'
df_fc_sol['destination_index'] = None
df_fc_sol = df_fc_sol[['data_type', 'year', 'Index', 'destination_index', 'biomass']]
df_fc_sol.columns = ['data_type', 'year', 'source_index', 'destination_index', 'value']
df_fc_sol.head()

Unnamed: 0,data_type,year,source_index,destination_index,value
0,biomass_forecast,2018,0,,7.387925
1,biomass_forecast,2018,1,,40.431847
2,biomass_forecast,2018,2,,59.181629
3,biomass_forecast,2018,3,,74.53672
4,biomass_forecast,2018,4,,16.531315


## Biomass

In [7]:
df_sol_proc = df_sol.copy()
df_sol_proc['data_type'] = df_sol_proc['data_type'].str.replace('x_', 'x_20182019_').str.replace('r_', 'r_20182019_')
df_sol_proc = df_sol_proc['data_type'].str.split("_", expand=True)
df_sol_proc.columns = ['data_type', 'year', 'source_index', 'destination_index']

df_sol_proc['value'] = df_sol['solution']
df_sol_proc['data_type'] = df_sol_proc['data_type'].map({'b': 'biomass_demand_supply', 
                                                         'p': 'pellet_demand_supply', 
                                                         'x': 'depot_location', 
                                                         'r': 'refinery_location'})
df_sol_proc = df_sol_proc[df_sol_proc['value'] != 0]

df_sol_proc = pd.concat([df_sol_proc, df_fc_sol])
df_sol_proc.head()

Unnamed: 0,data_type,year,source_index,destination_index,value
0,biomass_demand_supply,2018,0,122,7.387925
1,biomass_demand_supply,2018,1,122,40.431847
2,biomass_demand_supply,2018,2,122,59.181629
3,biomass_demand_supply,2018,3,122,74.53672
4,biomass_demand_supply,2018,4,122,16.531315


In [8]:
df_sol_proc[df_sol_proc['data_type'] == 'depot_location']#.head()

Unnamed: 0,data_type,year,source_index,destination_index,value
3986,depot_location,20182019,1886,,1.0
3987,depot_location,20182019,564,,1.0
3988,depot_location,20182019,305,,1.0
3989,depot_location,20182019,2187,,1.0
3990,depot_location,20182019,1631,,1.0
3991,depot_location,20182019,1852,,1.0
3992,depot_location,20182019,1128,,1.0
3993,depot_location,20182019,1106,,1.0
3994,depot_location,20182019,1409,,1.0
3995,depot_location,20182019,2081,,1.0


In [9]:
df_sol_proc[df_sol_proc['data_type'] == 'refinery_location']#.head()

Unnamed: 0,data_type,year,source_index,destination_index,value
3983,refinery_location,20182019,234,,1.0
3984,refinery_location,20182019,1204,,1.0
3985,refinery_location,20182019,1768,,1.0


In [10]:
df_sol_proc.data_type.value_counts()

data_type
biomass_forecast         4836
biomass_demand_supply    3940
pellet_demand_supply       43
depot_location             22
refinery_location           3
Name: count, dtype: int64

In [11]:
df_sol_proc[df_sol_proc['data_type'] == 'biomass_demand_supply']#.head()

Unnamed: 0,data_type,year,source_index,destination_index,value
0,biomass_demand_supply,2018,0,122,7.387925
1,biomass_demand_supply,2018,1,122,40.431847
2,biomass_demand_supply,2018,2,122,59.181629
3,biomass_demand_supply,2018,3,122,74.536720
4,biomass_demand_supply,2018,4,122,16.531315
...,...,...,...,...,...
3935,biomass_demand_supply,2019,2406,2187,0.932969
3936,biomass_demand_supply,2019,2411,2187,0.057442
3937,biomass_demand_supply,2019,2412,2187,13.510995
3938,biomass_demand_supply,2019,2414,2187,0.120626


In [12]:
df_sol_proc[df_sol_proc['data_type'] == 'biomass_demand_supply'].destination_index.value_counts()#.head()

destination_index
2187    410
985     394
1202    327
305     327
564     324
1886    302
342     274
1128    270
1119    202
1605    171
1043    153
1852    143
1642    108
673     104
1757     94
1409     86
2081     62
122      51
1106     45
1323     36
1631     33
1927     24
Name: count, dtype: int64

In [13]:
df_fc.loc[1020, :]

Index        1020.000000
Latitude       22.814370
Longitude      73.640570
2018           95.019615
2019          149.782852
Name: 1020, dtype: float64

In [14]:
rest_2 = df_sol_proc[df_sol_proc['data_type'] == 'biomass_demand_supply'].groupby(['year', 'destination_index'])['value'].sum()
rest_2

year  destination_index
2018  1043                 13220.363777
      1106                  2745.738977
      1119                 20000.000000
      1128                 11314.556301
      1202                 18691.324290
      122                   4102.888521
      1323                   220.201591
      1409                  5939.697721
      1605                 20000.000000
      1631                  2354.163366
      1642                 12227.390031
      1757                 15662.727283
      1852                 10074.834094
      1886                 20000.000000
      1927                  3323.380390
      2081                  7134.895004
      2187                 20000.000000
      305                  20000.000000
      342                  20000.000000
      564                  20000.000000
      673                   9335.575064
      985                  17090.710174
2019  1043                 20000.000000
      1106                  6005.605839
      1119      

In [15]:
df_fc['2018'].sum()

273449.981870257

In [16]:
rest_2.reset_index().groupby('year')['value'].sum()[0] / df_fc['2018'].sum()

0.9999578157395729

In [17]:
rest_2.reset_index().groupby('year')['value'].sum()[1] / df_fc['2019'].sum()

0.7105236201894057

In [18]:
rest_2 = df_sol_proc[df_sol_proc['data_type'] == 'biomass_demand_supply'].groupby(['year', 'source_index'])['value'].sum()
rest_2

year  source_index
2018  0                 7.387925
      1                40.431847
      10                7.732000
      100             105.806015
      1000             38.982693
                         ...    
2019  995             248.980408
      996             316.854645
      997             128.865372
      998             375.864471
      999             257.260864
Name: value, Length: 3925, dtype: float64

In [19]:
df_fc.iloc[1063, :]

Index        1063.000000
Latitude       22.733770
Longitude      72.685070
2018          247.525650
2019          151.901977
Name: 1063, dtype: float64

In [20]:
df_sol_proc[df_sol_proc['data_type'] == 'pellet_demand_supply'].head()

Unnamed: 0,data_type,year,source_index,destination_index,value
3940,pellet_demand_supply,2018,122,234,4102.889521
3941,pellet_demand_supply,2018,305,234,20000.001
3942,pellet_demand_supply,2018,342,234,20000.001
3943,pellet_demand_supply,2018,564,234,20000.001
3944,pellet_demand_supply,2018,673,234,9335.576064


In [21]:
df_sol_proc[df_sol_proc['data_type'] == 'pellet_demand_supply'].head()

Unnamed: 0,data_type,year,source_index,destination_index,value
3940,pellet_demand_supply,2018,122,234,4102.889521
3941,pellet_demand_supply,2018,305,234,20000.001
3942,pellet_demand_supply,2018,342,234,20000.001
3943,pellet_demand_supply,2018,564,234,20000.001
3944,pellet_demand_supply,2018,673,234,9335.576064


In [22]:
df_sol_proc[df_sol_proc['data_type'] == 'pellet_demand_supply'].groupby(['year'])['value'].sum()

year
2018    273438.468585
2019    273449.981870
Name: value, dtype: float64

In [23]:
df_sol_proc[(df_sol_proc['data_type'] == 'biomass_forecast')].groupby(['year'])['value'].sum() * .8

year
2018    218759.985496
2019    307885.616861
Name: value, dtype: float64

In [24]:
df_sol_proc.to_csv(os.path.join(OUT_SYNTH_DATA_PATH, 'subm_cluster_and_refs.csv'), index=False)

In [25]:
df_sol_proc

Unnamed: 0,data_type,year,source_index,destination_index,value
0,biomass_demand_supply,2018,0,122,7.387925
1,biomass_demand_supply,2018,1,122,40.431847
2,biomass_demand_supply,2018,2,122,59.181629
3,biomass_demand_supply,2018,3,122,74.536720
4,biomass_demand_supply,2018,4,122,16.531315
...,...,...,...,...,...
4831,biomass_forecast,2019,2413,,5.321604
4832,biomass_forecast,2019,2414,,0.120626
4833,biomass_forecast,2019,2415,,0.038879
4834,biomass_forecast,2019,2416,,1.304297
