# Distribution of production to all facilities

Assumptions -> Each facility (within a product and territory group) is producing at the same fraction of its capacity

Caveats -> Allocating production for FORMER USSR to RUSSIAN FEDERATION, FORMER CZECHOSLOVAKIA to CZECH REPUBLIC, FORMER YUGOSLAVIA to SERBIA/MONTENEGRO/KOSOVO
-> Not using the specific routes stated in production, as they do not match up with the capacity routes

In [1]:
import pandas as pd

# Folder the raw input CSVs are read from (machine-specific absolute path)
filepath = 'D:/data/ICIS_data/facility_stats/to_2050/'
# Folder the processed CSVs are written to (relative to the working directory)
output_path = "../data/processed/"

In [5]:
## Data filtering
def _clean_year_columns(raw, year_cols):
    """Return a cleaned copy of `raw` with numeric year columns.

    Columns that are entirely empty are dropped, '-' placeholders and missing
    values become 0, and the columns named in `year_cols` are cast to float.
    """
    cleaned = raw.dropna(axis=1, how='all').replace('-', 0).fillna(0)
    cleaned[year_cols] = cleaned[year_cols].astype(float)
    return cleaned


# Import ICIS data for facilities (capacity) and production
capacity = pd.read_csv(filepath + 'capacity.csv', low_memory=False, index_col=0)
production = pd.read_csv(filepath + 'production.csv', low_memory=False, index_col=0)

# Year columns are labelled with the year as a string, 1978 through 2050
years = [str(year) for year in range(1978, 2051)]
capacity_filt = _clean_year_columns(capacity, years)

# Fix production names: map former states onto a present-day territory
name_dict = {
    'FORMER USSR': 'RUSSIAN FEDERATION',
    'FORMER CZECHOSLOVAKIA': 'CZECH REPUBLIC',
    'FORMER YUGOSLAVIA': 'SERBIA/MONTENEGRO/KOSOVO',
}
production['COUNTRY/TERRITORY'] = production['COUNTRY/TERRITORY'].replace(name_dict)
production_filt = _clean_year_columns(production, years)

In [20]:
# Get proportion of capacity that each facility represents per product/country
grouping_cols = ['PRODUCT', 'COUNTRY/TERRITORY']
# Number of leading columns carried through to the outputs as facility metadata.
# NOTE(review): assumes the first 14 columns of the merged tables are the
# non-year facility descriptors - confirm against capacity.csv
n_meta_cols = 14

# Sum only the year columns per group. Summing the whole frame would also try
# to add up the text columns (which may hold a mix of strings and 0 after the
# earlier fillna); pandas >= 2.0 raises on that instead of silently dropping them.
capacity_totals = capacity_filt[grouping_cols + years].groupby(grouping_cols).sum()
capacity_props = capacity_filt.merge(capacity_totals, on=grouping_cols, how='left')

# After each merge the left-hand year columns carry '_x' and the right-hand ones '_y'
x_cols, y_cols = [[year + suffix for year in years] for suffix in ['_x', '_y']]
x_to_year = dict(zip(x_cols, years))
y_to_year = dict(zip(y_cols, years))

# Share = facility capacity (_x) / group capacity (_y), computed for all years
# in one operation rather than inserting one column at a time.
capacity_share = (
    capacity_props[x_cols].rename(columns=x_to_year)
    / capacity_props[y_cols].rename(columns=y_to_year)
)
# 0/0 (no capacity in the group that year) gives NaN, which becomes a share of 0
capacity_props = pd.concat(
    [capacity_props[list(capacity_props.columns[:n_meta_cols])], capacity_share],
    axis=1,
).fillna(0)

# Get production per grouping columns
production_totals = production_filt[grouping_cols + years].groupby(grouping_cols).sum().reset_index()

# Get production per facility: capacity share (_x) * group production (_y)
facility_production = capacity_props.merge(production_totals, on=grouping_cols, how='left')
allocated = (
    facility_production[x_cols].rename(columns=x_to_year)
    * facility_production[y_cols].rename(columns=y_to_year)
)
# Groups with no production record have NaN totals after the left merge -> 0
facility_production = pd.concat(
    [facility_production[list(facility_production.columns[:n_meta_cols])], allocated],
    axis=1,
).fillna(0)

In [22]:
# Write the per-facility production table to the processed-data folder
facility_production.to_csv(path_or_buf=output_path + 'icisFacilityProduction.csv')

In [23]:
## Add uncertainties
# Fraction of each production value stored as its '_sigma' companion column
production_uncertainty = 0.1
facility_production = pd.read_csv(
    output_path + 'icisFacilityProduction.csv',
    index_col=0,
)

years = [str(year) for year in range(1978, 2051)]
for year in years:
    # e.g. '1978' -> '1978_sigma', appended after the existing columns
    sigma_col = year + '_sigma'
    facility_production[sigma_col] = production_uncertainty * facility_production[year]

facility_production.to_csv(
    path_or_buf=output_path + 'icisFacilityProduction_w_uncertainties.csv'
)

In [53]:
## Remove parent products to avoid double counting
# Reload the facility production table that includes the '_sigma' columns
production = pd.read_csv(
    output_path + 'icisFacilityProduction_w_uncertainties.csv',
    index_col=0,
)

# Year column labels (1978..2050 as strings) and their derived variants:
# '_sigma' for the uncertainty columns, '_x'/'_y' for the left/right copies
# that a pandas merge produces for overlapping column names.
years = list(map(str, range(1978, 2051)))
years_sigma = [f'{year}_sigma' for year in years]
years_x = [f'{year}_x' for year in years]
years_y = [f'{year}_y' for year in years]
years_sigma_x = [f'{label}_x' for label in years_sigma]
years_sigma_y = [f'{label}_y' for label in years_sigma]

# parents[k] is the aggregate product whose figures include children[k]
parents = [
    'POLYESTER POLYMER',
    'POLYAMIDE FIBRES',
]
children = [
    ['PET RESINS', 'POLYESTER FIBRES', 'POLYESTER FILM'],
    ['PAM RESINS (6+66)'],
]

In [54]:
def _subtract_child_production(parent_rows, child_rows, keys, value_cols):
    """Return `parent_rows` with the matching child figures subtracted.

    Parameters
    ----------
    parent_rows : DataFrame holding the rows of one parent product.
    child_rows : DataFrame holding the rows of one child product.
    keys : list of column names identifying a facility in both frames.
    value_cols : list of numeric columns to reduce (production and sigma).

    Returns
    -------
    DataFrame with the same columns and number of rows as `parent_rows`
    (fresh 0..n-1 index), where each value column is
    max(parent - child, 0); parents without a matching child are only clipped.
    """
    child_cols = [col + '_child' for col in value_cols]
    # Collapse the child to one row per facility key before merging. A left
    # merge against several matching child rows would duplicate the parent
    # row, which re-introduces the double counting this step removes.
    # dropna=False keeps rows whose key contains NaN so they can still match.
    child_totals = (
        child_rows[keys + value_cols]
        .groupby(keys, as_index=False, sort=False, dropna=False)
        .sum()
        .rename(columns=dict(zip(value_cols, child_cols)))
    )
    merged = parent_rows.merge(child_totals, how='left', on=keys)

    # Parents with no matching child get NaN child values -> subtract 0
    remaining = merged[value_cols].values - merged[child_cols].fillna(0).values

    # Child columns were renamed up front, so the parent columns keep their
    # original names and no '_x'/'_y' suffix stripping is needed.
    result = merged[list(parent_rows.columns)].copy()
    result[value_cols] = remaining.clip(0)
    return result


# Columns that identify the same facility across parent and child products
facility_keys = ['COUNTRY/TERRITORY', 'STATE', 'COMPANY', 'SITE', '#']
# Production and sigma columns are reduced in the same way
dedouble_cols = years + years_sigma

for parent, childs in zip(parents, children):
    df_parent = production[production['PRODUCT'] == parent]

    # Children are removed one after another, clipping at 0 after each step
    for child in childs:
        df_child = production[production['PRODUCT'] == child]
        df_parent = _subtract_child_production(df_parent, df_child, facility_keys, dedouble_cols)

    # Swap the original parent rows for the reduced ones
    production = pd.concat((production[production['PRODUCT'] != parent], df_parent))

In [55]:
# Sort rows by product, territory and facility descriptors, then renumber the index from 0
sort_keys = ['PRODUCT', 'COUNTRY/TERRITORY', 'COMPANY', 'SITE', '#', 'ROUTE', 'TECHNOLOGY']
production = production.sort_values(by=sort_keys)
production = production.reset_index(drop=True)

In [57]:
# Write the de-duplicated production table to the processed-data folder
production.to_csv(path_or_buf=output_path + 'icisFacilityProduction_dedoubled.csv')