# Data Tuning: Grouping variables, feature selection

* This notebook groups the variables of interest into aggregate columns and downsizes the dataset.
* It works with the combined EXIOBASE data for 2010-2019 produced by the previous notebook, `parsing_dataset`.

In [100]:
import os
import textwrap

import matplotlib as mpl
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

In [101]:
# Path of the combined EXIOBASE extract (de_gov_all.csv).
# The default is the original author's local path, so existing runs behave as
# before; set the DE_GOV_ALL_CSV environment variable to load the file from
# another location without editing the notebook.
DE_GOV_ALL_CSV = os.environ.get(
    'DE_GOV_ALL_CSV',
    '/Users/gresasmolica/Desktop/Gresa Smolica/Hertie - MDS/Master_thesis/de_gov_all.csv',
)
data_full = pd.read_csv(DE_GOV_ALL_CSV)

In [102]:
# Collapse the two tax indicators into one total, then discard the originals.
tax_cols = [
    'Taxes less subsidies on products purchased: Total',
    'Other net taxes on production',
]
# sum() seeded with the first column is a plain left-to-right `+`, so missing
# values propagate exactly as with a chained addition.
data_full['gov_taxes_total'] = sum(
    (data_full[name] for name in tax_cols[1:]),
    data_full[tax_cols[0]],
)
data_full = data_full.drop(columns=tax_cols)

In [103]:
# Collapse the per-skill-level employee compensation columns into one total,
# then discard the originals.
compensation_cols = [
    "Compensation of employees; wages, salaries, & employers' social contributions: " + level
    for level in ('Low-skilled', 'Medium-skilled', 'High-skilled')
]
# sum() seeded with the first column is a plain left-to-right `+`, so missing
# values propagate exactly as with a chained addition.
data_full['gov_employee_compensation_total'] = sum(
    (data_full[name] for name in compensation_cols[1:]),
    data_full[compensation_cols[0]],
)
data_full = data_full.drop(columns=compensation_cols)

In [104]:
# Collapse the operating-surplus components into one total, then discard the
# originals.
surplus_cols = [
    'Operating surplus: ' + component
    for component in (
        'Consumption of fixed capital',
        'Remaining net operating surplus',
        'Rents on land',
        'Royalties on resources',
    )
]
# sum() seeded with the first column is a plain left-to-right `+`, so missing
# values propagate exactly as with a chained addition.
data_full['gov_operating_surplus_total'] = sum(
    (data_full[name] for name in surplus_cols[1:]),
    data_full[surplus_cols[0]],
)
data_full = data_full.drop(columns=surplus_cols)

In [105]:
# Collapse the employment columns (every skill level x sex combination) into
# one total, then discard the originals.
# Resulting order: Low male, Low female, Medium male, Medium female,
# High male, High female.
employment_cols = [
    'Employment: ' + skill + '-skilled ' + sex
    for skill in ('Low', 'Medium', 'High')
    for sex in ('male', 'female')
]
# sum() seeded with the first column is a plain left-to-right `+`, so missing
# values propagate exactly as with a chained addition.
data_full['gov_employment_total'] = sum(
    (data_full[name] for name in employment_cols[1:]),
    data_full[employment_cols[0]],
)
data_full = data_full.drop(columns=employment_cols)

In [106]:
# Collapse the 'Employment hours: ...' columns into one total of hours, then
# discard the originals.
#
# Fix: 'Employment: Vulnerable employment' used to be added into this total.
# Its name follows the 'Employment: ...' family (summed separately into
# gov_employment_total), not the 'Employment hours: ...' family, so adding it
# here mixed two different quantities. It is no longer part of the sum.
# NOTE(review): presumably that column is a head-count while these are hours -
# confirm the units against the EXIOBASE documentation.
# NOTE(review): vulnerable-employment hours are kept in the total as before;
# verify they do not overlap with the skill-level hours.
employment_hours_cols = [
    'Employment hours: ' + skill + '-skilled ' + sex
    for skill in ('Low', 'Medium', 'High')
    for sex in ('male', 'female')
] + ['Employment hours: Vulnerable employment']
# sum() seeded with the first column is a plain left-to-right `+`, so missing
# values propagate exactly as with a chained addition.
data_full['gov_employment_hours_total'] = sum(
    (data_full[name] for name in employment_hours_cols[1:]),
    data_full[employment_hours_cols[0]],
)
# 'Employment: Vulnerable employment' is still dropped so that the resulting
# set of columns is the same as before this fix.
data_full = data_full.drop(
    columns=employment_hours_cols + ['Employment: Vulnerable employment']
)

In [107]:
# Collapse the gas emissions from combustion (to air) into one total, then
# discard the per-gas columns.
combustion_gas_cols = [
    gas + ' - combustion - air'
    for gas in ('CO2', 'CH4', 'N2O', 'SOx', 'NOx', 'NH3', 'CO')
]
# sum() seeded with the first column is a plain left-to-right `+`, so missing
# values propagate exactly as with a chained addition.
data_full['gov_gasses_combustion_air_total'] = sum(
    (data_full[name] for name in combustion_gas_cols[1:]),
    data_full[combustion_gas_cols[0]],
)
data_full = data_full.drop(columns=combustion_gas_cols)

In [108]:
# Collapse the benzo-/indeno-pyrene compound emissions from combustion (to air)
# into one total, then discard the per-compound columns.
combustion_benzo_cols = [
    compound + ' - combustion - air'
    for compound in (
        'Benzo(a)pyrene',
        'Benzo(b)fluoranthene',
        'Benzo(k)fluoranthene',
        'Indeno(1,2,3-cd)pyrene',
    )
]
# sum() seeded with the first column is a plain left-to-right `+`, so missing
# values propagate exactly as with a chained addition.
data_full['gov_benzo_combustion_air_total'] = sum(
    (data_full[name] for name in combustion_benzo_cols[1:]),
    data_full[combustion_benzo_cols[0]],
)
data_full = data_full.drop(columns=combustion_benzo_cols)

In [109]:
# Collapse the particle and chemical emissions from combustion (to air) into
# one total, then discard the individual columns.
combustion_particle_cols = [
    substance + ' - combustion - air'
    for substance in ('PCBs', 'PCDD_F', 'HCB', 'NMVOC', 'PM10', 'PM2.5', 'TSP')
]
# sum() seeded with the first column is a plain left-to-right `+`, so missing
# values propagate exactly as with a chained addition.
data_full['gov_particles_chemicals_combustion_air_total'] = sum(
    (data_full[name] for name in combustion_particle_cols[1:]),
    data_full[combustion_particle_cols[0]],
)
data_full = data_full.drop(columns=combustion_particle_cols)

In [110]:
# Collapse the per-element emissions from combustion (to air) into one total,
# then discard the per-element columns.
combustion_element_cols = [
    element + ' - combustion - air'
    for element in ('As', 'Cd', 'Cr', 'Cu', 'Hg', 'Ni', 'Pb', 'Se', 'Zn')
]
# sum() seeded with the first column is a plain left-to-right `+`, so missing
# values propagate exactly as with a chained addition.
data_full['gov_elements_combustion_air_total'] = sum(
    (data_full[name] for name in combustion_element_cols[1:]),
    data_full[combustion_element_cols[0]],
)
data_full = data_full.drop(columns=combustion_element_cols)

In [111]:
# Collapse every 'As - non combustion - <source> - air' column into one total,
# then discard the per-source columns.
as_noncomb_cols = [
    'As - non combustion - ' + source + ' - air'
    for source in (
        'Agglomeration plant - pellets',
        'Agglomeration plant - sinter',
        'Glass production',
        'Production of coke oven coke',
        'Production of gascoke',
        'Steel production: basic oxygen furnace',
        'Steel production: electric arc furnace',
        'Steel production: open hearth furnace',
    )
]
# sum() seeded with the first column is a plain left-to-right `+`, so missing
# values propagate exactly as with a chained addition.
data_full['gov_AS_non_combustion_air_total'] = sum(
    (data_full[name] for name in as_noncomb_cols[1:]),
    data_full[as_noncomb_cols[0]],
)
data_full = data_full.drop(columns=as_noncomb_cols)

In [112]:
# Collapse the B(a)P, B(b)F and B(k)F non-combustion air emissions (three
# sources each) into one total, then discard the individual columns.
# Resulting order: all B(a)P sources, then all B(b)F, then all B(k)F.
b_noncomb_cols = [
    compound + ' - non combustion - ' + source + ' - air'
    for compound in ('B(a)P', 'B(b)F', 'B(k)F')
    for source in (
        'Primary aluminium production',
        'Production of coke oven coke',
        'Production of gascoke',
    )
]
# sum() seeded with the first column is a plain left-to-right `+`, so missing
# values propagate exactly as with a chained addition.
data_full['gov_B_non_combustion_air_total'] = sum(
    (data_full[name] for name in b_noncomb_cols[1:]),
    data_full[b_noncomb_cols[0]],
)
data_full = data_full.drop(columns=b_noncomb_cols)

In [113]:
# Collapse every 'CH4 - non combustion - <source> - air' column into one total,
# then discard the per-source columns.
ch4_noncomb_cols = [
    'CH4 - non combustion - ' + source + ' - air'
    for source in (
        'Extraction/production of (natural) gas',
        'Extraction/production of crude oil',
        'Mining of antracite',
        'Mining of bituminous coal',
        'Mining of coking coal',
        'Mining of lignite (brown coal)',
        'Mining of sub-bituminous coal',
        'Oil refinery',
    )
]
# sum() seeded with the first column is a plain left-to-right `+`, so missing
# values propagate exactly as with a chained addition.
data_full['gov_CH4_non_combustion_air_total'] = sum(
    (data_full[name] for name in ch4_noncomb_cols[1:]),
    data_full[ch4_noncomb_cols[0]],
)
data_full = data_full.drop(columns=ch4_noncomb_cols)

In [114]:
# Collapse every 'CO - non combustion - <source> - air' column into one total,
# then discard the per-source columns.
co_noncomb_cols = [
    'CO - non combustion - ' + source + ' - air'
    for source in (
        'Agglomeration plant - sinter',
        'Bricks production',
        'Carbon black production',
        'Cement production',
        'Chemical wood pulp, dissolving grades',
        'Chemical wood pulp, soda and sulphate, other than dissolving grades',
        'Chemical wood pulp, sulphite, other than dissolving grades',
        'Glass production',
        'Lime production',
        'Oil refinery',
        'Pig iron production, blast furnace',
        'Primary aluminium production',
        'Production of coke oven coke',
        'Production of gascoke',
        'Semi-chemical wood pulp, pulp of fibers other than wood',
        'Steel production: basic oxygen furnace',
        'Steel production: electric arc furnace',
    )
]
# sum() seeded with the first column is a plain left-to-right `+`, so missing
# values propagate exactly as with a chained addition.
data_full['gov_CO_non_combustion_air_total'] = sum(
    (data_full[name] for name in co_noncomb_cols[1:]),
    data_full[co_noncomb_cols[0]],
)
data_full = data_full.drop(columns=co_noncomb_cols)

In [115]:
# Collapse the two 'CO2 - non combustion - <source> - air' columns into one
# total, then discard the per-source columns.
co2_noncomb_cols = [
    'CO2 - non combustion - ' + source + ' - air'
    for source in ('Cement production', 'Lime production')
]
# sum() seeded with the first column is a plain left-to-right `+`, so missing
# values propagate exactly as with a chained addition.
data_full['gov_CO2_non_combustion_air_total'] = sum(
    (data_full[name] for name in co2_noncomb_cols[1:]),
    data_full[co2_noncomb_cols[0]],
)
data_full = data_full.drop(columns=co2_noncomb_cols)

In [116]:
# Collapse every 'Cd - non combustion - <source> - air' column into one total,
# then discard the per-source columns.
cd_noncomb_cols = [
    'Cd - non combustion - ' + source + ' - air'
    for source in (
        'Agglomeration plant - pellets',
        'Agglomeration plant - sinter',
        'Glass production',
        'Nickel, unwrought',
        'Production of coke oven coke',
        'Production of gascoke',
        'Refined copper; unwrought, not alloyed',
        'Refined lead, unwrought',
        'Steel production: basic oxygen furnace',
        'Steel production: electric arc furnace',
        'Steel production: open hearth furnace',
        'Unrefined copper; copper anodes for electrolytic refining',
        'Zinc, unwrought, not alloyed',
    )
]
# sum() seeded with the first column is a plain left-to-right `+`, so missing
# values propagate exactly as with a chained addition.
data_full['gov_Cd_non_combustion_air_total'] = sum(
    (data_full[name] for name in cd_noncomb_cols[1:]),
    data_full[cd_noncomb_cols[0]],
)
data_full = data_full.drop(columns=cd_noncomb_cols)

In [117]:
# Collapse every 'Cr - non combustion - <source> - air' column into one total,
# then discard the per-source columns.
cr_noncomb_cols = [
    'Cr - non combustion - ' + source + ' - air'
    for source in (
        'Agglomeration plant - pellets',
        'Agglomeration plant - sinter',
        'Glass production',
        'Pig iron production, blast furnace',
        'Steel production: basic oxygen furnace',
        'Steel production: electric arc furnace',
        'Steel production: open hearth furnace',
    )
]
# sum() seeded with the first column is a plain left-to-right `+`, so missing
# values propagate exactly as with a chained addition.
data_full['gov_Cr_non_combustion_air_total'] = sum(
    (data_full[name] for name in cr_noncomb_cols[1:]),
    data_full[cr_noncomb_cols[0]],
)
data_full = data_full.drop(columns=cr_noncomb_cols)

In [118]:
# Collapse every 'Cu - non combustion - <source> - air' column into one total,
# then discard the per-source columns.
cu_noncomb_cols = [
    'Cu - non combustion - ' + source + ' - air'
    for source in (
        'Agglomeration plant - pellets',
        'Agglomeration plant - sinter',
        'Glass production',
        'Pig iron production, blast furnace',
        'Steel production: basic oxygen furnace',
        'Steel production: electric arc furnace',
        'Steel production: open hearth furnace',
    )
]
# sum() seeded with the first column is a plain left-to-right `+`, so missing
# values propagate exactly as with a chained addition.
data_full['gov_Cu_non_combustion_air_total'] = sum(
    (data_full[name] for name in cu_noncomb_cols[1:]),
    data_full[cu_noncomb_cols[0]],
)
data_full = data_full.drop(columns=cu_noncomb_cols)

In [119]:
# Collapse every 'HCB - non combustion - <source> - air' column into one total,
# then discard the per-source columns.
hcb_noncomb_cols = [
    'HCB - non combustion - ' + source + ' - air'
    for source in (
        'Agglomeration plant - pellets',
        'Agglomeration plant - sinter',
        'Secondary aluminium production',
    )
]
# sum() seeded with the first column is a plain left-to-right `+`, so missing
# values propagate exactly as with a chained addition.
data_full['gov_HCB_non_combustion_air_total'] = sum(
    (data_full[name] for name in hcb_noncomb_cols[1:]),
    data_full[hcb_noncomb_cols[0]],
)
data_full = data_full.drop(columns=hcb_noncomb_cols)

In [120]:
# Collapse every 'Hg - non combustion - <source> - air' column into one total,
# then discard the per-source columns.
hg_noncomb_cols = [
    'Hg - non combustion - ' + source + ' - air'
    for source in (
        'Agglomeration plant - pellets',
        'Agglomeration plant - sinter',
        'Glass production',
        'Nickel, unwrought',
        'Pig iron production, blast furnace',
        'Production of coke oven coke',
        'Production of gascoke',
        'Refined copper; unwrought, not alloyed',
        'Refined lead, unwrought',
        'Steel production: basic oxygen furnace',
        'Steel production: electric arc furnace',
        'Unrefined copper; copper anodes for electrolytic refining',
        'Zinc, unwrought, not alloyed',
    )
]
# sum() seeded with the first column is a plain left-to-right `+`, so missing
# values propagate exactly as with a chained addition.
data_full['gov_Hg_non_combustion_air_total'] = sum(
    (data_full[name] for name in hg_noncomb_cols[1:]),
    data_full[hg_noncomb_cols[0]],
)
data_full = data_full.drop(columns=hg_noncomb_cols)

In [121]:
# Collapse every 'Indeno - non combustion - <source> - air' column into one
# total, then discard the per-source columns.
indeno_noncomb_cols = [
    'Indeno - non combustion - ' + source + ' - air'
    for source in (
        'Primary aluminium production',
        'Production of coke oven coke',
        'Production of gascoke',
    )
]
# sum() seeded with the first column is a plain left-to-right `+`, so missing
# values propagate exactly as with a chained addition.
data_full['gov_Indeno_non_combustion_air_total'] = sum(
    (data_full[name] for name in indeno_noncomb_cols[1:]),
    data_full[indeno_noncomb_cols[0]],
)
data_full = data_full.drop(columns=indeno_noncomb_cols)

In [122]:
# Collapse every 'NMVOC - non combustion - <source> - air' column into one
# total, then discard the per-source columns.
#
# Fix: the previous one-line sum had two column-name string literals broken
# across physical lines, which does not parse. The names are now built once
# from the list below and reused for both the sum and the drop.
#
# The source names are kept exactly as they were spelled in this cell
# (including 'applicatoin' and 'produceduction'), since they must match the
# column labels of data_full - do not "correct" them here.
nmvoc_noncomb_cols = [
    'NMVOC - non combustion - ' + source + ' - air'
    for source in (
        'Beef and veal',
        'Coil coating (coating of aluminum and steel)',
        'Decorative paint applicatoin',
        'Degreasing',
        'Diesel distribution - transport and depots (used in mobile sources)',
        'Diesel distribution - transport and depots (used in stationary sources)',
        'Dry cleaning',
        'Extraction, proc. and distribution of gaseous fuels',
        'Extraction, proc. and distribution of liquid fuels',
        'Extraction/production of (natural) gas',
        'Extraction/production of crude oil',
        'Fat, edible and non-edible oil extraction',
        'Fish, dried, salted or in brine; smoked fish; edible fish meal',
        'Fish, fish fillets, other fish meat and fish livers and roes, frozen',
        'Fish, otherwise prepared or preserved; caviar',
        'Flexography and rotogravure in packaging',
        'Gasoline distribution - service stations',
        'Gasoline distribution - transport and depots (used in mobile sources)',
        'Gasoline distribution - transport and depots (used in stationary sources)',
        'Industrial application of adhesives (use of high performance solvent based adhesives)',
        'Industrial application of adhesives (use of traditional solvent based adhesives)',
        'Industrial paint application, general industry (continuous processes)',
        'Industrial paint application, general industry (plastic parts)',
        'Industrial paint application, general industry',
        'Inorganic chemical industry, fertilizers and other',
        'Leather coating',
        'Manufacture of automobiles',
        'Mutton and lamb',
        'Oil refinery',
        'Organic chemical industry - downstream units',
        'Organic chemical industry, storage',
        'Other industrial use of solvents',
        'Pharmaceutical industry',
        'Polystyrene processing',
        'Polyvinylchloride produceduction by suspension process',
        'Pork',
        'Poultry, dressed',
        'Printing, offset',
        'Products incorporating solvents',
        'Raw sugar',
        'Rotogravure in publication',
        'Screen printing',
        'Steam cracking (ethylene and propylene production)',
        'Synthetic rubber',
        'Tyre production',
        'Vehicle refinishing',
        'Wire coating',
    )
]
# sum() seeded with the first column is a plain left-to-right `+`, so missing
# values propagate exactly as with a chained addition.
data_full['gov_NMVOC_non_combustion_air_total'] = sum(
    (data_full[name] for name in nmvoc_noncomb_cols[1:]),
    data_full[nmvoc_noncomb_cols[0]],
)
data_full = data_full.drop(columns=nmvoc_noncomb_cols)

In [123]:
# Collapse every 'NOx - non combustion - <source> - air' column into one total,
# then discard the per-source columns.
nox_noncomb_cols = [
    'NOx - non combustion - ' + source + ' - air'
    for source in (
        'Agglomeration plant - pellets',
        'Agglomeration plant - sinter',
        'Bricks production',
        'Cement production',
        'Chemical wood pulp, dissolving grades',
        'Chemical wood pulp, soda and sulphate, other than dissolving grades',
        'Chemical wood pulp, sulphite, other than dissolving grades',
        'Glass production',
        'Lime production',
        'Nickel, unwrought',
        'Oil refinery',
        'Pig iron production, blast furnace',
        'Production of coke oven coke',
        'Production of gascoke',
        'Refined copper; unwrought, not alloyed',
        'Refined lead, unwrought',
        'Semi-chemical wood pulp, pulp of fibers other than wood',
        'Steel production: basic oxygen furnace',
        'Steel production: electric arc furnace',
        'Sulphuric acid production',
        'Unrefined copper; copper anodes for electrolytic refining',
        'Zinc, unwrought, not alloyed',
    )
]
# sum() seeded with the first column is a plain left-to-right `+`, so missing
# values propagate exactly as with a chained addition.
data_full['gov_NOx_non_combustion_air_total'] = sum(
    (data_full[name] for name in nox_noncomb_cols[1:]),
    data_full[nox_noncomb_cols[0]],
)
data_full = data_full.drop(columns=nox_noncomb_cols)

In [124]:
# column that sums every 'PM10 - non combustion - ... - air' source column
#
# The column names are listed exactly once and reused for both the sum and the
# drop. Keeping each name on its own line also avoids the very long expression
# in which a column name ended up split across two physical lines (an
# unterminated string literal).
pm10_non_combustion_air_columns = [
    'PM10 - non combustion - Agglomeration plant - pellets - air',
    'PM10 - non combustion - Agglomeration plant - sinter - air',
    'PM10 - non combustion - Aluminium ores and concentrates (Bauxite) - air',
    'PM10 - non combustion - Bricks production - air',
    'PM10 - non combustion - Briquettes production - air',
    'PM10 - non combustion - Carbon black production - air',
    'PM10 - non combustion - Cast iron production (grey iron foundries) - air',
    'PM10 - non combustion - Cement production - air',
    'PM10 - non combustion - Chemical wood pulp, dissolving grades - air',
    'PM10 - non combustion - Chemical wood pulp, soda and sulphate, other than dissolving grades - air',
    'PM10 - non combustion - Chemical wood pulp, sulphite, other than dissolving grades - air',
    'PM10 - non combustion - Chromium ores and concentrates - air',
    'PM10 - non combustion - Copper ores and concentrates - air',
    'PM10 - non combustion - Fertilizer production (N-fertilizer) - air',
    'PM10 - non combustion - Glass production - air',
    'PM10 - non combustion - Gold ores and concentrates - air',
    'PM10 - non combustion - Iron ores and concentrates - air',
    'PM10 - non combustion - Lead ores and concentrates - air',
    'PM10 - non combustion - Lime production - air',
    'PM10 - non combustion - Mining of antracite - air',
    'PM10 - non combustion - Mining of bituminous coal - air',
    'PM10 - non combustion - Mining of coking coal - air',
    'PM10 - non combustion - Mining of lignite (brown coal) - air',
    'PM10 - non combustion - Mining of sub-bituminous coal - air',
    'PM10 - non combustion - Molybdenum ores and concentrates - air',
    'PM10 - non combustion - N- fertilizer production - air',
    'PM10 - non combustion - Nickel ores and concentrates - air',
    'PM10 - non combustion - Nickel, unwrought - air',
    'PM10 - non combustion - Oil refinery - air',
    'PM10 - non combustion - Pig iron production, blast furnace - air',
    'PM10 - non combustion - Platinum ores and concentrates - air',
    'PM10 - non combustion - Primary aluminium production - air',
    'PM10 - non combustion - Production of coke oven coke - air',
    'PM10 - non combustion - Production of gascoke - air',
    'PM10 - non combustion - Refined copper; unwrought, not alloyed - air',
    'PM10 - non combustion - Refined lead, unwrought - air',
    'PM10 - non combustion - Secondary aluminium production - air',
    'PM10 - non combustion - Semi-chemical wood pulp, pulp of fibers other than wood - air',
    'PM10 - non combustion - Silver ores and concentrates - air',
    'PM10 - non combustion - Steel production: basic oxygen furnace - air',
    'PM10 - non combustion - Steel production: electric arc furnace - air',
    'PM10 - non combustion - Steel production: open hearth furnace - air',
    'PM10 - non combustion - Tin ores and concentrates - air',
    'PM10 - non combustion - Unrefined copper; copper anodes for electrolytic refining - air',
    'PM10 - non combustion - Zinc ores and concentrates - air',
    'PM10 - non combustion - Zinc, unwrought, not alloyed - air',
]

# Add the columns one by one, left to right, with `+` (same order as a chained
# col_1 + col_2 + ... expression). With numeric columns, a NaN in any source
# column makes that row's total NaN rather than being silently skipped.
pm10_non_combustion_air_total = data_full[pm10_non_combustion_air_columns[0]]
for column_name in pm10_non_combustion_air_columns[1:]:
    pm10_non_combustion_air_total = pm10_non_combustion_air_total + data_full[column_name]
data_full['gov_PM10_non_combustion_air_total'] = pm10_non_combustion_air_total

# drop the original columns now that they are represented by the total
data_full = data_full.drop(columns=pm10_non_combustion_air_columns)

In [125]:
# column that sums every 'PM2.5 - non combustion - ... - air' source column
#
# The column names are listed exactly once and reused for both the sum and the
# drop. Keeping each name on its own line also avoids the very long expression
# in which a column name ended up split across two physical lines (an
# unterminated string literal).
pm25_non_combustion_air_columns = [
    'PM2.5 - non combustion - Agglomeration plant - pellets - air',
    'PM2.5 - non combustion - Agglomeration plant - sinter - air',
    'PM2.5 - non combustion - Aluminium ores and concentrates (Bauxite) - air',
    'PM2.5 - non combustion - Bricks production - air',
    'PM2.5 - non combustion - Briquettes production - air',
    'PM2.5 - non combustion - Carbon black production - air',
    'PM2.5 - non combustion - Cast iron production (grey iron foundries) - air',
    'PM2.5 - non combustion - Cement production - air',
    'PM2.5 - non combustion - Chemical wood pulp, dissolving grades - air',
    'PM2.5 - non combustion - Chemical wood pulp, soda and sulphate, other than dissolving grades - air',
    'PM2.5 - non combustion - Chemical wood pulp, sulphite, other than dissolving grades - air',
    'PM2.5 - non combustion - Chromium ores and concentrates - air',
    'PM2.5 - non combustion - Copper ores and concentrates - air',
    'PM2.5 - non combustion - Fertilizer production (N-fertilizer) - air',
    'PM2.5 - non combustion - Glass production - air',
    'PM2.5 - non combustion - Gold ores and concentrates - air',
    'PM2.5 - non combustion - Iron ores and concentrates - air',
    'PM2.5 - non combustion - Lead ores and concentrates - air',
    'PM2.5 - non combustion - Lime production - air',
    'PM2.5 - non combustion - Mining of antracite - air',
    'PM2.5 - non combustion - Mining of bituminous coal - air',
    'PM2.5 - non combustion - Mining of coking coal - air',
    'PM2.5 - non combustion - Mining of lignite (brown coal) - air',
    'PM2.5 - non combustion - Mining of sub-bituminous coal - air',
    'PM2.5 - non combustion - Molybdenum ores and concentrates - air',
    'PM2.5 - non combustion - N- fertilizer production - air',
    'PM2.5 - non combustion - Nickel ores and concentrates - air',
    'PM2.5 - non combustion - Nickel, unwrought - air',
    'PM2.5 - non combustion - Oil refinery - air',
    'PM2.5 - non combustion - Pig iron production, blast furnace - air',
    'PM2.5 - non combustion - Platinum ores and concentrates - air',
    'PM2.5 - non combustion - Primary aluminium production - air',
    'PM2.5 - non combustion - Production of coke oven coke - air',
    'PM2.5 - non combustion - Production of gascoke - air',
    'PM2.5 - non combustion - Refined copper; unwrought, not alloyed - air',
    'PM2.5 - non combustion - Refined lead, unwrought - air',
    'PM2.5 - non combustion - Secondary aluminium production - air',
    'PM2.5 - non combustion - Semi-chemical wood pulp, pulp of fibers other than wood - air',
    'PM2.5 - non combustion - Silver ores and concentrates - air',
    'PM2.5 - non combustion - Steel production: basic oxygen furnace - air',
    'PM2.5 - non combustion - Steel production: electric arc furnace - air',
    'PM2.5 - non combustion - Steel production: open hearth furnace - air',
    'PM2.5 - non combustion - Tin ores and concentrates - air',
    'PM2.5 - non combustion - Unrefined copper; copper anodes for electrolytic refining - air',
    'PM2.5 - non combustion - Zinc ores and concentrates - air',
    'PM2.5 - non combustion - Zinc, unwrought, not alloyed - air',
]

# Add the columns one by one, left to right, with `+` (same order as a chained
# col_1 + col_2 + ... expression). With numeric columns, a NaN in any source
# column makes that row's total NaN rather than being silently skipped.
pm25_non_combustion_air_total = data_full[pm25_non_combustion_air_columns[0]]
for column_name in pm25_non_combustion_air_columns[1:]:
    pm25_non_combustion_air_total = pm25_non_combustion_air_total + data_full[column_name]
data_full['gov_PM2.5_non_combustion_air_total'] = pm25_non_combustion_air_total

# drop the original columns now that they are represented by the total
data_full = data_full.drop(columns=pm25_non_combustion_air_columns)

In [126]:
# column that sums every 'TSP - non combustion - ... - air' source column
#
# Only the TSP columns listed here go into the total, and the same list is
# used for the drop. The 'Domestic Extraction Used ...' aggregate columns
# (Grazing and Fodder, Forestry and Timber, Fisheries, Non-metalic Minerals,
# Iron Ore, Non-ferous metal ores) are not TSP columns by name and are not
# dropped by this cell, so they must not be added into this total; they stay
# in data_full untouched.
# NOTE(review): if a combined total of those six columns is wanted, it needs
# its own dedicated column - confirm with the analysis design.
tsp_non_combustion_air_columns = [
    'TSP - non combustion - Agglomeration plant - pellets - air',
    'TSP - non combustion - Agglomeration plant - sinter - air',
    'TSP - non combustion - Aluminium ores and concentrates (Bauxite) - air',
    'TSP - non combustion - Bricks production - air',
    'TSP - non combustion - Briquettes production - air',
    'TSP - non combustion - Carbon black production - air',
    'TSP - non combustion - Cast iron production (grey iron foundries) - air',
    'TSP - non combustion - Cement production - air',
    'TSP - non combustion - Chemical wood pulp, dissolving grades - air',
    'TSP - non combustion - Chemical wood pulp, soda and sulphate, other than dissolving grades - air',
    'TSP - non combustion - Chemical wood pulp, sulphite, other than dissolving grades - air',
    'TSP - non combustion - Chromium ores and concentrates - air',
    'TSP - non combustion - Copper ores and concentrates - air',
    'TSP - non combustion - Fertilizer production (N-fertilizer) - air',
    'TSP - non combustion - Glass production - air',
    'TSP - non combustion - Gold ores and concentrates - air',
    'TSP - non combustion - Iron ores and concentrates - air',
    'TSP - non combustion - Lead ores and concentrates - air',
    'TSP - non combustion - Lime production - air',
    'TSP - non combustion - Mining of antracite - air',
    'TSP - non combustion - Mining of bituminous coal - air',
    'TSP - non combustion - Mining of coking coal - air',
    'TSP - non combustion - Mining of lignite (brown coal) - air',
    'TSP - non combustion - Mining of sub-bituminous coal - air',
    'TSP - non combustion - Molybdenum ores and concentrates - air',
    'TSP - non combustion - N- fertilizer production - air',
    'TSP - non combustion - Nickel ores and concentrates - air',
    'TSP - non combustion - Nickel, unwrought - air',
    'TSP - non combustion - Oil refinery - air',
    'TSP - non combustion - Pig iron production, blast furnace - air',
    'TSP - non combustion - Platinum ores and concentrates - air',
    'TSP - non combustion - Primary aluminium production - air',
    'TSP - non combustion - Production of coke oven coke - air',
    'TSP - non combustion - Production of gascoke - air',
    'TSP - non combustion - Refined copper; unwrought, not alloyed - air',
    'TSP - non combustion - Refined lead, unwrought - air',
    'TSP - non combustion - Secondary aluminium production - air',
    'TSP - non combustion - Semi-chemical wood pulp, pulp of fibers other than wood - air',
    'TSP - non combustion - Silver ores and concentrates - air',
    'TSP - non combustion - Steel production: basic oxygen furnace - air',
    'TSP - non combustion - Steel production: electric arc furnace - air',
    'TSP - non combustion - Steel production: open hearth furnace - air',
    'TSP - non combustion - Tin ores and concentrates - air',
    'TSP - non combustion - Unrefined copper; copper anodes for electrolytic refining - air',
    'TSP - non combustion - Zinc ores and concentrates - air',
    'TSP - non combustion - Zinc, unwrought, not alloyed - air',
]

# Add the columns one by one, left to right, with `+` (same order as a chained
# col_1 + col_2 + ... expression). With numeric columns, a NaN in any source
# column makes that row's total NaN rather than being silently skipped.
tsp_non_combustion_air_total = data_full[tsp_non_combustion_air_columns[0]]
for column_name in tsp_non_combustion_air_columns[1:]:
    tsp_non_combustion_air_total = tsp_non_combustion_air_total + data_full[column_name]
data_full['gov_TSP_non_combustion_air_total'] = tsp_non_combustion_air_total

# drop the original columns now that they are represented by the total
data_full = data_full.drop(columns=tsp_non_combustion_air_columns)

In [127]:
# column that sums domestic extraction used with plants
data_full['gov_Domestic_Extraction_Used_Primary_Crops_total'] = data_full['Domestic Extraction Used - Crop residues - Feed'] + data_full['Domestic Extraction Used - Crop residues - Straw'] + data_full['Domestic Extraction Used - Fishery - Aquatic plants'] + data_full['Domestic Extraction Used - Fishery - Inland waters fish catch'] + data_full['Domestic Extraction Used - Fishery - Marine fish catch'] + data_full['Domestic Extraction Used - Fishery - Other (e.g. Aquatic mammals)'] + data_full['Domestic Extraction Used - Fodder crops - Alfalfa for Forage and Silage'] + data_full['Domestic Extraction Used - Fodder crops - Beets for Fodder'] + data_full['Domestic Extraction Used - Fodder crops - Cabbage for Fodder'] + data_full['Domestic Extraction Used - Fodder crops - Carrots for Fodder'] + data_full['Domestic Extraction Used - Fodder crops - Clover for Forage and Silage'] + data_full['Domestic Extraction Used - Fodder crops - Forage Products nec'] + data_full['Domestic Extraction Used - Fodder crops - Grasses nec for Forage and Silage'] + data_full['Domestic Extraction Used - Fodder crops - Green Oilseeds for Fodder'] + data_full['Domestic Extraction Used - Fodder crops - Leguminous nec for forage and Silage'] + data_full['Domestic Extraction Used - Fodder crops - Maize for Forage and Silage'] + data_full['Domestic Extraction Used - Fodder crops - Other grasses'] + data_full['Domestic Extraction Used - Fodder crops - Rye Grass, Forage and Silage'] + data_full['Domestic Extraction Used - Fodder crops - Sorghum for Forage and Silage'] + data_full['Domestic Extraction Used - Fodder crops - Swedes for Fodder'] + data_full['Domestic Extraction Used - Fodder crops - Turnips for Fodder'] + data_full['Domestic Extraction Used - Fodder crops - Vegetables and Roots, Fodder'] + data_full['Domestic Extraction Used - Forestry - Coniferous wood - Industrial roundwood'] + data_full['Domestic Extraction Used - Forestry - Coniferous wood - Wood fuel'] + data_full['Domestic Extraction 
Used - Forestry - Kapok Fruit'] + data_full['Domestic Extraction Used - Forestry - Natural Gums'] + data_full['Domestic Extraction Used - Forestry - Non-coniferous wood - Industrial roundwood'] + data_full['Domestic Extraction Used - Forestry - Non-coniferous wood - Wood fuel'] + data_full['Domestic Extraction Used - Forestry - Raw materials other than wood'] + data_full['Domestic Extraction Used - Fossil Fuel: Total'] + data_full['Domestic Extraction Used - Grazing'] + data_full['Domestic Extraction Used - Metal Ores - Bauxite and aluminium ores'] + data_full['Domestic Extraction Used - Metal Ores - Copper ores'] + data_full['Domestic Extraction Used - Metal Ores - Gold ores'] + data_full['Domestic Extraction Used - Metal Ores - Iron ores'] + data_full['Domestic Extraction Used - Metal Ores - Lead ores'] + data_full['Domestic Extraction Used - Metal Ores - Nickel ores'] + data_full['Domestic Extraction Used - Metal Ores - Other non-ferrous metal ores'] + data_full['Domestic Extraction Used - Metal Ores - PGM ores'] + data_full['Domestic Extraction Used - Metal Ores - Silver ores'] + data_full['Domestic Extraction Used - Metal Ores - Tin ores'] + data_full['Domestic Extraction Used - Metal Ores - Uranium and thorium ores'] + data_full['Domestic Extraction Used - Metal Ores - Zinc ores'] + data_full['Domestic Extraction Used - Non-Metallic Minerals - Building stones'] + data_full['Domestic Extraction Used - Non-Metallic Minerals - Chemical and fertilizer minerals'] + data_full['Domestic Extraction Used - Non-Metallic Minerals - Clays and kaolin'] + data_full['Domestic Extraction Used - Non-Metallic Minerals - Gravel and sand'] + data_full['Domestic Extraction Used - Non-Metallic Minerals - Limestone, gypsum, chalk, dolomite'] + data_full['Domestic Extraction Used - Non-Metallic Minerals - Other minerals'] + data_full['Domestic Extraction Used - Non-Metallic Minerals - Salt'] + data_full['Domestic Extraction Used - Non-Metallic Minerals - Slate'] + 
data_full['Domestic Extraction Used - Primary Crops - Kapokseed in Shell'] + data_full['Domestic Extraction Used - Primary Crops - Honey'] + data_full['Domestic Extraction Used - Primary Crops - Beeswax'] + data_full['Domestic Extraction Used - Primary Crops - Abaca'] + data_full['Domestic Extraction Used - Primary Crops - Agave Fibres nes'] + data_full['Domestic Extraction Used - Primary Crops - Almonds'] + data_full['Domestic Extraction Used - Primary Crops - Anise, Badian, Fennel'] + data_full['Domestic Extraction Used - Primary Crops - Apples'] + data_full['Domestic Extraction Used - Primary Crops - Apricots'] + data_full['Domestic Extraction Used - Primary Crops - Arecanuts'] + data_full['Domestic Extraction Used - Primary Crops - Artichokes'] + data_full['Domestic Extraction Used - Primary Crops - Asparagus'] + data_full['Domestic Extraction Used - Primary Crops - Avocados'] + data_full['Domestic Extraction Used - Primary Crops - Bambara beans'] + data_full['Domestic Extraction Used - Primary Crops - Bananas'] + data_full['Domestic Extraction Used - Primary Crops - Barley'] + data_full['Domestic Extraction Used - Primary Crops - Beans, dry'] + data_full['Domestic Extraction Used - Primary Crops - Beans, green'] + data_full['Domestic Extraction Used - Primary Crops - Berries nec'] + data_full['Domestic Extraction Used - Primary Crops - Blueberries'] + data_full['Domestic Extraction Used - Primary Crops - Brazil nuts, with shell'] + data_full['Domestic Extraction Used - Primary Crops - Broad beans, horse beans, dry'] + data_full['Domestic Extraction Used - Primary Crops - Buckwheat'] + data_full['Domestic Extraction Used - Primary Crops - Cabbages'] + data_full['Domestic Extraction Used - Primary Crops - Canary Seed'] + data_full['Domestic Extraction Used - Primary Crops - Carobs'] + data_full['Domestic Extraction Used - Primary Crops - Carrots'] + data_full['Domestic Extraction Used - Primary Crops - Cashew nuts, with shell'] + data_full['Domestic Extraction 
Used - Primary Crops - Cashewapple'] + data_full['Domestic Extraction Used - Primary Crops - Cassava'] + data_full['Domestic Extraction Used - Primary Crops - Cassava leaves'] + data_full['Domestic Extraction Used - Primary Crops - Castor oil seed'] + data_full['Domestic Extraction Used - Primary Crops - Cauliflower'] + data_full['Domestic Extraction Used - Primary Crops - Cereals nec'] + data_full['Domestic Extraction Used - Primary Crops - Cherries'] + data_full['Domestic Extraction Used - Primary Crops - Chestnuts'] + data_full['Domestic Extraction Used - Primary Crops - Chick peas'] + data_full['Domestic Extraction Used - Primary Crops - Chicory Roots'] + data_full['Domestic Extraction Used - Primary Crops - Chillies and peppers, dry'] + data_full['Domestic Extraction Used - Primary Crops - Chillies and peppers, green'] + data_full['Domestic Extraction Used - Primary Crops - Cinnamon'] + data_full['Domestic Extraction Used - Primary Crops - Citrus Fruit nec'] + data_full['Domestic Extraction Used - Primary Crops - Cloves'] + data_full['Domestic Extraction Used - Primary Crops - Cocoa Beans'] + data_full['Domestic Extraction Used - Primary Crops - Coconuts'] + data_full['Domestic Extraction Used - Primary Crops - Coffee, Green'] + data_full['Domestic Extraction Used - Primary Crops - Coir'] + data_full['Domestic Extraction Used - Primary Crops - Cotton Lint'] + data_full['Domestic Extraction Used - Primary Crops - Cottonseed'] + data_full['Domestic Extraction Used - Primary Crops - Cow peas, dry'] + data_full['Domestic Extraction Used - Primary Crops - Cranberries'] + data_full['Domestic Extraction Used - Primary Crops - Cucumbers and Gherkins'] + data_full['Domestic Extraction Used - Primary Crops - Currants'] + data_full['Domestic Extraction Used - Primary Crops - Dates'] + data_full['Domestic Extraction Used - Primary Crops - Eggplants'] + data_full['Domestic Extraction Used - Primary Crops - Fibre Crops nes'] + data_full['Domestic Extraction Used - Primary 
Crops - Figs'] + data_full['Domestic Extraction Used - Primary Crops - Flax Fibre and Tow'] + data_full['Domestic Extraction Used - Primary Crops - Fonio'] + data_full['Domestic Extraction Used - Primary Crops - Fruit Fresh Nes'] + data_full['Domestic Extraction Used - Primary Crops - Fruit, tropical fresh nes'] + data_full['Domestic Extraction Used - Primary Crops - Garlic'] + data_full['Domestic Extraction Used - Primary Crops - Ginger'] + data_full['Domestic Extraction Used - Primary Crops - Gooseberries'] + data_full['Domestic Extraction Used - Primary Crops - Grapefruit and Pomelos'] + data_full['Domestic Extraction Used - Primary Crops - Grapes'] + data_full['Domestic Extraction Used - Primary Crops - Groundnuts in Shell'] + data_full['Domestic Extraction Used - Primary Crops - Hazelnuts'] + data_full['Domestic Extraction Used - Primary Crops - Hemp Fibre and Tow'] + data_full['Domestic Extraction Used - Primary Crops - Hempseed'] + data_full['Domestic Extraction Used - Primary Crops - Hops'] + data_full['Domestic Extraction Used - Primary Crops - Jojoba Seeds'] + data_full['Domestic Extraction Used - Primary Crops - Jute and Jute-like Fibres'] + data_full['Domestic Extraction Used - Primary Crops - Kapok Fibre'] + data_full['Domestic Extraction Used - Primary Crops - Karite Nuts'] + data_full['Domestic Extraction Used - Primary Crops - Kiwi Fruit'] + data_full['Domestic Extraction Used - Primary Crops - Kolanuts'] + data_full['Domestic Extraction Used - Primary Crops - Leeks and other Alliac. 
Veg.'] + data_full['Domestic Extraction Used - Primary Crops - Leguminous vegetables, nes'] + data_full['Domestic Extraction Used - Primary Crops - Lemons and Limes'] + data_full['Domestic Extraction Used - Primary Crops - Lentils'] + data_full['Domestic Extraction Used - Primary Crops - Lettuce'] + data_full['Domestic Extraction Used - Primary Crops - Linseed'] + data_full['Domestic Extraction Used - Primary Crops - Lupins'] + data_full['Domestic Extraction Used - Primary Crops - Maize'] + data_full['Domestic Extraction Used - Primary Crops - Maize, green'] + data_full['Domestic Extraction Used - Primary Crops - Mangoes, mangosteens, guavas'] + data_full['Domestic Extraction Used - Primary Crops - Mate'] + data_full['Domestic Extraction Used - Primary Crops - Melonseed'] + data_full['Domestic Extraction Used - Primary Crops - Millet'] + data_full['Domestic Extraction Used - Primary Crops - Mixed Grain'] + data_full['Domestic Extraction Used - Primary Crops - Mushrooms'] + data_full['Domestic Extraction Used - Primary Crops - Mustard Seed'] + data_full['Domestic Extraction Used - Primary Crops - Natural Rubber'] + data_full['Domestic Extraction Used - Primary Crops - Nutmeg, mace and cardamoms'] + data_full['Domestic Extraction Used - Primary Crops - Nuts, nes'] + data_full['Domestic Extraction Used - Primary Crops - Oats'] + data_full['Domestic Extraction Used - Primary Crops - Oil Palm Fruit'] + data_full['Domestic Extraction Used - Primary Crops - Oilseeds nec'] + data_full['Domestic Extraction Used - Primary Crops - Okra'] + data_full['Domestic Extraction Used - Primary Crops - Olives'] + data_full['Domestic Extraction Used - Primary Crops - Onions'] + data_full['Domestic Extraction Used - Primary Crops - Onions, dry'] + data_full['Domestic Extraction Used - Primary Crops - Oranges'] + data_full['Domestic Extraction Used - Primary Crops - Other Bastfibres'] + data_full['Domestic Extraction Used - Primary Crops - Other melons'] + data_full['Domestic Extraction 
Used - Primary Crops - Papayas'] + data_full['Domestic Extraction Used - Primary Crops - Peaches and Nectarines'] + data_full['Domestic Extraction Used - Primary Crops - Pears'] + data_full['Domestic Extraction Used - Primary Crops - Peas, Green'] + data_full['Domestic Extraction Used - Primary Crops - Peas, dry'] + data_full['Domestic Extraction Used - Primary Crops - Pepper'] + data_full['Domestic Extraction Used - Primary Crops - Peppermint'] + data_full['Domestic Extraction Used - Primary Crops - Persimmons'] + data_full['Domestic Extraction Used - Primary Crops - Pigeon peas'] + data_full['Domestic Extraction Used - Primary Crops - Pineapples'] + data_full['Domestic Extraction Used - Primary Crops - Pistachios'] + data_full['Domestic Extraction Used - Primary Crops - Plantains'] + data_full['Domestic Extraction Used - Primary Crops - Plums'] + data_full['Domestic Extraction Used - Primary Crops - Pome fruit, nes'] + data_full['Domestic Extraction Used - Primary Crops - Poppy Seed'] + data_full['Domestic Extraction Used - Primary Crops - Potatoes'] + data_full['Domestic Extraction Used - Primary Crops - Pulses nec'] + data_full['Domestic Extraction Used - Primary Crops - Pumpkins, Squash, Gourds'] + data_full['Domestic Extraction Used - Primary Crops - Pyrethrum, Dried Flowers'] + data_full['Domestic Extraction Used - Primary Crops - Quinces'] + data_full['Domestic Extraction Used - Primary Crops - Quinoa'] + data_full['Domestic Extraction Used - Primary Crops - Ramie'] + data_full['Domestic Extraction Used - Primary Crops - Rapeseed'] + data_full['Domestic Extraction Used - Primary Crops - Raspberries'] + data_full['Domestic Extraction Used - Primary Crops - Rice'] + data_full['Domestic Extraction Used - Primary Crops - Roots and Tubers, nes'] + data_full['Domestic Extraction Used - Primary Crops - Rye'] + data_full['Domestic Extraction Used - Primary Crops - Safflower Seed'] + data_full['Domestic Extraction Used - Primary Crops - Sesame Seed'] + 
data_full['Domestic Extraction Used - Primary Crops - Sisal'] + data_full['Domestic Extraction Used - Primary Crops - Sorghum'] + data_full['Domestic Extraction Used - Primary Crops - Sour Cherries'] + data_full['Domestic Extraction Used - Primary Crops - Soybeans'] + data_full['Domestic Extraction Used - Primary Crops - Spices nec'] + data_full['Domestic Extraction Used - Primary Crops - Spinach'] + data_full['Domestic Extraction Used - Primary Crops - Stone Fruit nec,'] + data_full['Domestic Extraction Used - Primary Crops - Strawberries'] + data_full['Domestic Extraction Used - Primary Crops - String beans'] + data_full['Domestic Extraction Used - Primary Crops - Sugar Beets'] + data_full['Domestic Extraction Used - Primary Crops - Sugar Cane'] + data_full['Domestic Extraction Used - Primary Crops - Sugar Crops nes'] + data_full['Domestic Extraction Used - Primary Crops - Sunflower Seed'] + data_full['Domestic Extraction Used - Primary Crops - Sweet Potatoes'] + data_full['Domestic Extraction Used - Primary Crops - Tallowtree Seeds'] + data_full['Domestic Extraction Used - Primary Crops - Tang. Mand Clement. 
Satsma'] + data_full['Domestic Extraction Used - Primary Crops - Taro'] + data_full['Domestic Extraction Used - Primary Crops - Tea'] + data_full['Domestic Extraction Used - Primary Crops - Tea nes'] + data_full['Domestic Extraction Used - Primary Crops - Tobacco Leaves'] + data_full['Domestic Extraction Used - Primary Crops - Tomatoes'] + data_full['Domestic Extraction Used - Primary Crops - Triticale'] + data_full['Domestic Extraction Used - Primary Crops - Tung Nuts'] + data_full['Domestic Extraction Used - Primary Crops - Vanilla'] + data_full['Domestic Extraction Used - Primary Crops - Vegetables Fresh nec'] + data_full['Domestic Extraction Used - Primary Crops - Vetches'] + data_full['Domestic Extraction Used - Primary Crops - Walnuts'] + data_full['Domestic Extraction Used - Primary Crops - Watermelons'] + data_full['Domestic Extraction Used - Primary Crops - Wheat'] + data_full['Domestic Extraction Used - Primary Crops - Yams'] + data_full['Domestic Extraction Used - Primary Crops - Yautia']

# Remove the listed 'Domestic Extraction Used' source columns from data_full.
# Presumably they are no longer needed once the aggregate column(s) computed
# earlier in this cell exist -- confirm against the sums above.
# drop() raises KeyError if any listed label is missing from the frame.
domestic_extraction_used_columns = [
    # Crop residues
    "Domestic Extraction Used - Crop residues - Feed",
    "Domestic Extraction Used - Crop residues - Straw",
    # Fishery
    "Domestic Extraction Used - Fishery - Aquatic plants",
    "Domestic Extraction Used - Fishery - Inland waters fish catch",
    "Domestic Extraction Used - Fishery - Marine fish catch",
    "Domestic Extraction Used - Fishery - Other (e.g. Aquatic mammals)",
    # Fodder crops
    "Domestic Extraction Used - Fodder crops - Alfalfa for Forage and Silage",
    "Domestic Extraction Used - Fodder crops - Beets for Fodder",
    "Domestic Extraction Used - Fodder crops - Cabbage for Fodder",
    "Domestic Extraction Used - Fodder crops - Carrots for Fodder",
    "Domestic Extraction Used - Fodder crops - Clover for Forage and Silage",
    "Domestic Extraction Used - Fodder crops - Forage Products nec",
    "Domestic Extraction Used - Fodder crops - Grasses nec for Forage and Silage",
    "Domestic Extraction Used - Fodder crops - Green Oilseeds for Fodder",
    "Domestic Extraction Used - Fodder crops - Leguminous nec for forage and Silage",
    "Domestic Extraction Used - Fodder crops - Maize for Forage and Silage",
    "Domestic Extraction Used - Fodder crops - Other grasses",
    "Domestic Extraction Used - Fodder crops - Rye Grass, Forage and Silage",
    "Domestic Extraction Used - Fodder crops - Sorghum for Forage and Silage",
    "Domestic Extraction Used - Fodder crops - Swedes for Fodder",
    "Domestic Extraction Used - Fodder crops - Turnips for Fodder",
    "Domestic Extraction Used - Fodder crops - Vegetables and Roots, Fodder",
    # Forestry
    "Domestic Extraction Used - Forestry - Coniferous wood - Industrial roundwood",
    "Domestic Extraction Used - Forestry - Coniferous wood - Wood fuel",
    "Domestic Extraction Used - Forestry - Kapok Fruit",
    "Domestic Extraction Used - Forestry - Natural Gums",
    "Domestic Extraction Used - Forestry - Non-coniferous wood - Industrial roundwood",
    "Domestic Extraction Used - Forestry - Non-coniferous wood - Wood fuel",
    "Domestic Extraction Used - Forestry - Raw materials other than wood",
    # Fossil fuel total and grazing
    "Domestic Extraction Used - Fossil Fuel: Total",
    "Domestic Extraction Used - Grazing",
    # Metal ores
    "Domestic Extraction Used - Metal Ores - Bauxite and aluminium ores",
    "Domestic Extraction Used - Metal Ores - Copper ores",
    "Domestic Extraction Used - Metal Ores - Gold ores",
    "Domestic Extraction Used - Metal Ores - Iron ores",
    "Domestic Extraction Used - Metal Ores - Lead ores",
    "Domestic Extraction Used - Metal Ores - Nickel ores",
    "Domestic Extraction Used - Metal Ores - Other non-ferrous metal ores",
    "Domestic Extraction Used - Metal Ores - PGM ores",
    "Domestic Extraction Used - Metal Ores - Silver ores",
    "Domestic Extraction Used - Metal Ores - Tin ores",
    "Domestic Extraction Used - Metal Ores - Uranium and thorium ores",
    "Domestic Extraction Used - Metal Ores - Zinc ores",
    # Non-metallic minerals
    "Domestic Extraction Used - Non-Metallic Minerals - Building stones",
    "Domestic Extraction Used - Non-Metallic Minerals - Chemical and fertilizer minerals",
    "Domestic Extraction Used - Non-Metallic Minerals - Clays and kaolin",
    "Domestic Extraction Used - Non-Metallic Minerals - Gravel and sand",
    "Domestic Extraction Used - Non-Metallic Minerals - Limestone, gypsum, chalk, dolomite",
    "Domestic Extraction Used - Non-Metallic Minerals - Other minerals",
    "Domestic Extraction Used - Non-Metallic Minerals - Salt",
    "Domestic Extraction Used - Non-Metallic Minerals - Slate",
    # Primary crops
    "Domestic Extraction Used - Primary Crops - Kapokseed in Shell",
    "Domestic Extraction Used - Primary Crops - Honey",
    "Domestic Extraction Used - Primary Crops - Beeswax",
    "Domestic Extraction Used - Primary Crops - Abaca",
    "Domestic Extraction Used - Primary Crops - Agave Fibres nes",
    "Domestic Extraction Used - Primary Crops - Almonds",
    "Domestic Extraction Used - Primary Crops - Anise, Badian, Fennel",
    "Domestic Extraction Used - Primary Crops - Apples",
    "Domestic Extraction Used - Primary Crops - Apricots",
    "Domestic Extraction Used - Primary Crops - Arecanuts",
    "Domestic Extraction Used - Primary Crops - Artichokes",
    "Domestic Extraction Used - Primary Crops - Asparagus",
    "Domestic Extraction Used - Primary Crops - Avocados",
    "Domestic Extraction Used - Primary Crops - Bambara beans",
    "Domestic Extraction Used - Primary Crops - Bananas",
    "Domestic Extraction Used - Primary Crops - Barley",
    "Domestic Extraction Used - Primary Crops - Beans, dry",
    "Domestic Extraction Used - Primary Crops - Beans, green",
    "Domestic Extraction Used - Primary Crops - Berries nec",
    "Domestic Extraction Used - Primary Crops - Blueberries",
    "Domestic Extraction Used - Primary Crops - Brazil nuts, with shell",
    "Domestic Extraction Used - Primary Crops - Broad beans, horse beans, dry",
    "Domestic Extraction Used - Primary Crops - Buckwheat",
    "Domestic Extraction Used - Primary Crops - Cabbages",
    "Domestic Extraction Used - Primary Crops - Canary Seed",
    "Domestic Extraction Used - Primary Crops - Carobs",
    "Domestic Extraction Used - Primary Crops - Carrots",
    "Domestic Extraction Used - Primary Crops - Cashew nuts, with shell",
    "Domestic Extraction Used - Primary Crops - Cashewapple",
    "Domestic Extraction Used - Primary Crops - Cassava",
    "Domestic Extraction Used - Primary Crops - Cassava leaves",
    "Domestic Extraction Used - Primary Crops - Castor oil seed",
    "Domestic Extraction Used - Primary Crops - Cauliflower",
    "Domestic Extraction Used - Primary Crops - Cereals nec",
    "Domestic Extraction Used - Primary Crops - Cherries",
    "Domestic Extraction Used - Primary Crops - Chestnuts",
    "Domestic Extraction Used - Primary Crops - Chick peas",
    "Domestic Extraction Used - Primary Crops - Chicory Roots",
    "Domestic Extraction Used - Primary Crops - Chillies and peppers, dry",
    "Domestic Extraction Used - Primary Crops - Chillies and peppers, green",
    "Domestic Extraction Used - Primary Crops - Cinnamon",
    "Domestic Extraction Used - Primary Crops - Citrus Fruit nec",
    "Domestic Extraction Used - Primary Crops - Cloves",
    "Domestic Extraction Used - Primary Crops - Cocoa Beans",
    "Domestic Extraction Used - Primary Crops - Coconuts",
    "Domestic Extraction Used - Primary Crops - Coffee, Green",
    "Domestic Extraction Used - Primary Crops - Coir",
    "Domestic Extraction Used - Primary Crops - Cotton Lint",
    "Domestic Extraction Used - Primary Crops - Cottonseed",
    "Domestic Extraction Used - Primary Crops - Cow peas, dry",
    "Domestic Extraction Used - Primary Crops - Cranberries",
    "Domestic Extraction Used - Primary Crops - Cucumbers and Gherkins",
    "Domestic Extraction Used - Primary Crops - Currants",
    "Domestic Extraction Used - Primary Crops - Dates",
    "Domestic Extraction Used - Primary Crops - Eggplants",
    "Domestic Extraction Used - Primary Crops - Fibre Crops nes",
    "Domestic Extraction Used - Primary Crops - Figs",
    "Domestic Extraction Used - Primary Crops - Flax Fibre and Tow",
    "Domestic Extraction Used - Primary Crops - Fonio",
    "Domestic Extraction Used - Primary Crops - Fruit Fresh Nes",
    "Domestic Extraction Used - Primary Crops - Fruit, tropical fresh nes",
    "Domestic Extraction Used - Primary Crops - Garlic",
    "Domestic Extraction Used - Primary Crops - Ginger",
    "Domestic Extraction Used - Primary Crops - Gooseberries",
    "Domestic Extraction Used - Primary Crops - Grapefruit and Pomelos",
    "Domestic Extraction Used - Primary Crops - Grapes",
    "Domestic Extraction Used - Primary Crops - Groundnuts in Shell",
    "Domestic Extraction Used - Primary Crops - Hazelnuts",
    "Domestic Extraction Used - Primary Crops - Hemp Fibre and Tow",
    "Domestic Extraction Used - Primary Crops - Hempseed",
    "Domestic Extraction Used - Primary Crops - Hops",
    "Domestic Extraction Used - Primary Crops - Jojoba Seeds",
    "Domestic Extraction Used - Primary Crops - Jute and Jute-like Fibres",
    "Domestic Extraction Used - Primary Crops - Kapok Fibre",
    "Domestic Extraction Used - Primary Crops - Karite Nuts",
    "Domestic Extraction Used - Primary Crops - Kiwi Fruit",
    "Domestic Extraction Used - Primary Crops - Kolanuts",
    "Domestic Extraction Used - Primary Crops - Leeks and other Alliac. Veg.",
    "Domestic Extraction Used - Primary Crops - Leguminous vegetables, nes",
    "Domestic Extraction Used - Primary Crops - Lemons and Limes",
    "Domestic Extraction Used - Primary Crops - Lentils",
    "Domestic Extraction Used - Primary Crops - Lettuce",
    "Domestic Extraction Used - Primary Crops - Linseed",
    "Domestic Extraction Used - Primary Crops - Lupins",
    "Domestic Extraction Used - Primary Crops - Maize",
    "Domestic Extraction Used - Primary Crops - Maize, green",
    "Domestic Extraction Used - Primary Crops - Mangoes, mangosteens, guavas",
    "Domestic Extraction Used - Primary Crops - Mate",
    "Domestic Extraction Used - Primary Crops - Melonseed",
    "Domestic Extraction Used - Primary Crops - Millet",
    "Domestic Extraction Used - Primary Crops - Mixed Grain",
    "Domestic Extraction Used - Primary Crops - Mushrooms",
    "Domestic Extraction Used - Primary Crops - Mustard Seed",
    "Domestic Extraction Used - Primary Crops - Natural Rubber",
    "Domestic Extraction Used - Primary Crops - Nutmeg, mace and cardamoms",
    "Domestic Extraction Used - Primary Crops - Nuts, nes",
    "Domestic Extraction Used - Primary Crops - Oats",
    "Domestic Extraction Used - Primary Crops - Oil Palm Fruit",
    "Domestic Extraction Used - Primary Crops - Oilseeds nec",
    "Domestic Extraction Used - Primary Crops - Okra",
    "Domestic Extraction Used - Primary Crops - Olives",
    "Domestic Extraction Used - Primary Crops - Onions",
    "Domestic Extraction Used - Primary Crops - Onions, dry",
    "Domestic Extraction Used - Primary Crops - Oranges",
    "Domestic Extraction Used - Primary Crops - Other Bastfibres",
    "Domestic Extraction Used - Primary Crops - Other melons",
    "Domestic Extraction Used - Primary Crops - Papayas",
    "Domestic Extraction Used - Primary Crops - Peaches and Nectarines",
    "Domestic Extraction Used - Primary Crops - Pears",
    "Domestic Extraction Used - Primary Crops - Peas, Green",
    "Domestic Extraction Used - Primary Crops - Peas, dry",
    "Domestic Extraction Used - Primary Crops - Pepper",
    "Domestic Extraction Used - Primary Crops - Peppermint",
    "Domestic Extraction Used - Primary Crops - Persimmons",
    "Domestic Extraction Used - Primary Crops - Pigeon peas",
    "Domestic Extraction Used - Primary Crops - Pineapples",
    "Domestic Extraction Used - Primary Crops - Pistachios",
    "Domestic Extraction Used - Primary Crops - Plantains",
    "Domestic Extraction Used - Primary Crops - Plums",
    "Domestic Extraction Used - Primary Crops - Pome fruit, nes",
    "Domestic Extraction Used - Primary Crops - Poppy Seed",
    "Domestic Extraction Used - Primary Crops - Potatoes",
    "Domestic Extraction Used - Primary Crops - Pulses nec",
    "Domestic Extraction Used - Primary Crops - Pumpkins, Squash, Gourds",
    "Domestic Extraction Used - Primary Crops - Pyrethrum, Dried Flowers",
    "Domestic Extraction Used - Primary Crops - Quinces",
    "Domestic Extraction Used - Primary Crops - Quinoa",
    "Domestic Extraction Used - Primary Crops - Ramie",
    "Domestic Extraction Used - Primary Crops - Rapeseed",
    "Domestic Extraction Used - Primary Crops - Raspberries",
    "Domestic Extraction Used - Primary Crops - Rice",
    "Domestic Extraction Used - Primary Crops - Roots and Tubers, nes",
    "Domestic Extraction Used - Primary Crops - Rye",
    "Domestic Extraction Used - Primary Crops - Safflower Seed",
    "Domestic Extraction Used - Primary Crops - Sesame Seed",
    "Domestic Extraction Used - Primary Crops - Sisal",
    "Domestic Extraction Used - Primary Crops - Sorghum",
    "Domestic Extraction Used - Primary Crops - Sour Cherries",
    "Domestic Extraction Used - Primary Crops - Soybeans",
    "Domestic Extraction Used - Primary Crops - Spices nec",
    "Domestic Extraction Used - Primary Crops - Spinach",
    "Domestic Extraction Used - Primary Crops - Stone Fruit nec,",
    "Domestic Extraction Used - Primary Crops - Strawberries",
    "Domestic Extraction Used - Primary Crops - String beans",
    "Domestic Extraction Used - Primary Crops - Sugar Beets",
    "Domestic Extraction Used - Primary Crops - Sugar Cane",
    "Domestic Extraction Used - Primary Crops - Sugar Crops nes",
    "Domestic Extraction Used - Primary Crops - Sunflower Seed",
    "Domestic Extraction Used - Primary Crops - Sweet Potatoes",
    "Domestic Extraction Used - Primary Crops - Tallowtree Seeds",
    "Domestic Extraction Used - Primary Crops - Tang. Mand Clement. Satsma",
    "Domestic Extraction Used - Primary Crops - Taro",
    "Domestic Extraction Used - Primary Crops - Tea",
    "Domestic Extraction Used - Primary Crops - Tea nes",
    "Domestic Extraction Used - Primary Crops - Tobacco Leaves",
    "Domestic Extraction Used - Primary Crops - Tomatoes",
    "Domestic Extraction Used - Primary Crops - Triticale",
    "Domestic Extraction Used - Primary Crops - Tung Nuts",
    "Domestic Extraction Used - Primary Crops - Vanilla",
    "Domestic Extraction Used - Primary Crops - Vegetables Fresh nec",
    "Domestic Extraction Used - Primary Crops - Vetches",
    "Domestic Extraction Used - Primary Crops - Walnuts",
    "Domestic Extraction Used - Primary Crops - Watermelons",
    "Domestic Extraction Used - Primary Crops - Wheat",
    "Domestic Extraction Used - Primary Crops - Yams",
    "Domestic Extraction Used - Primary Crops - Yautia",
    # Remaining broader-named columns (the Fisheries label keeps its
    # escaped bytes exactly as written in the original list)
    "Domestic Extraction Used - Crop and Crop Residue",
    "Domestic Extraction Used - Grazing and Fodder",
    "Domestic Extraction Used - Forestry and Timber",
    "Domestic Extraction Used â\x80\x93 Fisheries",
    "Domestic Extraction Used - Non-metalic Minerals",
    "Domestic Extraction Used - Iron Ore",
    "Domestic Extraction Used - Non-ferous metal ores",
]
data_full = data_full.drop(columns=domestic_extraction_used_columns)

In [128]:
# column that sums all the unused domestic extraction
# The summed columns are listed explicitly (one per line) instead of being
# chained in a single very long expression, so the set is reviewable.
# NOTE(review): the last entry, 'Land use Crop, Forest, Pasture', is not an
# 'Unused Domestic Extraction' column; it is kept only because the original
# sum included it -- confirm that this is intended.
# NOTE(review): the list mixes item-level columns (e.g. individual metal ores)
# with broader-named ones (e.g. 'Iron Ore', 'Non-ferous metal ores'); if the
# latter are category totals this double counts -- confirm.
unused_extraction_columns = [
    'Unused Domestic Extraction - Fodder crops - Turnips for Fodder',
    'Unused Domestic Extraction - Fodder crops - Vegetables and Roots, Fodder',
    'Unused Domestic Extraction - Forestry - Coniferous wood - Industrial roundwood',
    'Unused Domestic Extraction - Forestry - Coniferous wood - Wood fuel',
    'Unused Domestic Extraction - Forestry - Kapok Fruit',
    'Unused Domestic Extraction - Forestry - Natural Gums',
    'Unused Domestic Extraction - Forestry - Non-coniferous wood - Industrial roundwood',
    'Unused Domestic Extraction - Forestry - Non-coniferous wood - Wood fuel',
    'Unused Domestic Extraction - Forestry - Raw materials other than wood',
    'Unused Domestic Extraction - Fossil Fuels - Anthracite',
    'Unused Domestic Extraction - Fossil Fuels - Coking coal',
    'Unused Domestic Extraction - Fossil Fuels - Crude oil',
    'Unused Domestic Extraction - Fossil Fuels - Lignite/brown coal',
    'Unused Domestic Extraction - Fossil Fuels - Natural gas',
    'Unused Domestic Extraction - Fossil Fuels - Natural gas liquids',
    'Unused Domestic Extraction - Fossil Fuels - Other bituminous coal',
    'Unused Domestic Extraction - Fossil Fuels - Peat',
    'Unused Domestic Extraction - Fossil Fuels - Sub-bituminous coal',
    'Unused Domestic Extraction - Grazing',
    'Unused Domestic Extraction - Metal Ores - Bauxite and aluminium ores',
    'Unused Domestic Extraction - Metal Ores - Copper ores',
    'Unused Domestic Extraction - Metal Ores - Gold ores',
    'Unused Domestic Extraction - Metal Ores - Iron ores',
    'Unused Domestic Extraction - Metal Ores - Lead ores',
    'Unused Domestic Extraction - Metal Ores - Nickel ores',
    'Unused Domestic Extraction - Metal Ores - Other non-ferrous metal ores',
    'Unused Domestic Extraction - Metal Ores - PGM ores',
    'Unused Domestic Extraction - Metal Ores - Silver ores',
    'Unused Domestic Extraction - Metal Ores - Tin ores',
    'Unused Domestic Extraction - Metal Ores - Uranium and thorium ores',
    'Unused Domestic Extraction - Metal Ores - Zinc ores',
    'Unused Domestic Extraction - Non-Metallic Minerals - Building stones',
    'Unused Domestic Extraction - Non-Metallic Minerals - Chemical and fertilizer minerals',
    'Unused Domestic Extraction - Non-Metallic Minerals - Clays and kaolin',
    'Unused Domestic Extraction - Non-Metallic Minerals - Gravel and sand',
    'Unused Domestic Extraction - Non-Metallic Minerals - Limestone, gypsum, chalk, dolomite',
    'Unused Domestic Extraction - Non-Metallic Minerals - Other minerals',
    'Unused Domestic Extraction - Non-Metallic Minerals - Salt',
    'Unused Domestic Extraction - Non-Metallic Minerals - Slate',
    'Unused Domestic Extraction - Primary Crops - Kapokseed in Shell',
    'Unused Domestic Extraction - Crop and Crop Residue',
    'Unused Domestic Extraction - Grazing and Fodder',
    'Unused Domestic Extraction - Forestry and Timber',
    'Unused Domestic Extraction â\x80\x93 Fisheries',
    'Unused Domestic Extraction - Coal and Peat',
    'Unused Domestic Extraction - Oil and Gas',
    'Unused Domestic Extraction - Non-metalic Minerals',
    'Unused Domestic Extraction - Iron Ore',
    'Unused Domestic Extraction - Non-ferous metal ores',
    'Land use Crop, Forest, Pasture',
]
# Builtin sum() seeded with the first column applies `+` left to right, exactly
# like the original chained expression, so NaN propagation and floating-point
# results are unchanged. A missing column still raises KeyError.
data_full['gov_Unused_domestic_extraction'] = sum(
    (data_full[column] for column in unused_extraction_columns[1:]),
    data_full[unused_extraction_columns[0]],
)

# drop the columns
data_full = data_full.drop([
    'Unused Domestic Extraction - Primary Crops - Rice',
    'Unused Domestic Extraction - Primary Crops - Wheat',
    'Unused Domestic Extraction - Primary Crops - Barley',
    'Unused Domestic Extraction - Primary Crops - Buckwheat',
    'Unused Domestic Extraction - Primary Crops - Canary Seed',
    'Unused Domestic Extraction - Primary Crops - Maize',
    'Unused Domestic Extraction - Primary Crops - Millet',
    'Unused Domestic Extraction - Primary Crops - Mixed Grain',
    'Unused Domestic Extraction - Primary Crops - Oats',
    'Unused Domestic Extraction - Primary Crops - Rye',
    'Unused Domestic Extraction - Primary Crops - Sorghum',
    'Unused Domestic Extraction - Primary Crops - Triticale',
    'Unused Domestic Extraction - Primary Crops - Cereals nec',
    'Unused Domestic Extraction - Primary Crops - Fonio',
    'Unused Domestic Extraction - Primary Crops - Quinoa',
    'Unused Domestic Extraction - Primary Crops - Potatoes',
    'Unused Domestic Extraction - Primary Crops - Sweet Potatoes',
    'Unused Domestic Extraction - Primary Crops - Yams',
    'Unused Domestic Extraction - Primary Crops - Lentils',
    'Unused Domestic Extraction - Primary Crops - Lupins',
    'Unused Domestic Extraction - Primary Crops - Vetches',
    'Unused Domestic Extraction - Primary Crops - Pulses nec',
    'Unused Domestic Extraction - Primary Crops - Olives',
    'Unused Domestic Extraction - Primary Crops - Artichokes',
    'Unused Domestic Extraction - Primary Crops - Asparagus',
    'Unused Domestic Extraction - Primary Crops - Cabbages',
    'Unused Domestic Extraction - Primary Crops - Carrots',
    'Unused Domestic Extraction - Primary Crops - Cauliflower',
    'Unused Domestic Extraction - Primary Crops - Chillies and peppers, green',
    'Unused Domestic Extraction - Primary Crops - Cucumbers and Gherkins',
    'Unused Domestic Extraction - Primary Crops - Eggplants',
    'Unused Domestic Extraction - Primary Crops - Garlic',
    'Unused Domestic Extraction - Primary Crops - Leeks and other Alliac. Veg.',
    'Unused Domestic Extraction - Primary Crops - Lettuce',
    'Unused Domestic Extraction - Primary Crops - Mushrooms',
    'Unused Domestic Extraction - Primary Crops - Peas, Green',
    'Unused Domestic Extraction - Primary Crops - Pumpkins, Squash, Gourds',
    'Unused Domestic Extraction - Primary Crops - Spinach',
    'Unused Domestic Extraction - Primary Crops - Tomatoes',
    'Unused Domestic Extraction - Primary Crops - Vegetables Fresh nec',
    'Unused Domestic Extraction - Primary Crops - Apples',
    'Unused Domestic Extraction - Primary Crops - Apricots',
    'Unused Domestic Extraction - Primary Crops - Avocados',
    'Unused Domestic Extraction - Primary Crops - Blueberries',
    'Unused Domestic Extraction - Primary Crops - Carobs',
    'Unused Domestic Extraction - Primary Crops - Cherries',
    'Unused Domestic Extraction - Primary Crops - Currants',
    'Unused Domestic Extraction - Primary Crops - Dates',
    'Unused Domestic Extraction - Primary Crops - Figs',
    'Unused Domestic Extraction - Primary Crops - Gooseberries',
    'Unused Domestic Extraction - Primary Crops - Grapefruit and Pomelos',
    'Unused Domestic Extraction - Primary Crops - Grapes',
    'Unused Domestic Extraction - Primary Crops - Kiwi Fruit',
    'Unused Domestic Extraction - Primary Crops - Lemons and Limes',
    'Unused Domestic Extraction - Primary Crops - Oranges',
    'Unused Domestic Extraction - Primary Crops - Peaches and Nectarines',
    'Unused Domestic Extraction - Primary Crops - Pears',
    'Unused Domestic Extraction - Primary Crops - Persimmons',
    'Unused Domestic Extraction - Primary Crops - Pineapples',
    'Unused Domestic Extraction - Primary Crops - Plums',
    'Unused Domestic Extraction - Primary Crops - Quinces',
    'Unused Domestic Extraction - Primary Crops - Raspberries',
    'Unused Domestic Extraction - Primary Crops - Sour Cherries',
    'Unused Domestic Extraction - Primary Crops - Strawberries',
    'Unused Domestic Extraction - Primary Crops - Tang. Mand Clement. Satsma',
    'Unused Domestic Extraction - Primary Crops - Berries nec',
    'Unused Domestic Extraction - Primary Crops - Citrus Fruit nec',
    'Unused Domestic Extraction - Primary Crops - Stone Fruit nec,',
    'Unused Domestic Extraction - Primary Crops - Almonds',
    'Unused Domestic Extraction - Primary Crops - Chestnuts',
    'Unused Domestic Extraction - Primary Crops - Hazelnuts',
    'Unused Domestic Extraction - Primary Crops - Pistachios',
    'Unused Domestic Extraction - Primary Crops - Walnuts',
    'Unused Domestic Extraction - Primary Crops - Cassava',
    'Unused Domestic Extraction - Primary Crops - Roots and Tubers, nes',
    'Unused Domestic Extraction - Primary Crops - Taro',
    'Unused Domestic Extraction - Primary Crops - Yautia',
    'Unused Domestic Extraction - Primary Crops - Bambara beans',
    'Unused Domestic Extraction - Primary Crops - Beans, dry',
    'Unused Domestic Extraction - Primary Crops - Beans, green',
    'Unused Domestic Extraction - Primary Crops - Broad beans, horse beans, dry',
    'Unused Domestic Extraction - Primary Crops - Chick peas',
    'Unused Domestic Extraction - Primary Crops - Cow peas, dry',
    'Unused Domestic Extraction - Primary Crops - Peas, dry',
    'Unused Domestic Extraction - Primary Crops - Pigeon peas',
    'Unused Domestic Extraction - Primary Crops - String beans',
    'Unused Domestic Extraction - Primary Crops - Coconuts',
    'Unused Domestic Extraction - Primary Crops - Okra',
    'Unused Domestic Extraction - Primary Crops - Onions',
    'Unused Domestic Extraction - Primary Crops - Onions, dry',
    'Unused Domestic Extraction - Primary Crops - Other melons',
    'Unused Domestic Extraction - Primary Crops - Watermelons',
    'Unused Domestic Extraction - Primary Crops - Bananas',
    'Unused Domestic Extraction - Primary Crops - Cashewapple',
    'Unused Domestic Extraction - Primary Crops - Cranberries',
    'Unused Domestic Extraction - Primary Crops - Fruit Fresh Nes',
    'Unused Domestic Extraction - Primary Crops - Fruit, tropical fresh nes',
    'Unused Domestic Extraction - Primary Crops - Mangoes, mangosteens, guavas',
    'Unused Domestic Extraction - Primary Crops - Papayas',
    'Unused Domestic Extraction - Primary Crops - Plantains',
    'Unused Domestic Extraction - Primary Crops - Arecanuts',
    'Unused Domestic Extraction - Primary Crops - Brazil nuts, with shell',
    'Unused Domestic Extraction - Primary Crops - Cashew nuts, with shell',
    'Unused Domestic Extraction - Primary Crops - Kolanuts',
    'Unused Domestic Extraction - Primary Crops - Nuts, nes',
    'Unused Domestic Extraction - Primary Crops - Leguminous vegetables, nes',
    'Unused Domestic Extraction - Primary Crops - Maize, green',
    'Unused Domestic Extraction - Primary Crops - Pome fruit, nes',
    'Unused Domestic Extraction - Primary Crops - Cassava leaves',
    'Unused Domestic Extraction - Primary Crops - Groundnuts in Shell',
    'Unused Domestic Extraction - Primary Crops - Hempseed',
    'Unused Domestic Extraction - Primary Crops - Linseed',
    'Unused Domestic Extraction - Primary Crops - Melonseed',
    'Unused Domestic Extraction - Primary Crops - Mustard Seed',
    'Unused Domestic Extraction - Primary Crops - Poppy Seed',
    'Unused Domestic Extraction - Primary Crops - Rapeseed',
    'Unused Domestic Extraction - Primary Crops - Safflower Seed',
    'Unused Domestic Extraction - Primary Crops - Sesame Seed',
    'Unused Domestic Extraction - Primary Crops - Soybeans',
    'Unused Domestic Extraction - Primary Crops - Sunflower Seed',
    'Unused Domestic Extraction - Primary Crops - Oilseeds nec',
    'Unused Domestic Extraction - Primary Crops - Oil Palm Fruit',
    'Unused Domestic Extraction - Primary Crops - Castor oil seed',
    'Unused Domestic Extraction - Primary Crops - Karite Nuts',
    'Unused Domestic Extraction - Primary Crops - Tung Nuts',
    'Unused Domestic Extraction - Primary Crops - Jojoba Seeds',
    'Unused Domestic Extraction - Primary Crops - Tallowtree Seeds',
    'Unused Domestic Extraction - Primary Crops - Cottonseed',
    'Unused Domestic Extraction - Primary Crops - Sugar Beets',
    'Unused Domestic Extraction - Primary Crops - Sugar Cane',
    'Unused Domestic Extraction - Primary Crops - Sugar Crops nes',
    'Unused Domestic Extraction - Primary Crops - Cotton Lint',
    'Unused Domestic Extraction - Primary Crops - Flax Fibre and Tow',
    'Unused Domestic Extraction - Primary Crops - Hemp Fibre and Tow',
    'Unused Domestic Extraction - Primary Crops - Abaca',
    'Unused Domestic Extraction - Primary Crops - Agave Fibres nes',
    'Unused Domestic Extraction - Primary Crops - Coir',
    'Unused Domestic Extraction - Primary Crops - Fibre Crops nes',
    'Unused Domestic Extraction - Primary Crops - Ramie',
    'Unused Domestic Extraction - Primary Crops - Sisal',
    'Unused Domestic Extraction - Primary Crops - Kapok Fibre',
    'Unused Domestic Extraction - Primary Crops - Jute and Jute-like Fibres',
    'Unused Domestic Extraction - Primary Crops - Other Bastfibres',
    'Unused Domestic Extraction - Primary Crops - Anise, Badian, Fennel',
    'Unused Domestic Extraction - Primary Crops - Chicory Roots',
    'Unused Domestic Extraction - Primary Crops - Coffee, Green',
    'Unused Domestic Extraction - Primary Crops - Hops',
    'Unused Domestic Extraction - Primary Crops - Peppermint',
    'Unused Domestic Extraction - Primary Crops - Pyrethrum, Dried Flowers',
    'Unused Domestic Extraction - Primary Crops - Tea',
    'Unused Domestic Extraction - Primary Crops - Spices nec',
    'Unused Domestic Extraction - Primary Crops - Cocoa Beans',
    'Unused Domestic Extraction - Primary Crops - Mate',
    'Unused Domestic Extraction - Primary Crops - Tobacco Leaves',
    'Unused Domestic Extraction - Primary Crops - Natural Rubber',
    'Unused Domestic Extraction - Primary Crops - Cinnamon',
    'Unused Domestic Extraction - Primary Crops - Cloves',
    'Unused Domestic Extraction - Primary Crops - Ginger',
    'Unused Domestic Extraction - Primary Crops - Nutmeg, mace and cardamoms',
    'Unused Domestic Extraction - Primary Crops - Vanilla',
    'Unused Domestic Extraction - Primary Crops - Pepper',
    'Unused Domestic Extraction - Primary Crops - Chillies and peppers, dry',
    'Unused Domestic Extraction - Primary Crops - Tea nes',
    'Unused Domestic Extraction - Crop residues - Feed',
    'Unused Domestic Extraction - Crop residues - Straw',
    'Unused Domestic Extraction - Fishery - Aquatic plants',
    'Unused Domestic Extraction - Fishery - Inland waters fish catch',
    'Unused Domestic Extraction - Fishery - Marine fish catch',
    'Unused Domestic Extraction - Fishery - Other (e.g. Aquatic mammals)',
    'Unused Domestic Extraction - Fodder crops - Alfalfa for Forage and Silage',
    'Unused Domestic Extraction - Fodder crops - Beets for Fodder',
    'Unused Domestic Extraction - Fodder crops - Cabbage for Fodder',
    'Unused Domestic Extraction - Fodder crops - Carrots for Fodder',
    'Unused Domestic Extraction - Fodder crops - Clover for Forage and Silage',
    'Unused Domestic Extraction - Fodder crops - Forage Products nec',
    'Unused Domestic Extraction - Fodder crops - Grasses nec for Forage and Silage',
    'Unused Domestic Extraction - Fodder crops - Green Oilseeds for Fodder',
    'Unused Domestic Extraction - Fodder crops - Leguminous nec for forage and Silage',
    'Unused Domestic Extraction - Fodder crops - Maize for Forage and Silage',
    'Unused Domestic Extraction - Fodder crops - Other grasses',
    'Unused Domestic Extraction - Fodder crops - Rye Grass, Forage and Silage',
    'Unused Domestic Extraction - Fodder crops - Sorghum for Forage and Silage',
    'Unused Domestic Extraction - Fodder crops - Swedes for Fodder',
    'Unused Domestic Extraction - Fodder crops - Turnips for Fodder',
    'Unused Domestic Extraction - Fodder crops - Vegetables and Roots, Fodder',
    'Unused Domestic Extraction - Forestry - Coniferous wood - Industrial roundwood',
    'Unused Domestic Extraction - Forestry - Coniferous wood - Wood fuel',
    'Unused Domestic Extraction - Forestry - Kapok Fruit',
    'Unused Domestic Extraction - Forestry - Natural Gums',
    'Unused Domestic Extraction - Forestry - Non-coniferous wood - Industrial roundwood',
    'Unused Domestic Extraction - Forestry - Non-coniferous wood - Wood fuel',
    'Unused Domestic Extraction - Forestry - Raw materials other than wood',
    'Unused Domestic Extraction - Fossil Fuels - Anthracite',
    'Unused Domestic Extraction - Fossil Fuels - Coking coal',
    'Unused Domestic Extraction - Fossil Fuels - Crude oil',
    'Unused Domestic Extraction - Fossil Fuels - Lignite/brown coal',
    'Unused Domestic Extraction - Fossil Fuels - Natural gas',
    'Unused Domestic Extraction - Fossil Fuels - Natural gas liquids',
    'Unused Domestic Extraction - Fossil Fuels - Other bituminous coal',
    'Unused Domestic Extraction - Fossil Fuels - Peat',
    'Unused Domestic Extraction - Fossil Fuels - Sub-bituminous coal',
    'Unused Domestic Extraction - Grazing',
    'Unused Domestic Extraction - Metal Ores - Bauxite and aluminium ores',
    'Unused Domestic Extraction - Metal Ores - Copper ores',
    'Unused Domestic Extraction - Metal Ores - Gold ores',
    'Unused Domestic Extraction - Metal Ores - Iron ores',
    'Unused Domestic Extraction - Metal Ores - Lead ores',
    'Unused Domestic Extraction - Metal Ores - Nickel ores',
    'Unused Domestic Extraction - Metal Ores - Other non-ferrous metal ores',
    'Unused Domestic Extraction - Metal Ores - PGM ores',
    'Unused Domestic Extraction - Metal Ores - Silver ores',
    'Unused Domestic Extraction - Metal Ores - Tin ores',
    'Unused Domestic Extraction - Metal Ores - Uranium and thorium ores',
    'Unused Domestic Extraction - Metal Ores - Zinc ores',
    'Unused Domestic Extraction - Non-Metallic Minerals - Building stones',
    'Unused Domestic Extraction - Non-Metallic Minerals - Chemical and fertilizer minerals',
    'Unused Domestic Extraction - Non-Metallic Minerals - Clays and kaolin',
    'Unused Domestic Extraction - Non-Metallic Minerals - Gravel and sand',
    'Unused Domestic Extraction - Non-Metallic Minerals - Limestone, gypsum, chalk, dolomite',
    'Unused Domestic Extraction - Non-Metallic Minerals - Other minerals',
    'Unused Domestic Extraction - Non-Metallic Minerals - Salt',
    'Unused Domestic Extraction - Non-Metallic Minerals - Slate',
    'Unused Domestic Extraction - Primary Crops - Kapokseed in Shell',
    'Unused Domestic Extraction',
    'Unused Domestic Extraction - Crop and Crop Residue',
    'Unused Domestic Extraction - Grazing and Fodder',
    'Unused Domestic Extraction - Forestry and Timber',
    'Unused Domestic Extraction â\x80\x93 Fisheries',
    'Unused Domestic Extraction - Coal and Peat',
    'Unused Domestic Extraction - Oil and Gas',
    'Unused Domestic Extraction - Non-metalic Minerals',
    'Unused Domestic Extraction - Iron Ore',
    'Unused Domestic Extraction - Non-ferous metal ores',
    'Land use Crop, Forest, Pasture'], axis=1)

In [129]:
# Collapse every 'Water Withdrawal Blue' series into one total column, then
# remove the detailed water columns from the frame.

# Series that feed the total: manufacturing, electricity (tower and
# once-through) and domestic withdrawal. The same list drives both the sum and
# the drop, so the two can never drift apart.
water_withdrawal_blue_cols = [
    'Water Withdrawal Blue - Manufacturing - Products of meat cattle',
    'Water Withdrawal Blue - Manufacturing - Products of meat pigs',
    'Water Withdrawal Blue - Manufacturing - Products of meat poultry',
    'Water Withdrawal Blue - Manufacturing - Meat products nec',
    'Water Withdrawal Blue - Manufacturing - products of Vegetable oils and fats',
    'Water Withdrawal Blue - Manufacturing - Dairy products',
    'Water Withdrawal Blue - Manufacturing - Processed rice',
    'Water Withdrawal Blue - Manufacturing - Sugar',
    'Water Withdrawal Blue - Manufacturing - Food products nec',
    'Water Withdrawal Blue - Manufacturing - Beverages',
    'Water Withdrawal Blue - Manufacturing - Fish products',
    'Water Withdrawal Blue - Manufacturing - Tobacco products (16)',
    'Water Withdrawal Blue - Manufacturing - Textiles (17)',
    'Water Withdrawal Blue - Manufacturing - Wearing apparel; furs (18)',
    'Water Withdrawal Blue - Manufacturing - Leather and leather products (19)',
    'Water Withdrawal Blue - Manufacturing - Pulp',
    'Water Withdrawal Blue - Manufacturing - Secondary paper for treatment, Re-processing of secondary paper into new pulp',
    'Water Withdrawal Blue - Manufacturing - Paper and paper products',
    'Water Withdrawal Blue - Manufacturing - Printed matter and recorded media (22)',
    'Water Withdrawal Blue - Manufacturing - Plastics, basic',
    'Water Withdrawal Blue - Manufacturing - Secondary plastic for treatment, Re-processing of secondary plastic into new plastic',
    'Water Withdrawal Blue - Manufacturing - N-fertiliser',
    'Water Withdrawal Blue - Manufacturing - P- and other fertiliser',
    'Water Withdrawal Blue - Manufacturing - Chemicals nec',
    'Water Withdrawal Blue - Manufacturing - Rubber and plastic products (25)',
    'Water Withdrawal Blue - Manufacturing - Glass and glass products',
    'Water Withdrawal Blue - Manufacturing - Secondary glass for treatment, Re-processing of secondary glass into new glass',
    'Water Withdrawal Blue - Manufacturing - Ceramic goods',
    'Water Withdrawal Blue - Manufacturing - Bricks, tiles and construction products, in baked clay',
    'Water Withdrawal Blue - Manufacturing - Cement, lime and plaster',
    'Water Withdrawal Blue - Manufacturing - Ash for treatment, Re-processing of ash into clinker',
    'Water Withdrawal Blue - Manufacturing - Other non-metallic mineral products',
    'Water Withdrawal Blue - Manufacturing - Basic iron and steel and of ferro-alloys and first products thereof',
    'Water Withdrawal Blue - Manufacturing - Secondary steel for treatment, Re-processing of secondary steel into new steel',
    'Water Withdrawal Blue - Manufacturing - Precious metals',
    'Water Withdrawal Blue - Manufacturing - Secondary preciuos metals for treatment, Re-processing of secondary preciuos metals into new preciuos metals',
    'Water Withdrawal Blue - Manufacturing - Aluminium and aluminium products',
    'Water Withdrawal Blue - Manufacturing - Secondary aluminium for treatment, Re-processing of secondary aluminium into new aluminium',
    'Water Withdrawal Blue - Manufacturing - Lead, zinc and tin and products thereof',
    'Water Withdrawal Blue - Manufacturing - Secondary lead for treatment, Re-processing of secondary lead into new lead',
    'Water Withdrawal Blue - Manufacturing - Copper products',
    'Water Withdrawal Blue - Manufacturing - Secondary copper for treatment, Re-processing of secondary copper into new copper',
    'Water Withdrawal Blue - Manufacturing - Other non-ferrous metal products',
    'Water Withdrawal Blue - Manufacturing - Secondary other non-ferrous metals for treatment, Re-processing of secondary other non-ferrous metals into new other non-ferrous metals',
    'Water Withdrawal Blue - Manufacturing - Fabricated metal products, except machinery and equipment (28)',
    'Water Withdrawal Blue - Manufacturing - Machinery and equipment n.e.c. (29)',
    'Water Withdrawal Blue - Manufacturing - Office machinery and computers (30)',
    'Water Withdrawal Blue - Manufacturing - Electrical machinery and apparatus n.e.c. (31)',
    'Water Withdrawal Blue - Manufacturing - Radio, television and communication equipment and apparatus (32)',
    'Water Withdrawal Blue - Manufacturing - Medical, precision and optical instruments, watches and clocks (33)',
    'Water Withdrawal Blue - Manufacturing - Motor vehicles, trailers and semi-trailers (34)',
    'Water Withdrawal Blue - Manufacturing - Other transport equipment (35)',
    'Water Withdrawal Blue - Manufacturing - Furniture; other manufactured goods n.e.c. (36)',
    'Water Withdrawal Blue - Electricity - tower - Electricity by coal',
    'Water Withdrawal Blue - Electricity - tower - Electricity by gas',
    'Water Withdrawal Blue - Electricity - tower - Electricity by nuclear',
    'Water Withdrawal Blue - Electricity - tower - Electricity by hydro',
    'Water Withdrawal Blue - Electricity - tower - Electricity by wind',
    'Water Withdrawal Blue - Electricity - tower - Electricity by petroleum and other oil derivatives',
    'Water Withdrawal Blue - Electricity - tower - Electricity by biomass and waste',
    'Water Withdrawal Blue - Electricity - tower - Electricity by solar photovoltaic',
    'Water Withdrawal Blue - Electricity - tower - Electricity by solar thermal',
    'Water Withdrawal Blue - Electricity - tower - Electricity by tide, wave, ocean',
    'Water Withdrawal Blue - Electricity - tower - Electricity by Geothermal',
    'Water Withdrawal Blue - Electricity - tower - Electricity nec',
    'Water Withdrawal Blue - Electricity - once-through - Electricity by coal',
    'Water Withdrawal Blue - Electricity - once-through - Electricity by gas',
    'Water Withdrawal Blue - Electricity - once-through - Electricity by nuclear',
    'Water Withdrawal Blue - Electricity - once-through - Electricity by hydro',
    'Water Withdrawal Blue - Electricity - once-through - Electricity by wind',
    'Water Withdrawal Blue - Electricity - once-through - Electricity by petroleum and other oil derivatives',
    'Water Withdrawal Blue - Electricity - once-through - Electricity by biomass and waste',
    'Water Withdrawal Blue - Electricity - once-through - Electricity by solar photovoltaic',
    'Water Withdrawal Blue - Electricity - once-through - Electricity by solar thermal',
    'Water Withdrawal Blue - Electricity - once-through - Electricity by tide, wave, ocean',
    'Water Withdrawal Blue - Electricity - once-through - Electricity by Geothermal',
    'Water Withdrawal Blue - Electricity - once-through - Electricity nec',
    'Water Withdrawal Blue - Domestic - domestic Water Withdrawal Blue',
]

# 'Water Consumption' series are not aggregated in this cell; they are only
# removed from the frame.
water_consumption_cols = [
    'Water Consumption Green - Agriculture - rice',
    'Water Consumption Green - Agriculture - wheat',
    'Water Consumption Green - Agriculture - other cereals',
    'Water Consumption Green - Agriculture - roots and tubers',
    'Water Consumption Green - Agriculture - sugar crops',
    'Water Consumption Green - Agriculture - pulses',
    'Water Consumption Green - Agriculture - nuts',
    'Water Consumption Green - Agriculture - oil crops',
    'Water Consumption Green - Agriculture - vegetables',
    'Water Consumption Green - Agriculture - fruits',
    'Water Consumption Green - Agriculture - fibres',
    'Water Consumption Green - Agriculture - other crops',
    'Water Consumption Green - Agriculture - fodder crops',
    'Water Consumption Blue - Agriculture - rice',
    'Water Consumption Blue - Agriculture - wheat',
    'Water Consumption Blue - Agriculture - other cereals',
    'Water Consumption Blue - Agriculture - roots and tubers',
    'Water Consumption Blue - Agriculture - sugar crops',
    'Water Consumption Blue - Agriculture - pulses',
    'Water Consumption Blue - Agriculture - nuts',
    'Water Consumption Blue - Agriculture - oil crops',
    'Water Consumption Blue - Agriculture - vegetables',
    'Water Consumption Blue - Agriculture - fruits',
    'Water Consumption Blue - Agriculture - fibres',
    'Water Consumption Blue - Agriculture - other crops',
    'Water Consumption Blue - Agriculture - fodder crops',
    'Water Consumption Blue - Livestock - dairy cattle',
    'Water Consumption Blue - Livestock - nondairy cattle',
    'Water Consumption Blue - Livestock - pigs',
    'Water Consumption Blue - Livestock - sheep',
    'Water Consumption Blue - Livestock - goats',
    'Water Consumption Blue - Livestock - buffaloes',
    'Water Consumption Blue - Livestock - camels',
    'Water Consumption Blue - Livestock - horses',
    'Water Consumption Blue - Livestock - chicken',
    'Water Consumption Blue - Livestock - turkeys',
    'Water Consumption Blue - Livestock - ducks',
    'Water Consumption Blue - Livestock - geese',
    'Water Consumption Blue - Manufacturing - Products of meat cattle',
    'Water Consumption Blue - Manufacturing - Products of meat pigs',
    'Water Consumption Blue - Manufacturing - Products of meat poultry',
    'Water Consumption Blue - Manufacturing - Meat products nec',
    'Water Consumption Blue - Manufacturing - products of Vegetable oils and fats',
    'Water Consumption Blue - Manufacturing - Dairy products',
    'Water Consumption Blue - Manufacturing - Processed rice',
    'Water Consumption Blue - Manufacturing - Sugar',
    'Water Consumption Blue - Manufacturing - Food products nec',
    'Water Consumption Blue - Manufacturing - Beverages',
    'Water Consumption Blue - Manufacturing - Fish products',
    'Water Consumption Blue - Manufacturing - Tobacco products (16)',
    'Water Consumption Blue - Manufacturing - Textiles (17)',
    'Water Consumption Blue - Manufacturing - Wearing apparel; furs (18)',
    'Water Consumption Blue - Manufacturing - Leather and leather products (19)',
    'Water Consumption Blue - Manufacturing - Pulp',
    'Water Consumption Blue - Manufacturing - Secondary paper for treatment, Re-processing of secondary paper into new pulp',
    'Water Consumption Blue - Manufacturing - Paper and paper products',
    'Water Consumption Blue - Manufacturing - Printed matter and recorded media (22)',
    'Water Consumption Blue - Manufacturing - Plastics, basic',
    'Water Consumption Blue - Manufacturing - Secondary plastic for treatment, Re-processing of secondary plastic into new plastic',
    'Water Consumption Blue - Manufacturing - N-fertiliser',
    'Water Consumption Blue - Manufacturing - P- and other fertiliser',
    'Water Consumption Blue - Manufacturing - Chemicals nec',
    'Water Consumption Blue - Manufacturing - Rubber and plastic products (25)',
    'Water Consumption Blue - Manufacturing - Glass and glass products',
    'Water Consumption Blue - Manufacturing - Secondary glass for treatment, Re-processing of secondary glass into new glass',
    'Water Consumption Blue - Manufacturing - Ceramic goods',
    'Water Consumption Blue - Manufacturing - Bricks, tiles and construction products, in baked clay',
    'Water Consumption Blue - Manufacturing - Cement, lime and plaster',
    'Water Consumption Blue - Manufacturing - Ash for treatment, Re-processing of ash into clinker',
    'Water Consumption Blue - Manufacturing - Other non-metallic mineral products',
    'Water Consumption Blue - Manufacturing - Basic iron and steel and of ferro-alloys and first products thereof',
    'Water Consumption Blue - Manufacturing - Secondary steel for treatment, Re-processing of secondary steel into new steel',
    'Water Consumption Blue - Manufacturing - Precious metals',
    'Water Consumption Blue - Manufacturing - Secondary preciuos metals for treatment, Re-processing of secondary preciuos metals into new preciuos metals',
    'Water Consumption Blue - Manufacturing - Aluminium and aluminium products',
    'Water Consumption Blue - Manufacturing - Secondary aluminium for treatment, Re-processing of secondary aluminium into new aluminium',
    'Water Consumption Blue - Manufacturing - Lead, zinc and tin and products thereof',
    'Water Consumption Blue - Manufacturing - Secondary lead for treatment, Re-processing of secondary lead into new lead',
    'Water Consumption Blue - Manufacturing - Copper products',
    'Water Consumption Blue - Manufacturing - Secondary copper for treatment, Re-processing of secondary copper into new copper',
    'Water Consumption Blue - Manufacturing - Other non-ferrous metal products',
    'Water Consumption Blue - Manufacturing - Secondary other non-ferrous metals for treatment, Re-processing of secondary other non-ferrous metals into new other non-ferrous metals',
    'Water Consumption Blue - Manufacturing - Fabricated metal products, except machinery and equipment (28)',
    'Water Consumption Blue - Manufacturing - Machinery and equipment n.e.c. (29)',
    'Water Consumption Blue - Manufacturing - Office machinery and computers (30)',
    'Water Consumption Blue - Manufacturing - Electrical machinery and apparatus n.e.c. (31)',
    'Water Consumption Blue - Manufacturing - Radio, television and communication equipment and apparatus (32)',
    'Water Consumption Blue - Manufacturing - Medical, precision and optical instruments, watches and clocks (33)',
    'Water Consumption Blue - Manufacturing - Motor vehicles, trailers and semi-trailers (34)',
    'Water Consumption Blue - Manufacturing - Other transport equipment (35)',
    'Water Consumption Blue - Manufacturing - Furniture; other manufactured goods n.e.c. (36)',
    'Water Consumption Blue - Electricity - tower - Electricity by coal',
    'Water Consumption Blue - Electricity - tower - Electricity by gas',
    'Water Consumption Blue - Electricity - tower - Electricity by nuclear',
    'Water Consumption Blue - Electricity - tower - Electricity by hydro',
    'Water Consumption Blue - Electricity - tower - Electricity by wind',
    'Water Consumption Blue - Electricity - tower - Electricity by petroleum and other oil derivatives',
    'Water Consumption Blue - Electricity - tower - Electricity by biomass and waste',
    'Water Consumption Blue - Electricity - tower - Electricity by solar photovoltaic',
    'Water Consumption Blue - Electricity - tower - Electricity by solar thermal',
    'Water Consumption Blue - Electricity - tower - Electricity by tide, wave, ocean',
    'Water Consumption Blue - Electricity - tower - Electricity by Geothermal',
    'Water Consumption Blue - Electricity - tower - Electricity nec',
    'Water Consumption Blue - Electricity - once-through - Electricity by coal',
    'Water Consumption Blue - Electricity - once-through - Electricity by gas',
    'Water Consumption Blue - Electricity - once-through - Electricity by nuclear',
    'Water Consumption Blue - Electricity - once-through - Electricity by hydro',
    'Water Consumption Blue - Electricity - once-through - Electricity by wind',
    'Water Consumption Blue - Electricity - once-through - Electricity by petroleum and other oil derivatives',
    'Water Consumption Blue - Electricity - once-through - Electricity by biomass and waste',
    'Water Consumption Blue - Electricity - once-through - Electricity by solar photovoltaic',
    'Water Consumption Blue - Electricity - once-through - Electricity by solar thermal',
    'Water Consumption Blue - Electricity - once-through - Electricity by tide, wave, ocean',
    'Water Consumption Blue - Electricity - once-through - Electricity by Geothermal',
    'Water Consumption Blue - Electricity - once-through - Electricity nec',
    'Water Consumption Blue - Domestic - domestic Water Consumption Blue',
]

# Row-wise total of all withdrawal series. skipna=False keeps the behaviour of
# a chained `+`: a missing component yields a missing total instead of being
# silently treated as zero.
data_full['gov_Water_withdrawal_blue_total'] = data_full[water_withdrawal_blue_cols].sum(axis=1, skipna=False)

# Remove the detailed columns with one well-formed call.
# NOTE(review): 'gov_Water_withdrawal_blue_total' is deliberately NOT dropped,
# so the aggregate computed above stays in the frame - confirm this is intended.
data_full = data_full.drop(columns=water_withdrawal_blue_cols + water_consumption_cols)

In [130]:
# Collapse the 'Ni - non combustion' air series into a single total column.
ni_non_combustion_cols = [
    'Ni - non combustion - Agglomeration plant - pellets - air',
    'Ni - non combustion - Agglomeration plant - sinter - air',
    'Ni - non combustion - Glass production - air',
    'Ni - non combustion - Production of coke oven coke - air',
    'Ni - non combustion - Production of gascoke - air',
    'Ni - non combustion - Steel production: basic oxygen furnace - air',
    'Ni - non combustion - Steel production: electric arc furnace - air',
    'Ni - non combustion - Steel production: open hearth furnace - air',
]

# Seeding the fold with the first series and adding the rest left to right
# reproduces a chained `a + b + c + ...` exactly (NaN in any part -> NaN total).
data_full['gov_Ni_non_combustion_total'] = sum(
    (data_full[name] for name in ni_non_combustion_cols[1:]),
    data_full[ni_non_combustion_cols[0]],
)

# The component series are removed once the total exists.
data_full = data_full.drop(columns=ni_non_combustion_cols)

In [131]:
# Collapse the 'PAH - non combustion' air series into a single total column.
pah_non_combustion_cols = [
    'PAH - non combustion - Agglomeration plant - pellets - air',
    'PAH - non combustion - Agglomeration plant - sinter - air',
    'PAH - non combustion - Pig iron production, blast furnace - air',
    'PAH - non combustion - Production of coke oven coke - air',
    'PAH - non combustion - Production of gascoke - air',
    'PAH - non combustion - Steel production: basic oxygen furnace - air',
    'PAH - non combustion - Steel production: electric arc furnace - air',
]

# Seeding the fold with the first series and adding the rest left to right
# reproduces a chained `a + b + c + ...` exactly (NaN in any part -> NaN total).
data_full['gov_PAH_non_combustion_total'] = sum(
    (data_full[name] for name in pah_non_combustion_cols[1:]),
    data_full[pah_non_combustion_cols[0]],
)

# The component series are removed once the total exists.
data_full = data_full.drop(columns=pah_non_combustion_cols)

In [132]:
# Collapse the 'PCB - non combustion' air series into a single total column.
pcb_non_combustion_cols = [
    'PCB - non combustion - Agglomeration plant - pellets - air',
    'PCB - non combustion - Agglomeration plant - sinter - air',
    'PCB - non combustion - Pig iron production, blast furnace - air',
    'PCB - non combustion - Steel production: basic oxygen furnace - air',
    'PCB - non combustion - Steel production: electric arc furnace - air',
]

# Seeding the fold with the first series and adding the rest left to right
# reproduces a chained `a + b + c + ...` exactly (NaN in any part -> NaN total).
data_full['gov_PCB_non_combustion_total'] = sum(
    (data_full[name] for name in pcb_non_combustion_cols[1:]),
    data_full[pcb_non_combustion_cols[0]],
)

# The component series are removed once the total exists.
data_full = data_full.drop(columns=pcb_non_combustion_cols)

In [133]:
# Collapse the 'PCDD/F - non combustion' air series into a single total column.
pcdd_f_non_combustion_cols = [
    'PCDD/F - non combustion - Agglomeration plant - pellets - air',
    'PCDD/F - non combustion - Agglomeration plant - sinter - air',
    'PCDD/F - non combustion - Pig iron production, blast furnace - air',
    'PCDD/F - non combustion - Secondary aluminium production - air',
    'PCDD/F - non combustion - Steel production: basic oxygen furnace - air',
    'PCDD/F - non combustion - Steel production: electric arc furnace - air',
]

# Seeding the fold with the first series and adding the rest left to right
# reproduces a chained `a + b + c + ...` exactly (NaN in any part -> NaN total).
data_full['gov_PCDD_F_non_combustion_total'] = sum(
    (data_full[name] for name in pcdd_f_non_combustion_cols[1:]),
    data_full[pcdd_f_non_combustion_cols[0]],
)

# The component series are removed once the total exists.
data_full = data_full.drop(columns=pcdd_f_non_combustion_cols)

In [134]:
# Collapse the per-process Pb non-combustion air-emission columns into a single total.
# The column list is written once and reused for both the sum and the drop.
pb_non_combustion_cols = [
    'Pb - non combustion - Agglomeration plant - pellets - air',
    'Pb - non combustion - Agglomeration plant - sinter - air',
    'Pb - non combustion - Glass production - air',
    'Pb - non combustion - Nickel, unwrought - air',
    'Pb - non combustion - Pig iron production, blast furnace - air',
    'Pb - non combustion - Production of coke oven coke - air',
    'Pb - non combustion - Production of gascoke - air',
    'Pb - non combustion - Refined copper; unwrought, not alloyed - air',
    'Pb - non combustion - Refined lead, unwrought - air',
    'Pb - non combustion - Steel production: basic oxygen furnace - air',
    'Pb - non combustion - Steel production: electric arc furnace - air',
    'Pb - non combustion - Steel production: open hearth furnace - air',
    'Pb - non combustion - Unrefined copper; copper anodes for electrolytic refining - air',
    'Pb - non combustion - Zinc, unwrought, not alloyed - air',
]

# Add the columns left to right with plain `+`; a missing value in any source
# column therefore makes that row's total missing as well.
pb_total = data_full[pb_non_combustion_cols[0]]
for col in pb_non_combustion_cols[1:]:
    pb_total = pb_total + data_full[col]
data_full['gov_Pb_non_combustion_total'] = pb_total

# the individual columns are replaced by the total
data_full = data_full.drop(columns=pb_non_combustion_cols)

In [135]:
# Collapse the per-process SOx non-combustion air-emission columns into a single total.
# The column list is written once and reused for both the sum and the drop.
sox_non_combustion_cols = [
    'SOx - non combustion - Agglomeration plant - sinter - air',
    'SOx - non combustion - Bricks production - air',
    'SOx - non combustion - Cement production - air',
    'SOx - non combustion - Chemical wood pulp, dissolving grades - air',
    'SOx - non combustion - Chemical wood pulp, soda and sulphate, other than dissolving grades - air',
    'SOx - non combustion - Chemical wood pulp, sulphite, other than dissolving grades - air',
    'SOx - non combustion - Glass production - air',
    'SOx - non combustion - Lime production - air',
    'SOx - non combustion - Nickel, unwrought - air',
    'SOx - non combustion - Oil refinery - air',
    'SOx - non combustion - Pig iron production, blast furnace - air',
    'SOx - non combustion - Production of coke oven coke - air',
    'SOx - non combustion - Production of gascoke - air',
    'SOx - non combustion - Refined copper; unwrought, not alloyed - air',
    'SOx - non combustion - Refined lead, unwrought - air',
    'SOx - non combustion - Semi-chemical wood pulp, pulp of fibers other than wood - air',
    'SOx - non combustion - Sulphuric acid production - air',
    'SOx - non combustion - Unrefined copper; copper anodes for electrolytic refining - air',
    'SOx - non combustion - Zinc, unwrought, not alloyed - air',
]

# Add the columns left to right with plain `+`; a missing value in any source
# column therefore makes that row's total missing as well.
sox_total = data_full[sox_non_combustion_cols[0]]
for col in sox_non_combustion_cols[1:]:
    sox_total = sox_total + data_full[col]
data_full['gov_SOx_non_combustion_total'] = sox_total

# the individual columns are replaced by the total
data_full = data_full.drop(columns=sox_non_combustion_cols)

In [136]:
# Collapse the per-process Se non-combustion air-emission columns into a single total.
# The column list is written once and reused for both the sum and the drop.
se_non_combustion_cols = [
    'Se - non combustion - Agglomeration plant - pellets - air',
    'Se - non combustion - Agglomeration plant - sinter - air',
    'Se - non combustion - Glass production - air',
    'Se - non combustion - Steel production: basic oxygen furnace - air',
]

# Add the columns left to right with plain `+`; a missing value in any source
# column therefore makes that row's total missing as well.
se_total = data_full[se_non_combustion_cols[0]]
for col in se_non_combustion_cols[1:]:
    se_total = se_total + data_full[col]
data_full['gov_Se_non_combustion_total'] = se_total

# the individual columns are replaced by the total
data_full = data_full.drop(columns=se_non_combustion_cols)

In [137]:
# Collapse the per-process Zn non-combustion air-emission columns into a single total.
# The column list is written once and reused for both the sum and the drop.
zn_non_combustion_cols = [
    'Zn - non combustion - Agglomeration plant - pellets - air',
    'Zn - non combustion - Agglomeration plant - sinter - air',
    'Zn - non combustion - Glass production - air',
    'Zn - non combustion - Nickel, unwrought - air',
    'Zn - non combustion - Pig iron production, blast furnace - air',
    'Zn - non combustion - Refined copper; unwrought, not alloyed - air',
    'Zn - non combustion - Refined lead, unwrought - air',
    'Zn - non combustion - Steel production: basic oxygen furnace - air',
    'Zn - non combustion - Steel production: electric arc furnace - air',
    'Zn - non combustion - Steel production: open hearth furnace - air',
    'Zn - non combustion - Unrefined copper; copper anodes for electrolytic refining - air',
    'Zn - non combustion - Zinc, unwrought, not alloyed - air',
]

# Add the columns left to right with plain `+`; a missing value in any source
# column therefore makes that row's total missing as well.
zn_total = data_full[zn_non_combustion_cols[0]]
for col in zn_non_combustion_cols[1:]:
    zn_total = zn_total + data_full[col]
data_full['gov_Zn_non_combustion_total'] = zn_total

# the individual columns are replaced by the total
data_full = data_full.drop(columns=zn_non_combustion_cols)

In [138]:
# Collapse the per-crop 'Cropland - ...' columns into a single total.
# The column list is written once and reused for both the sum and the drop.
cropland_cols = [
    'Cropland - Cereal grains nec',
    'Cropland - Crops nec',
    'Cropland - Fodder crops-Cattle',
    'Cropland - Fodder crops-Meat animals nec',
    'Cropland - Fodder crops-Pigs',
    'Cropland - Fodder crops-Poultry',
    'Cropland - Fodder crops-Raw milk',
    'Cropland - Oil seeds',
    'Cropland - Paddy rice',
    'Cropland - Plant-based fibers',
    'Cropland - Sugar cane, sugar beet',
    'Cropland - Vegetables, fruit, nuts',
    'Cropland - Wheat',
]

# Add the columns left to right with plain `+`; a missing value in any source
# column therefore makes that row's total missing as well.
cropland_total = data_full[cropland_cols[0]]
for col in cropland_cols[1:]:
    cropland_total = cropland_total + data_full[col]
data_full['gov_Cropland_total'] = cropland_total

# the individual columns are replaced by the total
data_full = data_full.drop(columns=cropland_cols)

In [139]:
# Drop the remaining columns that are not carried into the final dataset
# (author's rationale: they do not have a significant impact on the target variable).
# Every label is listed exactly once; `drop` raises KeyError if any of them is
# missing from the frame.
# NOTE(review): the label containing 'Andersson-Sk...ld' is mis-encoded; it is kept
# exactly as written because it presumably matches the column name as loaded — confirm.
remaining_cols_to_drop = [
    # impact-indicator columns (labels of the form '<indicator> | <approach> | <factor>')
    'GHG emissions (GWP100min) | Problem oriented approach: non baseline (CML, 1999) | net GWP100 min(Houghton et al., 2001)',
    'GHG emissions (GWP100max) | Problem oriented approach: non baseline (CML, 1999) | net GWP100 max(Houghton et al., 2001)',
    'GHG emissions (GWP20) | Problem oriented approach: non baseline (CML, 1999) | GWP20 (IPCC, 2007)',
    'GHG emissions (GWP500) | Problem oriented approach: non baseline (CML, 1999) | GWP500 (IPCC, 2007)',
    'ozone layer depletion (ODP steady state) | Problem oriented approach: baseline (CML, 1999) | ODP steady state (WMO, 2003)',
    'human toxicity (HTP inf) | Problem oriented approach: baseline (CML, 1999) | HTP inf. (Huijbregts, 1999 & 2000)',
    'Freshwater aquatic ecotoxicity (FAETP inf) | Problem oriented approach: baseline (CML, 1999) | FAETP inf. (Huijbregts, 1999 & 2000)',
    'Marine aquatic ecotoxicity (MAETP inf) | Problem oriented approach: baseline (CML, 1999) | MAETP inf. (Huijbregts, 1999 & 2000)',
    'Freshwater sedimental ecotoxicity (FSETP inf) | Problem oriented approach: non baseline (CML, 1999) | FSETP inf. (Huijbregts, 1999 & 2000)',
    'Marine sedimental ecotoxicity (MSETP inf) | Problem oriented approach: non baseline (CML, 1999) | MSETP inf. (Huijbregts, 1999 & 2000)',
    'Terrestrial ecotoxicity (TETP inf) | Problem oriented approach: baseline (CML, 1999) | TETP inf.(Huijbregts, 1999 & 2000)',
    'human toxicity (HTP20) | Problem oriented approach: non baseline (CML, 1999) | HTP 20 (Huijbregts, 1999 & 2000)',
    'Freshwater aquatic ecotoxicity (FAETP20) | Problem oriented approach: non baseline (CML, 1999) | FAETP 20 (Huijbregts, 1999 & 2000)',
    'Marine aquatic ecotoxicity (MAETP20) | Problem oriented approach: non baseline (CML, 1999) | MAETP 20 (Huijbregts, 1999 & 2000)',
    'Freshwater sedimental ecotoxicity (FSETP20) | Problem oriented approach: non baseline (CML, 1999) | FSETP 20 (Huijbregts, 1999 & 2000)',
    'Marine sedimental ecotoxicity (MSETP20) | Problem oriented approach: non baseline (CML, 1999) | MSETP 20 (Huijbregts, 1999 & 2000)',
    'Terrestrial ecotoxicity (TETP20) | Problem oriented approach: non baseline (CML, 1999) | TETP 20 (Huijbregts, 1999 & 2000)',
    'human toxicity (HTP100) | Problem oriented approach: non baseline (CML, 1999) | HTP 100 (Huijbregts, 1999 & 2000)',
    'Freshwater aquatic ecotoxicity (FAETP100) | Problem oriented approach: non baseline (CML, 1999) | FAETP 100 (Huijbregts, 1999 & 2000)',
    'Marine aquatic ecotoxicity (MAETP100) | Problem oriented approach: non baseline (CML, 1999) | MAETP 100 (Huijbregts, 1999 & 2000)',
    'Freshwater sedimental ecotoxicity (FSETP100) | Problem oriented approach: non baseline (CML, 1999) | FSETP 100 (Huijbregts, 1999 & 2000)',
    'Marine sedimental ecotoxicity (MSETP100) | Problem oriented approach: non baseline (CML, 1999) | MSETP 100 (Huijbregts, 1999 & 2000)',
    'Terrestrial ecotoxicity (TETP100) | Problem oriented approach: non baseline (CML, 1999) | TETP 100 (Huijbregts, 1999 & 2000)',
    'human toxicity (HTP500) | Problem oriented approach: non baseline (CML, 1999) | HTP 500 (Huijbregts, 1999 & 2000)',
    'Freshwater aquatic ecotoxicity (FAETP500) | Problem oriented approach: non baseline (CML, 1999) | FAETP 500 (Huijbregts, 1999 & 2000)',
    'Marine aquatic ecotoxicity (MAETP500) | Problem oriented approach: non baseline (CML, 1999) | MAETP 500 (Huijbregts, 1999 & 2000)',
    'Freshwater sedimental ecotoxicity (FSETP500) | Problem oriented approach: non baseline (CML, 1999) | FSETP 500 (Huijbregts, 1999 & 2000)',
    'Marine sedimental ecotoxicity (MSETP500) | Problem oriented approach: non baseline (CML, 1999) | MSETP 500 (Huijbregts, 1999 & 2000)',
    'Terrestrial ecotoxicity (TETP500) | Problem oriented approach: non baseline (CML, 1999) | TETP 500 (Huijbregts, 1999 & 2000) TETP 500',
    'photochemical oxidation (high NOx) | Problem oriented approach: baseline (CML, 1999) | POCP (Jenkin & Hayman, 1999; Derwent et al. 1998; high NOx)',
    'photochemical oxidation (low NOx) | Problem oriented approach: non baseline (CML, 1999) | POCP (Andersson-Skï¿½ld et al. 1992; low NOx)',
    'photochemical oxidation (MIR; very high NOx) | Problem oriented approach: non baseline (CML, 1999) | MIR 1997; very high NOx (Carter, 1994, 1997, 1998;Carter, Pierce, Luo &  Malkina, 1995 )',
    'photochemical oxidation (MOIR; high NOx) | Problem oriented approach: non baseline (CML, 1999) | MOIR; high NOx (Carter, 1994, 1997, 1998;Carter, Pierce, Luo &  Malkina, 1995 )',
    'photochemical oxidation (EBIR; low NOx) | Problem oriented approach: non baseline (CML, 1999) | EBIR; low NOx (Carter, 1994, 1997, 1998;Carter, Pierce, Luo &  Malkina, 1995 )',
    'acidification (incl. fate, average Europe total, A&B) | Problem oriented approach: baseline (CML, 1999) | AP ( Huijbregts, 1999; average Europe total, A&B)',
    'acidification (fate not incl.) | Problem oriented approach: non baseline (CML, 1999) | AP (Hauschild & Wenzel (1998).',
    'eutrophication (fate not incl.) | Problem oriented approach: baseline (CML, 1999) | EP (Heijungs et al. 1992))',
    'eutrophication (incl. fate, average Europe total, A&B) | Problem oriented approach: non baseline (CML, 1999) | EP ( Huijbregts, 1999; average Europe total, A&B)',
    'odour | Problem oriented approach: non baseline (CML, 1999) | 1/OTV',
    'GHG emissions AR5 (GWP100) | GWP100 (IPCC, 2010)',
    # short-named columns
    'Nitrogen',
    'Phosphorous',
    'PM10',
    'PM25',
    'SOx',
    'NOx',
    # individual '- air' / '- water' / '- soil' columns
    'NH3 - non combustion - N- fertilizer production - air',
    'SF6 - air',
    'HFC - air',
    'PFC - air',
    'CH4 - agriculture - air',
    'CO2 - agriculture - peat decay - air',
    'N - agriculture - water',
    'N2O - agriculture - air',
    'NH3 - agriculture - air',
    'NOX - agriculture - air',
    'P - agriculture - soil',
    'P - agriculture - water',
    'Pxx - agriculture - soil',
    'CH4 - waste - air',
    'CO - waste - air',
    'CO2 - waste - biogenic - air',
    'CO2 - waste - fossil - air',
    'N - waste - water',
    'NH3 - waste - air',
    'NOX - waste - air',
    'P - waste - water',
    'PM2.5 - waste - air',
    'SOx - waste - air',
    # employment columns
    'Employment hour',
    'Employment',
    "GHG emissions (GWP100) | Problem oriented approach: baseline (CML, 2001) | GWP100 (IPCC, 2007)",
]
data_full = data_full.drop(columns=remaining_cols_to_drop)

In [91]:
# Write the grouped and reduced frame to a CSV file (path relative to the current
# working directory), leaving out the index column.
# NOTE(review): this cell's execution count (91) is lower than that of the preceding
# cell (139), so it was last run out of order — restart the kernel and run all cells
# before relying on the saved file.
output_csv = 'data_combined_final.csv'
data_full.to_csv(output_csv, index=False)