In [1]:
import pandas as pd

Loading raw datasets

In [2]:
# Locations of the five raw BRICS workbooks, in the order in which the
# resulting frames are stored in `datasets`.
raw_workbook_paths = (
    'dataset/unprepared_dataset/BRICS Economic Data.xlsx',
    'dataset/unprepared_dataset/BRICS Education and Environment Data.xlsx',
    'dataset/unprepared_dataset/BRICS Health and Poverty Data.xlsx',
    'dataset/unprepared_dataset/BRICS Private Sector Data.xlsx',
    'dataset/unprepared_dataset/BRICS Public Sector and Society Data.xlsx',
)

# Read each workbook into its own DataFrame.
datasets = [pd.read_excel(workbook_path) for workbook_path in raw_workbook_paths]

# Keep the per-area names available so every frame can still be addressed
# directly; they refer to the very same objects that are held in `datasets`.
(
    economic_data,
    education_data,
    health_data,
    private_sector_data,
    public_sector_data,
) = datasets

Remove the unnecessary fields and regroup each dataset so that every series becomes a column

In [3]:
# Size (in rows) of one block inside each workbook, listed in the same order
# as `datasets`.
# NOTE(review): presumably the number of indicators reported per country in
# each workbook - confirm against the source files.
attribute_values = [345, 285, 277, 168, 319]


def _regroup_dataset(dataset, attribute_value, n_blocks=5):
    """Reshape one raw workbook so that every series becomes a column.

    Steps:
      1. Drop the 'Country Name', 'Country Code' and 'Series Code' columns.
      2. Drop every row whose 'Series Name' is missing.
      3. Cut the remaining rows into ``n_blocks`` consecutive blocks of
         ``attribute_value`` rows each (the last block takes all remaining
         rows).
      4. Transpose every block and stack the blocks on top of each other.
      5. Remove the helper rows created by steps 3-4 (see below).

    Parameters
    ----------
    dataset : pandas.DataFrame
        Raw workbook; must contain the columns 'Country Name',
        'Country Code', 'Series Name' and 'Series Code'.
    attribute_value : int
        Number of rows that make up one block.
    n_blocks : int, default 5
        Number of consecutive blocks to cut the rows into.
        NOTE(review): presumably one block per BRICS country - confirm.

    Returns
    -------
    pandas.DataFrame
        Row one holds the series names of the first block; every following
        row holds the values of one original value column for one block.
        The row labels are the positions left over after the helper rows
        were dropped (they are not renumbered here).
    """
    # Keep only the series name and the value columns.
    trimmed = dataset.drop(labels=['Country Name', 'Country Code', 'Series Code'], axis=1)
    # Rows without a series name are discarded.
    trimmed = trimmed.drop(trimmed[pd.isna(trimmed['Series Name'])].index)

    blocks = []
    for block_no in range(n_blocks):
        start = block_no * attribute_value
        # The last block is open-ended, exactly like a `start:` slice.
        stop = None if block_no == n_blocks - 1 else start + attribute_value
        # reset_index() adds an 'index' column, so after transposing each
        # block starts with an 'index' row followed by a 'Series Name' row.
        blocks.append(trimmed.iloc[start:stop, :].reset_index().transpose())

    # Every transposed block has one row per column of `trimmed` plus the
    # extra 'index' row.
    rows_per_block = trimmed.shape[1] + 1

    # Helper rows to remove: the 'index' row of every block, and the repeated
    # 'Series Name' row of every block except the first (which is kept as the
    # header row of the result).
    helper_rows = [0]
    for block_no in range(1, n_blocks):
        block_start = block_no * rows_per_block
        helper_rows.extend([block_start, block_start + 1])

    return pd.concat(blocks, ignore_index=True).drop(labels=helper_rows, axis=0)


prepared_datasets = []
for dataset, attribute_value in zip(datasets, attribute_values):
    prepared_datasets.append(_regroup_dataset(dataset, attribute_value))

Connect the resulting pieces

In [4]:
# Put the prepared frames next to each other (column-wise; rows are matched
# on their labels) and renumber the rows from 0. `drop=True` discards the old
# row labels directly instead of materialising them as a column that would
# have to be sliced off again.
concat_dataset = pd.concat(prepared_datasets, axis=1).reset_index(drop=True)
concat_dataset

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,309,310,311,312,313,314,315,316,317,318
0,Adjusted net national income (annual % growth),Adjusted net national income (constant 2015 US$),Adjusted net national income (current US$),Adjusted net national income per capita (annua...,Adjusted net national income per capita (const...,Adjusted net national income per capita (curre...,"Adjusted net savings, excluding particulate em...","Adjusted net savings, excluding particulate em...","Adjusted net savings, including particulate em...","Adjusted net savings, including particulate em...",...,"Unemployment, youth male (% of male labor forc...","Unemployment, youth male (% of male labor forc...","Unemployment, youth total (% of total labor fo...","Unemployment, youth total (% of total labor fo...","Vulnerable employment, female (% of female emp...","Vulnerable employment, male (% of male employm...","Vulnerable employment, total (% of total emplo...","Wage and salaried workers, female (% of female...","Wage and salaried workers, male (% of male emp...","Wage and salaried workers, total (% of total e..."
1,,308554905378.880432,38799440792.935898,,3244.094262,407.928808,,,,,...,,,,,,,,,,
2,11.64947,344499916279.819092,45244303462.111702,8.93544,3533.968346,464.125405,,,,,...,,,,,,,,,,
3,12.344548,387026874506.892334,53813384465.657799,9.670966,3875.737209,538.891591,,,,,...,,,,,,,,,,
4,11.992465,433440935297.989868,72178679239.526306,9.363915,4238.65796,705.838395,,,,,...,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
251,-0.289369,261379294061.43338,241507035833.834991,-1.746295,4649.518309,4296.693424,0.652777,1881087181.38016,0.159571,459832130.915643,...,48.949001,48.59,53.683998,53.349998,9.7,9.5,9.59,87.660004,82.760002,84.879997
252,2.644555,268291614395.350586,285772371552.637024,1.200392,4705.360279,5012.692856,1.70025,5765053300.62488,1.21908,4133545546.66279,...,49.165001,48.82,53.532001,53.200001,10.22,9.69,9.93,87.019997,82.589996,84.529999
253,-0.358592,267329542519.672638,299491029449.79303,-1.708182,4644.421774,5182.176334,0.271589,968619743.668668,-0.209581,-747468063.73103,...,49.493999,49.150002,53.744999,53.41,10.24,10.39,10.32,87.459999,82.050003,84.43
254,0.417111,268444602750.852417,284723341310.499023,-0.89601,4597.557213,4862.222806,-0.288157,-984123511.536506,-0.75462,-2577200301.00945,...,53.561001,53.220001,57.379002,57.049999,10.59,10.73,10.67,86.690002,81.269997,83.660004


First version of the dataset: features that depend on GDP or GNI are removed

 Create a list of parameters dependent on GDP or GNI

In [5]:
# The target feature is kept even though its name mentions GDP.
target_feature = 'GDP per capita (current US$)'

# The first row of the concatenated frame holds the series names.
series_names = concat_dataset.iloc[0].tolist()

# Collect every series whose name mentions GDP or GNI, except the target.
remove_list = [
    series_name
    for series_name in series_names
    if series_name != target_feature
    and ('GDP' in series_name or 'GNI' in series_name)
]
remove_list

['Adjusted net savings, excluding particulate emission damage (% of GNI)',
 'Adjusted net savings, including particulate emission damage (% of GNI)',
 'Adjusted savings: carbon dioxide damage (% of GNI)',
 'Adjusted savings: consumption of fixed capital (% of GNI)',
 'Adjusted savings: education expenditure (% of GNI)',
 'Adjusted savings: energy depletion (% of GNI)',
 'Adjusted savings: gross savings (% of GNI)',
 'Adjusted savings: mineral depletion (% of GNI)',
 'Adjusted savings: natural resources depletion (% of GNI)',
 'Adjusted savings: net forest depletion (% of GNI)',
 'Adjusted savings: net national savings (% of GNI)',
 'Adjusted savings: particulate emission damage (% of GNI)',
 'Agriculture, forestry, and fishing, value added (% of GDP)',
 'Current account balance (% of GDP)',
 'Discrepancy in expenditure estimate of GDP (constant LCU)',
 'Discrepancy in expenditure estimate of GDP (current LCU)',
 'Exports of goods and services (% of GDP)',
 'External balance on goods an

Replace the column names with the series names stored in the first row

In [6]:
# Series names taken from the first row of the concatenated frame.
header_labels = concat_dataset.iloc[0].tolist()

# Use them as column labels, then discard that first row and renumber the
# remaining rows from 0.
correct_column_names_dataset = (
    concat_dataset
    .set_axis(header_labels, axis=1)
    .iloc[1:, :]
    .reset_index(drop=True)
)
correct_column_names_dataset

Unnamed: 0,Adjusted net national income (annual % growth),Adjusted net national income (constant 2015 US$),Adjusted net national income (current US$),Adjusted net national income per capita (annual % growth),Adjusted net national income per capita (constant 2015 US$),Adjusted net national income per capita (current US$),"Adjusted net savings, excluding particulate emission damage (% of GNI)","Adjusted net savings, excluding particulate emission damage (current US$)","Adjusted net savings, including particulate emission damage (% of GNI)","Adjusted net savings, including particulate emission damage (current US$)",...,"Unemployment, youth male (% of male labor force ages 15-24) (modeled ILO estimate)","Unemployment, youth male (% of male labor force ages 15-24) (national estimate)","Unemployment, youth total (% of total labor force ages 15-24) (modeled ILO estimate)","Unemployment, youth total (% of total labor force ages 15-24) (national estimate)","Vulnerable employment, female (% of female employment) (modeled ILO estimate)","Vulnerable employment, male (% of male employment) (modeled ILO estimate)","Vulnerable employment, total (% of total employment) (modeled ILO estimate)","Wage and salaried workers, female (% of female employment) (modeled ILO estimate)","Wage and salaried workers, male (% of male employment) (modeled ILO estimate)","Wage and salaried workers, total (% of total employment) (modeled ILO estimate)"
0,,308554905378.880432,38799440792.935898,,3244.094262,407.928808,,,,,...,,,,,,,,,,
1,11.64947,344499916279.819092,45244303462.111702,8.93544,3533.968346,464.125405,,,,,...,,,,,,,,,,
2,12.344548,387026874506.892334,53813384465.657799,9.670966,3875.737209,538.891591,,,,,...,,,,,,,,,,
3,11.992465,433440935297.989868,72178679239.526306,9.363915,4238.65796,705.838395,,,,,...,,,,,,,,,,
4,5.131874,455684576860.778931,95496328254.079498,2.675231,4352.051841,912.040879,,,,,...,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
250,-0.289369,261379294061.43338,241507035833.834991,-1.746295,4649.518309,4296.693424,0.652777,1881087181.38016,0.159571,459832130.915643,...,48.949001,48.59,53.683998,53.349998,9.7,9.5,9.59,87.660004,82.760002,84.879997
251,2.644555,268291614395.350586,285772371552.637024,1.200392,4705.360279,5012.692856,1.70025,5765053300.62488,1.21908,4133545546.66279,...,49.165001,48.82,53.532001,53.200001,10.22,9.69,9.93,87.019997,82.589996,84.529999
252,-0.358592,267329542519.672638,299491029449.79303,-1.708182,4644.421774,5182.176334,0.271589,968619743.668668,-0.209581,-747468063.73103,...,49.493999,49.150002,53.744999,53.41,10.24,10.39,10.32,87.459999,82.050003,84.43
253,0.417111,268444602750.852417,284723341310.499023,-0.89601,4597.557213,4862.222806,-0.288157,-984123511.536506,-0.75462,-2577200301.00945,...,53.561001,53.220001,57.379002,57.049999,10.59,10.73,10.67,86.690002,81.269997,83.660004


Removing features that depend on GDP or GNI

In [7]:
# Drop every column whose name was collected in `remove_list`.
remove_dependencies_dataset = correct_column_names_dataset.drop(columns=remove_list)
remove_dependencies_dataset

Unnamed: 0,Adjusted net national income (annual % growth),Adjusted net national income (constant 2015 US$),Adjusted net national income (current US$),Adjusted net national income per capita (annual % growth),Adjusted net national income per capita (constant 2015 US$),Adjusted net national income per capita (current US$),"Adjusted net savings, excluding particulate emission damage (current US$)","Adjusted net savings, including particulate emission damage (current US$)",Adjusted savings: carbon dioxide damage (current US$),Adjusted savings: consumption of fixed capital (current US$),...,"Unemployment, youth male (% of male labor force ages 15-24) (modeled ILO estimate)","Unemployment, youth male (% of male labor force ages 15-24) (national estimate)","Unemployment, youth total (% of total labor force ages 15-24) (modeled ILO estimate)","Unemployment, youth total (% of total labor force ages 15-24) (national estimate)","Vulnerable employment, female (% of female employment) (modeled ILO estimate)","Vulnerable employment, male (% of male employment) (modeled ILO estimate)","Vulnerable employment, total (% of total employment) (modeled ILO estimate)","Wage and salaried workers, female (% of female employment) (modeled ILO estimate)","Wage and salaried workers, male (% of male employment) (modeled ILO estimate)","Wage and salaried workers, total (% of total employment) (modeled ILO estimate)"
0,,308554905378.880432,38799440792.935898,,3244.094262,407.928808,,,247930608.175318,2822871749.83211,...,,,,,,,,,,
1,11.64947,344499916279.819092,45244303462.111702,8.93544,3533.968346,464.125405,,,291570540.153229,3323606999.9882,...,,,,,,,,,,
2,12.344548,387026874506.892334,53813384465.657799,9.670966,3875.737209,538.891591,,,346555491.155778,4115471405.44596,...,,,,,,,,,,
3,11.992465,433440935297.989868,72178679239.526306,9.363915,4238.65796,705.838395,,,432929009.000466,6002497917.22738,...,,,,,,,,,,
4,5.131874,455684576860.778931,95496328254.079498,2.675231,4352.051841,912.040879,,,522547346.019495,8170194969.79321,...,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
250,-0.289369,261379294061.43338,241507035833.834991,-1.746295,4649.518309,4296.693424,1881087181.38016,459832130.915643,17119709390.4196,42013514111.1549,...,48.949001,48.59,53.683998,53.349998,9.7,9.5,9.59,87.660004,82.760002,84.879997
251,2.644555,268291614395.350586,285772371552.637024,1.200392,4705.360279,5012.692856,5765053300.62488,4133545546.66279,17679852909.932899,47982186971.519997,...,49.165001,48.82,53.532001,53.200001,10.22,9.69,9.93,87.019997,82.589996,84.529999
252,-0.358592,267329542519.672638,299491029449.79303,-1.708182,4644.421774,5182.176334,968619743.668668,-747468063.73103,18243787424.042702,51117701564.701797,...,49.493999,49.150002,53.744999,53.41,10.24,10.39,10.32,87.459999,82.050003,84.43
253,0.417111,268444602750.852417,284723341310.499023,-0.89601,4597.557213,4862.222806,-984123511.536506,-2577200301.00945,18616818998.388901,49353462039.636002,...,53.561001,53.220001,57.379002,57.049999,10.59,10.73,10.67,86.690002,81.269997,83.660004


Remove the columns that contain only missing (NaN) values

In [8]:
# A column is removed only when every one of its values is missing; columns
# with at least one value are kept.
without_na_dataset = remove_dependencies_dataset.dropna(how='all', axis='columns')
without_na_dataset

Unnamed: 0,Adjusted net national income (annual % growth),Adjusted net national income (constant 2015 US$),Adjusted net national income (current US$),Adjusted net national income per capita (annual % growth),Adjusted net national income per capita (constant 2015 US$),Adjusted net national income per capita (current US$),"Adjusted net savings, excluding particulate emission damage (current US$)","Adjusted net savings, including particulate emission damage (current US$)",Adjusted savings: carbon dioxide damage (current US$),Adjusted savings: consumption of fixed capital (current US$),...,"Unemployment, youth male (% of male labor force ages 15-24) (modeled ILO estimate)","Unemployment, youth male (% of male labor force ages 15-24) (national estimate)","Unemployment, youth total (% of total labor force ages 15-24) (modeled ILO estimate)","Unemployment, youth total (% of total labor force ages 15-24) (national estimate)","Vulnerable employment, female (% of female employment) (modeled ILO estimate)","Vulnerable employment, male (% of male employment) (modeled ILO estimate)","Vulnerable employment, total (% of total employment) (modeled ILO estimate)","Wage and salaried workers, female (% of female employment) (modeled ILO estimate)","Wage and salaried workers, male (% of male employment) (modeled ILO estimate)","Wage and salaried workers, total (% of total employment) (modeled ILO estimate)"
0,,308554905378.880432,38799440792.935898,,3244.094262,407.928808,,,247930608.175318,2822871749.83211,...,,,,,,,,,,
1,11.64947,344499916279.819092,45244303462.111702,8.93544,3533.968346,464.125405,,,291570540.153229,3323606999.9882,...,,,,,,,,,,
2,12.344548,387026874506.892334,53813384465.657799,9.670966,3875.737209,538.891591,,,346555491.155778,4115471405.44596,...,,,,,,,,,,
3,11.992465,433440935297.989868,72178679239.526306,9.363915,4238.65796,705.838395,,,432929009.000466,6002497917.22738,...,,,,,,,,,,
4,5.131874,455684576860.778931,95496328254.079498,2.675231,4352.051841,912.040879,,,522547346.019495,8170194969.79321,...,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
250,-0.289369,261379294061.43338,241507035833.834991,-1.746295,4649.518309,4296.693424,1881087181.38016,459832130.915643,17119709390.4196,42013514111.1549,...,48.949001,48.59,53.683998,53.349998,9.7,9.5,9.59,87.660004,82.760002,84.879997
251,2.644555,268291614395.350586,285772371552.637024,1.200392,4705.360279,5012.692856,5765053300.62488,4133545546.66279,17679852909.932899,47982186971.519997,...,49.165001,48.82,53.532001,53.200001,10.22,9.69,9.93,87.019997,82.589996,84.529999
252,-0.358592,267329542519.672638,299491029449.79303,-1.708182,4644.421774,5182.176334,968619743.668668,-747468063.73103,18243787424.042702,51117701564.701797,...,49.493999,49.150002,53.744999,53.41,10.24,10.39,10.32,87.459999,82.050003,84.43
253,0.417111,268444602750.852417,284723341310.499023,-0.89601,4597.557213,4862.222806,-984123511.536506,-2577200301.00945,18616818998.388901,49353462039.636002,...,53.561001,53.220001,57.379002,57.049999,10.59,10.73,10.67,86.690002,81.269997,83.660004


In [9]:
# Work on an independent (deep) copy so that `without_na_dataset` stays intact.
main_dataset = without_na_dataset.copy(deep=True)

Save the resulting dataset

In [10]:
# Destination workbook of the first prepared version.
prepared_dataset_path = r'dataset/prepared_dataset/BRICS_all_areas_prepared_dataset.xlsx'
# Written with pandas' defaults, so the row index is stored as the first column.
main_dataset.to_excel(prepared_dataset_path)

Second version of the dataset: only features expressed in percent (%) are kept

Create a list of parameters that depend on GDP or GNI, or whose names do not contain '%'.

In [11]:
# The target feature is kept although its name contains no '%' and mentions GDP.
target_feature = 'GDP per capita (current US$)'

# The first row of the concatenated frame holds the series names.
series_names = concat_dataset.iloc[0].tolist()

# Collect every series that is not a percentage (no '%' in its name) or that
# mentions GDP or GNI, except the target.
remove_list = [
    series_name
    for series_name in series_names
    if series_name != target_feature
    and ('%' not in series_name or 'GDP' in series_name or 'GNI' in series_name)
]
remove_list

['Adjusted net national income (constant 2015 US$)',
 'Adjusted net national income (current US$)',
 'Adjusted net national income per capita (constant 2015 US$)',
 'Adjusted net national income per capita (current US$)',
 'Adjusted net savings, excluding particulate emission damage (% of GNI)',
 'Adjusted net savings, excluding particulate emission damage (current US$)',
 'Adjusted net savings, including particulate emission damage (% of GNI)',
 'Adjusted net savings, including particulate emission damage (current US$)',
 'Adjusted savings: carbon dioxide damage (% of GNI)',
 'Adjusted savings: carbon dioxide damage (current US$)',
 'Adjusted savings: consumption of fixed capital (% of GNI)',
 'Adjusted savings: consumption of fixed capital (current US$)',
 'Adjusted savings: education expenditure (% of GNI)',
 'Adjusted savings: education expenditure (current US$)',
 'Adjusted savings: energy depletion (% of GNI)',
 'Adjusted savings: energy depletion (current US$)',
 'Adjusted savin

Replace the column names with the series names stored in the first row

In [12]:
# Series names taken from the first row of the concatenated frame.
header_labels = concat_dataset.iloc[0].tolist()

# Use them as column labels, then discard that first row and renumber the
# remaining rows from 0.
correct_column_names_dataset = (
    concat_dataset
    .set_axis(header_labels, axis=1)
    .iloc[1:, :]
    .reset_index(drop=True)
)
correct_column_names_dataset

Unnamed: 0,Adjusted net national income (annual % growth),Adjusted net national income (constant 2015 US$),Adjusted net national income (current US$),Adjusted net national income per capita (annual % growth),Adjusted net national income per capita (constant 2015 US$),Adjusted net national income per capita (current US$),"Adjusted net savings, excluding particulate emission damage (% of GNI)","Adjusted net savings, excluding particulate emission damage (current US$)","Adjusted net savings, including particulate emission damage (% of GNI)","Adjusted net savings, including particulate emission damage (current US$)",...,"Unemployment, youth male (% of male labor force ages 15-24) (modeled ILO estimate)","Unemployment, youth male (% of male labor force ages 15-24) (national estimate)","Unemployment, youth total (% of total labor force ages 15-24) (modeled ILO estimate)","Unemployment, youth total (% of total labor force ages 15-24) (national estimate)","Vulnerable employment, female (% of female employment) (modeled ILO estimate)","Vulnerable employment, male (% of male employment) (modeled ILO estimate)","Vulnerable employment, total (% of total employment) (modeled ILO estimate)","Wage and salaried workers, female (% of female employment) (modeled ILO estimate)","Wage and salaried workers, male (% of male employment) (modeled ILO estimate)","Wage and salaried workers, total (% of total employment) (modeled ILO estimate)"
0,,308554905378.880432,38799440792.935898,,3244.094262,407.928808,,,,,...,,,,,,,,,,
1,11.64947,344499916279.819092,45244303462.111702,8.93544,3533.968346,464.125405,,,,,...,,,,,,,,,,
2,12.344548,387026874506.892334,53813384465.657799,9.670966,3875.737209,538.891591,,,,,...,,,,,,,,,,
3,11.992465,433440935297.989868,72178679239.526306,9.363915,4238.65796,705.838395,,,,,...,,,,,,,,,,
4,5.131874,455684576860.778931,95496328254.079498,2.675231,4352.051841,912.040879,,,,,...,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
250,-0.289369,261379294061.43338,241507035833.834991,-1.746295,4649.518309,4296.693424,0.652777,1881087181.38016,0.159571,459832130.915643,...,48.949001,48.59,53.683998,53.349998,9.7,9.5,9.59,87.660004,82.760002,84.879997
251,2.644555,268291614395.350586,285772371552.637024,1.200392,4705.360279,5012.692856,1.70025,5765053300.62488,1.21908,4133545546.66279,...,49.165001,48.82,53.532001,53.200001,10.22,9.69,9.93,87.019997,82.589996,84.529999
252,-0.358592,267329542519.672638,299491029449.79303,-1.708182,4644.421774,5182.176334,0.271589,968619743.668668,-0.209581,-747468063.73103,...,49.493999,49.150002,53.744999,53.41,10.24,10.39,10.32,87.459999,82.050003,84.43
253,0.417111,268444602750.852417,284723341310.499023,-0.89601,4597.557213,4862.222806,-0.288157,-984123511.536506,-0.75462,-2577200301.00945,...,53.561001,53.220001,57.379002,57.049999,10.59,10.73,10.67,86.690002,81.269997,83.660004


Removing features that depend on GDP or GNI or do not contain '%' in the name

In [13]:
# Drop every column whose name was collected in `remove_list`.
remove_dependencies_dataset = correct_column_names_dataset.drop(columns=remove_list)
remove_dependencies_dataset

Unnamed: 0,Adjusted net national income (annual % growth),Adjusted net national income per capita (annual % growth),"Agriculture, forestry, and fishing, value added (annual % growth)",Chemicals (% of value added in manufacturing),"Communications, computer, etc. (% of service exports, BoP)","Communications, computer, etc. (% of service imports, BoP)","Debt service (PPG and IMF only, % of exports of goods, services and primary income)",Exports of goods and services (annual % growth),Final consumption expenditure (annual % growth),"Food, beverages and tobacco (% of value added in manufacturing)",...,"Unemployment, youth male (% of male labor force ages 15-24) (modeled ILO estimate)","Unemployment, youth male (% of male labor force ages 15-24) (national estimate)","Unemployment, youth total (% of total labor force ages 15-24) (modeled ILO estimate)","Unemployment, youth total (% of total labor force ages 15-24) (national estimate)","Vulnerable employment, female (% of female employment) (modeled ILO estimate)","Vulnerable employment, male (% of male employment) (modeled ILO estimate)","Vulnerable employment, total (% of total employment) (modeled ILO estimate)","Wage and salaried workers, female (% of female employment) (modeled ILO estimate)","Wage and salaried workers, male (% of male employment) (modeled ILO estimate)","Wage and salaried workers, total (% of total employment) (modeled ILO estimate)"
0,,,5.6,,,,,6.201381,15.160838,,...,,,,,,,,,,
1,11.64947,8.93544,10.154178,,,,,5.509979,11.945017,,...,,,,,,,,,,
2,12.344548,9.670966,3.967431,,,,,24.163169,10.792696,,...,,,,,,,,,,
3,11.992465,9.363915,0.075501,,,,,14.254772,9.906056,,...,,,,,,,,,,
4,5.131874,2.675231,1.299594,,,,,2.333786,9.085887,,...,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
250,-0.289369,-1.746295,-5.151363,6.996506,24.530185,41.472321,5.84175,0.407509,0.989121,22.179296,...,48.949001,48.59,53.683998,53.349998,9.7,9.5,9.59,87.660004,82.760002,84.879997
251,2.644555,1.200392,19.080359,6.996506,25.170093,39.961146,5.017113,-0.273236,1.252008,22.179296,...,49.165001,48.82,53.532001,53.200001,10.22,9.69,9.93,87.019997,82.589996,84.529999
252,-0.358592,-1.708182,0.401916,6.996506,26.390601,37.613985,13.180881,2.808833,2.073555,22.179296,...,49.493999,49.150002,53.744999,53.41,10.24,10.39,10.32,87.459999,82.050003,84.43
253,0.417111,-0.89601,-6.251261,6.996506,26.02065,39.428475,7.506394,-3.408566,1.434285,22.179296,...,53.561001,53.220001,57.379002,57.049999,10.59,10.73,10.67,86.690002,81.269997,83.660004


Remove the columns that contain only missing (NaN) values

In [14]:
# A column is removed only when every one of its values is missing; columns
# with at least one value are kept.
without_na_dataset = remove_dependencies_dataset.dropna(how='all', axis='columns')
without_na_dataset

Unnamed: 0,Adjusted net national income (annual % growth),Adjusted net national income per capita (annual % growth),"Agriculture, forestry, and fishing, value added (annual % growth)",Chemicals (% of value added in manufacturing),"Communications, computer, etc. (% of service exports, BoP)","Communications, computer, etc. (% of service imports, BoP)","Debt service (PPG and IMF only, % of exports of goods, services and primary income)",Exports of goods and services (annual % growth),Final consumption expenditure (annual % growth),"Food, beverages and tobacco (% of value added in manufacturing)",...,"Unemployment, youth male (% of male labor force ages 15-24) (modeled ILO estimate)","Unemployment, youth male (% of male labor force ages 15-24) (national estimate)","Unemployment, youth total (% of total labor force ages 15-24) (modeled ILO estimate)","Unemployment, youth total (% of total labor force ages 15-24) (national estimate)","Vulnerable employment, female (% of female employment) (modeled ILO estimate)","Vulnerable employment, male (% of male employment) (modeled ILO estimate)","Vulnerable employment, total (% of total employment) (modeled ILO estimate)","Wage and salaried workers, female (% of female employment) (modeled ILO estimate)","Wage and salaried workers, male (% of male employment) (modeled ILO estimate)","Wage and salaried workers, total (% of total employment) (modeled ILO estimate)"
0,,,5.6,,,,,6.201381,15.160838,,...,,,,,,,,,,
1,11.64947,8.93544,10.154178,,,,,5.509979,11.945017,,...,,,,,,,,,,
2,12.344548,9.670966,3.967431,,,,,24.163169,10.792696,,...,,,,,,,,,,
3,11.992465,9.363915,0.075501,,,,,14.254772,9.906056,,...,,,,,,,,,,
4,5.131874,2.675231,1.299594,,,,,2.333786,9.085887,,...,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
250,-0.289369,-1.746295,-5.151363,6.996506,24.530185,41.472321,5.84175,0.407509,0.989121,22.179296,...,48.949001,48.59,53.683998,53.349998,9.7,9.5,9.59,87.660004,82.760002,84.879997
251,2.644555,1.200392,19.080359,6.996506,25.170093,39.961146,5.017113,-0.273236,1.252008,22.179296,...,49.165001,48.82,53.532001,53.200001,10.22,9.69,9.93,87.019997,82.589996,84.529999
252,-0.358592,-1.708182,0.401916,6.996506,26.390601,37.613985,13.180881,2.808833,2.073555,22.179296,...,49.493999,49.150002,53.744999,53.41,10.24,10.39,10.32,87.459999,82.050003,84.43
253,0.417111,-0.89601,-6.251261,6.996506,26.02065,39.428475,7.506394,-3.408566,1.434285,22.179296,...,53.561001,53.220001,57.379002,57.049999,10.59,10.73,10.67,86.690002,81.269997,83.660004


Save dataset

In [15]:
# Work on an independent (deep) copy so that `without_na_dataset` stays intact.
main_dataset = without_na_dataset.copy(deep=True)

In [16]:
# Destination workbook of the percentage-only prepared version.
prepared_dataset_path = r'dataset/prepared_dataset/BRICS_all_areas_prepared_dataset_only_%.xlsx'
# Written with pandas' defaults, so the row index is stored as the first column.
main_dataset.to_excel(prepared_dataset_path)