In [1]:
import os
import pandas as pd
import plotly.graph_objects as go


In [2]:
path = os.path.join('data', 'dataset_alpha.csv')
data = pd.read_csv(path)
variables = ['x1', 'x28', 'x8', 'x38', 'x48']
data['cost'] = data[variables].sum(axis=1)

In [112]:
data['cost']

0      404.527500
1      225.928740
2      185.096523
3      196.530000
4      876.980000
          ...    
210    249.153333
211    337.533177
212    230.471000
213    370.646000
214    307.379583
Name: cost, Length: 215, dtype: float64

In [113]:
continent_data = data.groupby('continent').mean().reset_index()

fig = go.Figure(
    data = go.Bar(
        x = continent_data['continent'],
        y = continent_data['cost']
    )
    )

fig.update_layout(
    title = dict(
            text=f'Average cost per continent',
            font=dict(size=30),
        )
    )


The default value of numeric_only in DataFrameGroupBy.mean is deprecated. In a future version, numeric_only will default to False. Either specify numeric_only or select only columns which should be valid for the function.



In [13]:
data.unesco_props.min()

0.0

In [114]:
data.columns

Index(['country', 'x1', 'x2', 'x3', 'x4', 'x5', 'x6', 'x7', 'x8', 'x23', 'x25',
       'x24', 'x27', 'x28', 'x30', 'x33', 'x37', 'x38', 'x48', 'x49',
       'unesco_props', 'safety_index', 'quality_of_life', 'total_population',
       'code', 'GDP', 'pop_0_14_%', 'pop_0_14_%_female', 'pop_0_14_%_male',
       'pop_15_64_%', 'pop_15_64_%_female', 'pop_15_64_%_male',
       'pop_65_plus_%', 'pop_65_plus_%_female', 'pop_65_plus_%_male',
       'continent', 'female_population', 'male_population', 'cost'],
      dtype='object')

In [115]:
country = 'Portugal'
country_data = data[data.country == country]

In [116]:
country_data.columns

Index(['country', 'x1', 'x2', 'x3', 'x4', 'x5', 'x6', 'x7', 'x8', 'x23', 'x25',
       'x24', 'x27', 'x28', 'x30', 'x33', 'x37', 'x38', 'x48', 'x49',
       'unesco_props', 'safety_index', 'quality_of_life', 'total_population',
       'code', 'GDP', 'pop_0_14_%', 'pop_0_14_%_female', 'pop_0_14_%_male',
       'pop_15_64_%', 'pop_15_64_%_female', 'pop_15_64_%_male',
       'pop_65_plus_%', 'pop_65_plus_%_female', 'pop_65_plus_%_male',
       'continent', 'female_population', 'male_population', 'cost'],
      dtype='object')

In [117]:
pop_columns = [col for col in country_data.columns if col.startswith('pop_') and not col.endswith('%')]

In [118]:
pop_data = country_data[pop_columns].T.reset_index()

In [119]:
pop_data.columns = ['type_pop', 'percentage']

In [120]:
pop_data

Unnamed: 0,type_pop,percentage
0,pop_0_14_%_female,12.36029
1,pop_0_14_%_male,14.443874
2,pop_15_64_%_female,62.786259
3,pop_15_64_%_male,65.562452
4,pop_65_plus_%_female,24.853451
5,pop_65_plus_%_male,19.993674


In [122]:
pop_data['gender'] = ''
pop_data['age-range'] = ''
for index, desc in enumerate(pop_data['type_pop']):
    if 'female' in desc:
        pop_data.loc[index, 'gender'] = 'Female'
        pop_data.loc[index, 'percentage'] = pop_data.loc[index, 'percentage'] * country_data['female_population'].values[0] / 100

    else:
        pop_data.loc[index, 'gender'] = 'Male'
        pop_data.loc[index, 'percentage'] = pop_data.loc[index, 'percentage'] * country_data['male_population'].values[0] / 100


    if '0_14' in desc:
        pop_data.loc[index, 'age-range'] = '0-14'
    elif '15_64' in desc:
        pop_data.loc[index, 'age-range'] = '15-64'

    else:
        pop_data.loc[index, 'age-range'] = '65+'


    

In [124]:
pop_data['percentage'].sum()

10325146.999999998

In [125]:
fig = go.Figure(
    data=[
    go.Bar(name='Female', x=pop_data.loc[pop_data.gender == 'Female', 'age-range'], y=pop_data.loc[pop_data.gender == 'Female', 'percentage']),
    go.Bar(name='Male', x=pop_data.loc[pop_data.gender == 'Male', 'age-range'], y=pop_data.loc[pop_data.gender == 'Male', 'percentage'])
    ]
)
fig.show()

In [158]:
meals = [1, 2, 3]
market = [23, 25, 24, 27]
transports = [28, 30, 33]
internet = [37, 38]
habitation = [48, 49]

In [159]:
meals = [f'x{m}' for m in meals]
market = [f'x{m}' for m in market]
transports = [f'x{m}' for m in transports]
internet = [f'x{m}' for m in internet]
habitation = [f'x{m}' for m in habitation]

In [175]:
costs = {
    'Meals' : ((country_data['x1'] + country_data['x2']/2 + country_data['x3']) / 3).values[0] * 2,
    'Market' : country_data[market].mean(axis=0).values[0],
    'Transports' : country_data[transports].mean(axis=0).values[0] * 3,
    'Telecommunications' : country_data[internet].mean(axis=0).values[0],
    'Accomodation' : country_data[habitation].mean(axis=0).values[0] / 30.437
}
costs


{'Meals': 22.197752639517343,
 'Market': 0.616,
 'Transports': 5.205,
 'Telecommunications': 0.1502325581395348,
 'Accomodation': 21.22344209274055}

In [172]:
values = list(costs.values())
names = list(costs.keys())

In [176]:
fig = go.Figure(
    go.Pie(
        labels = names,
        values = values
    )
)
fig.update_layout(
    title = dict(
        text = '% of each cost for the total value',
        x = 0.5
    )
)
fig.show()