In [25]:
import pandas as pd
import numpy as np
import plotly.express as px

In [26]:
wgi = pd.read_excel('wgidataset.xlsx')

In [27]:
rl = wgi[wgi['indicator'] == 'rl'].reset_index(drop=True)

In [28]:
rl.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 5350 entries, 0 to 5349
Data columns (total 11 columns):
 #   Column        Non-Null Count  Dtype 
---  ------        --------------  ----- 
 0   codeindyr     5350 non-null   object
 1   code          5350 non-null   object
 2   countryname   5350 non-null   object
 3   year          5350 non-null   int64 
 4   indicator     5350 non-null   object
 5   estimate      5350 non-null   object
 6   stddev        5350 non-null   object
 7   nsource       5350 non-null   object
 8   pctrank       5350 non-null   object
 9   pctranklower  5350 non-null   object
 10  pctrankupper  5350 non-null   object
dtypes: int64(1), object(10)
memory usage: 459.9+ KB


In [29]:
rl = rl.iloc[:, [2, 3, 8]]

In [30]:
rl.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 5350 entries, 0 to 5349
Data columns (total 3 columns):
 #   Column       Non-Null Count  Dtype 
---  ------       --------------  ----- 
 0   countryname  5350 non-null   object
 1   year         5350 non-null   int64 
 2   pctrank      5350 non-null   object
dtypes: int64(1), object(2)
memory usage: 125.5+ KB


In [31]:
rl = rl.replace({
    '..': np.nan
})


Downcasting behavior in `replace` is deprecated and will be removed in a future version. To retain the old behavior, explicitly call `result.infer_objects(copy=False)`. To opt-in to the future behavior, set `pd.set_option('future.no_silent_downcasting', True)`



In [32]:
rl['pctrank'].describe()

count    5233.000000
mean       50.050390
std        29.056678
min         0.000000
25%        24.880383
50%        50.000000
75%        75.829384
max       100.000000
Name: pctrank, dtype: float64

In [33]:
rl_mundo = rl.groupby('year')['pctrank'].median().reset_index()

In [34]:
rl_mundo.head()

Unnamed: 0,year,pctrank
0,1996,50.0
1,1998,50.0
2,2000,50.0
3,2002,50.0
4,2003,50.0


In [35]:
rl_mundo['regiao'] = 'Mundo'

In [36]:
rl_mundo.head()

Unnamed: 0,year,pctrank,regiao
0,1996,50.0,Mundo
1,1998,50.0,Mundo
2,2000,50.0,Mundo
3,2002,50.0,Mundo
4,2003,50.0,Mundo


In [37]:
rl_mundo = rl_mundo.melt(
    id_vars=['year', 'regiao'],
    value_vars='pctrank',
    var_name='porcentagem',
    value_name='valores'
)

In [38]:
rl_mundo.head()

Unnamed: 0,year,regiao,porcentagem,valores
0,1996,Mundo,pctrank,50.0
1,1998,Mundo,pctrank,50.0
2,2000,Mundo,pctrank,50.0
3,2002,Mundo,pctrank,50.0
4,2003,Mundo,pctrank,50.0


In [39]:
rl_brasil = rl.loc[rl['countryname'] == 'Brazil'].reset_index(drop=True)

In [40]:
rl_brasil.head()

Unnamed: 0,countryname,year,pctrank
0,Brazil,1996,44.221104
1,Brazil,1998,45.0
2,Brazil,2000,42.288559
3,Brazil,2002,43.781094
4,Brazil,2003,42.786068


In [41]:
rl_brasil = rl_brasil.melt(
    id_vars=['year', 'countryname'],
    value_vars='pctrank',
    var_name='porcentagem',
    value_name='valores'
)

In [42]:
rl_brasil.head()

Unnamed: 0,year,countryname,porcentagem,valores
0,1996,Brazil,pctrank,44.221104
1,1998,Brazil,pctrank,45.0
2,2000,Brazil,pctrank,42.288559
3,2002,Brazil,pctrank,43.781094
4,2003,Brazil,pctrank,42.786068


In [43]:
rl_brasil = rl_brasil.rename(columns={
    'countryname': 'regiao'
})

In [44]:
rl_brasil = rl_brasil.replace({
    'Brazil': 'Brasil'
})

In [45]:
rl_brasil.head()

Unnamed: 0,year,regiao,porcentagem,valores
0,1996,Brasil,pctrank,44.221104
1,1998,Brasil,pctrank,45.0
2,2000,Brasil,pctrank,42.288559
3,2002,Brasil,pctrank,43.781094
4,2003,Brasil,pctrank,42.786068


In [46]:
df = pd.concat([rl_mundo, rl_brasil], ignore_index=True)

In [47]:
df

Unnamed: 0,year,regiao,porcentagem,valores
0,1996,Mundo,pctrank,50.0
1,1998,Mundo,pctrank,50.0
2,2000,Mundo,pctrank,50.0
3,2002,Mundo,pctrank,50.0
4,2003,Mundo,pctrank,50.0
5,2004,Mundo,pctrank,50.0
6,2005,Mundo,pctrank,50.0
7,2006,Mundo,pctrank,50.0
8,2007,Mundo,pctrank,50.0
9,2008,Mundo,pctrank,50.0


In [48]:
fig = px.line(
    df,
    x='year',
    y='valores',
    color='regiao',
    color_discrete_sequence=px.colors.qualitative.Dark2
)

fig.update_layout(
    legend=dict(
        title=None
    ),
    yaxis=dict(
        title='Rule of Law',
        range=[0, 100]
    ),
    xaxis=dict(
        title='Ano'
    ),
    font=dict(
        color='black'
    ),
    margin=dict(
        t=5,
        b=5,
        l=5,
        r=5
    )
)

fig.write_image('C:/Users/lazar/Dissertacao-Mestrado-PoderJud-EGDI/figuras/comparacao_wgi_rl_brasil_mundo.png', scale=3)