# Data Analysis of CEAPS

In [1]:
import pandas as pd
import plotly.offline as py
import plotly.graph_objs as go
import ipywidgets as widgets
from IPython.display import display

# Initialise Plotly's notebook mode before any `py.iplot` call below.
# NOTE(review): connected=True presumably loads plotly.js from a CDN rather than
# embedding it in the notebook, so rendering would need internet access — confirm.
py.init_notebook_mode(connected=True)

In [2]:
# Shared axis styling reused (via a copy) by the x and y axes of every chart below
default_layout = {
    # Fonts for the axis title and the tick labels
    "titlefont": {"size": 18, "color": "darkblue"},
    "tickfont": {"size": 14, "color": "black"},
    # Which axis decorations are drawn
    "showgrid": True,
    "zeroline": True,
    "showline": True,
    "mirror": True,
    # Grid lines
    "gridcolor": "lightgrey",
    "gridwidth": 1,
    # Zero line
    "zerolinecolor": "grey",
    "zerolinewidth": 2,
    # Axis line
    "linecolor": "black",
    "linewidth": 2,
}

In [3]:
# Load the full expenses table from the CSV file and preview its first rows
original_df = pd.read_csv(filepath_or_buffer="../data/data.csv")
original_df.head(5)

Unnamed: 0,ANO,MES,SENADOR,TIPO_DESPESA,CNPJ_CPF,FORNECEDOR,DOCUMENTO,DATA,DETALHAMENTO,VALOR_REEMBOLSADO,COD_DOCUMENTO
0,2014,1,ACIR GURGACZ,"Aluguel de imóveis para escritório político, c...",05.914.650/0001-66,ELETOBRAS DISTRIBUIÇĂO RONDÔNIA,452163-3,2014-01-10,Despesa com pagamento de Energia Elétrica para...,52.38,869442.0
1,2014,1,ACIR GURGACZ,"Aluguel de imóveis para escritório político, c...",05.914.650/0001-66,ELETROBRAS DISTRIBUIÇĂO RONDÔNIA,000060693,2014-01-18,Despesa com pagamento de Energia Elétrica para...,158.26,869445.0
2,2014,1,ACIR GURGACZ,"Aluguel de imóveis para escritório político, c...",004.948.028-63,GILBERTO PISELO DO NASCIMENTO,001/14,2014-01-31,Despesa com pagamento de aluguel de imóvel par...,5000.0,869452.0
3,2014,1,ACIR GURGACZ,"Aluguel de imóveis para escritório político, c...",05.423.963/0001-11,OI MÓVEL S.A.,493020122,2014-01-20,Despesa com pagamento de telefones fixo e móve...,401.71,869446.0
4,2014,1,ACIR GURGACZ,Divulgaçăo da atividade parlamentar,84.721.745/0001-30,ALERTA COMUNICAÇĂO O. F. CALADO EDIÇƠES,000127,2014-01-20,Despesa com material impresso para divulgaçăo ...,726.88,896991.0



## Query the data

### Choose the senator

In [4]:
# Selector listing every distinct senator found in the data, followed by a
# catch-all "All" entry that is selected by default.
senator_options = original_df["SENADOR"].unique().tolist()
senator_options.append("All")

senator = widgets.Dropdown(
    options=senator_options,
    value="All",
    description="Senator:",
    disabled=False,
)
display(senator)

Dropdown(description='Senator:', index=291, options=('ACIR GURGACZ', 'AÉCIO NEVES', 'ALFREDO NASCIMENTO', 'ALO…

### Choose the supplier

**TODO:** the supplier dropdown is not working yet and needs to be fixed.

### Choose the year

In [5]:
# Year-range slider bounded by the earliest and latest year present in the data
earliest_year = original_df["ANO"].min()
latest_year = original_df["ANO"].max()

range_year = widgets.IntRangeSlider(
    value=[2010, 2022],
    min=earliest_year,
    max=latest_year,
    step=1,
    description="Years",
)
display(range_year)

IntRangeSlider(value=(2010, 2022), description='Years', max=2022, min=2008)

### **After setting the values above, run the cell below and all the cells that follow!**

In [45]:
# Querying the data
# The selected year range is applied to every query; picking a specific
# senator narrows the rows further within that range. (Previously the year
# range was ignored whenever a single senator was selected, even though the
# message printed below reports the range as applied.)
first_year, last_year = range_year.value
row_filter = original_df["ANO"].isin(range(first_year, last_year + 1))
if senator.value != "All":
    row_filter = row_filter & (original_df["SENADOR"] == senator.value)

# Take an explicit copy so that later cells can convert or add columns on `df`
# without pandas emitting SettingWithCopyWarning.
df = original_df.loc[row_filter].copy()

print(
    f"Getting data from {range_year.value[0]} to {range_year.value[1]} for {senator.value}.\n"
)
# Number of expense records per year in the selection
print(df['ANO'].value_counts().sort_index())

Getting data from 2010 to 2022 for AÉCIO NEVES.

ANO
2011    164
2012    303
2013    361
2014    152
2015    264
2016    307
2017    231
2018    186
2019     15
Name: count, dtype: int64


## Reimbursement by year

In [46]:
# Define x and y axis layout.
# Plotly's axis `titlefont` shorthand is deprecated in favour of `title.font`,
# so the font stored under `titlefont` in `default_layout` is moved into the
# axis title here; every other default is carried over unchanged.
axis_style = {key: val for key, val in default_layout.items() if key != "titlefont"}
axis_title_font = default_layout.get("titlefont")

xaxis_layout = dict(axis_style, title=dict(text="Year", font=axis_title_font))
yaxis_layout = dict(
    axis_style, title=dict(text="Total Reimbursement (R$)", font=axis_title_font)
)

In [47]:
# Group the selected expenses by year
group_by_year = df.groupby("ANO")

# Add up the reimbursed values within each year, then turn the year index
# back into a regular column so it can be plotted and displayed as a table.
yearly_totals = group_by_year["VALOR_REEMBOLSADO"].sum()
reimbursement_by_year = yearly_totals.reset_index()
reimbursement_by_year

Unnamed: 0,ANO,VALOR_REEMBOLSADO
0,2011,180990.41
1,2012,252715.42
2,2013,286004.86
3,2014,214075.14
4,2015,268031.78
5,2016,295335.85
6,2017,214164.59
7,2018,247757.97
8,2019,19739.01


In [48]:
# Plot the data: yearly reimbursement totals as a line with markers
trace = go.Scatter(
    x=reimbursement_by_year["ANO"],
    y=reimbursement_by_year["VALOR_REEMBOLSADO"],
    mode="markers+lines",
    marker=dict(size=10, color="blue", symbol="circle"),
    line=dict(width=2, color="blue"),
)

# Set chart layout.
# The title font is nested under `title.font`; the layout-level `titlefont`
# shorthand is deprecated in Plotly.
layout = go.Layout(
    title=dict(text="Reimbursement by Year", font=dict(size=24, color="darkblue")),
    xaxis=xaxis_layout,
    yaxis=yaxis_layout,
    hovermode="closest",
    plot_bgcolor="white",
)

py.iplot({"data": [trace], "layout": layout})

## Distribution of Types of Expenses

In [49]:
# Define x and y axis layout.
# Plotly's axis `titlefont` shorthand is deprecated in favour of `title.font`,
# so the font stored under `titlefont` in `default_layout` is moved into the
# axis title here; every other default is carried over unchanged.
axis_style = {key: val for key, val in default_layout.items() if key != "titlefont"}
axis_title_font = default_layout.get("titlefont")

# The x axis has no title text (the "Type of expense" title is disabled).
xaxis_layout = dict(axis_style, title=dict(font=axis_title_font))
yaxis_layout = dict(axis_style, title=dict(text="Quantity", font=axis_title_font))

In [50]:
# Define the data
# Convert the expense type to string on a *new* frame and rebind `df` to it.
# Assigning the column directly into `df` (a filtered selection of
# `original_df`) is what made pandas emit SettingWithCopyWarning; `assign`
# returns a fresh DataFrame, so the warning goes away while later cells still
# see the string-typed column.
df = df.assign(TIPO_DESPESA=df["TIPO_DESPESA"].astype(str))

# Number of expense records per expense type, most frequent first
count_by_type = df["TIPO_DESPESA"].value_counts().reset_index()
count_by_type.columns = ["TIPO_DESPESA", "COUNT"]
count_by_type



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



Unnamed: 0,TIPO_DESPESA,COUNT
0,"Aluguel de imóveis para escritório político, c...",736
1,"Passagens aéreas, aquáticas e terrestres nacio...",497
2,"Locomoçăo, hospedagem, alimentaçăo, combustíve...",401
3,Aquisiçăo de material de consumo para uso no e...,289
4,"Contrataçăo de consultorias, assessorias, pesq...",44
5,Divulgaçăo da atividade parlamentar,15
6,Serviços de Segurança Privada,1


In [51]:
# Plot the data: one bar per expense type, labelled with its record count
trace = go.Bar(
    x=count_by_type["TIPO_DESPESA"],
    y=count_by_type["COUNT"],
    marker=dict(color="blue"),
    text=count_by_type["COUNT"],
    textposition="auto",
)

# Set chart layout.
# The title font is nested under `title.font`; the layout-level `titlefont`
# shorthand is deprecated in Plotly.
layout = go.Layout(
    title=dict(text="Amount of expenses by type", font=dict(size=24, color="darkblue")),
    xaxis=dict(
        xaxis_layout,
        showticklabels=False,  # Turn off X axis values
    ),
    yaxis=yaxis_layout,
    hovermode="closest",
    plot_bgcolor="white",
)

py.iplot({"data": [trace], "layout": layout})

## Sum of the expenses by type

In [52]:
# Define x and y axis layout.
# Plotly's axis `titlefont` shorthand is deprecated in favour of `title.font`,
# so the font stored under `titlefont` in `default_layout` is moved into the
# axis title here; every other default is carried over unchanged.
axis_style = {key: val for key, val in default_layout.items() if key != "titlefont"}
axis_title_font = default_layout.get("titlefont")

# The x axis has no title text (the "Type of expense" title is disabled).
xaxis_layout = dict(axis_style, title=dict(font=axis_title_font))
yaxis_layout = dict(
    axis_style, title=dict(text="Amount reimbursed (R$)", font=axis_title_font)
)

In [53]:
# Define the data
# Total reimbursed value per expense type, largest total first
type_totals = df.groupby("TIPO_DESPESA")["VALOR_REEMBOLSADO"].sum()
amount_by_type = type_totals.reset_index().sort_values(
    "VALOR_REEMBOLSADO", ascending=False
)
amount_by_type

Unnamed: 0,TIPO_DESPESA,VALOR_REEMBOLSADO
0,"Aluguel de imóveis para escritório político, c...",1503041.54
5,"Passagens aéreas, aquáticas e terrestres nacio...",273286.47
1,Aquisiçăo de material de consumo para uso no e...,75184.92
2,"Contrataçăo de consultorias, assessorias, pesq...",54639.0
4,"Locomoçăo, hospedagem, alimentaçăo, combustíve...",49407.38
3,Divulgaçăo da atividade parlamentar,23018.6
6,Serviços de Segurança Privada,237.12


In [54]:
# Plot the data: one bar per expense type, labelled with the reimbursed total
trace = go.Bar(
    x=amount_by_type["TIPO_DESPESA"],
    y=amount_by_type["VALOR_REEMBOLSADO"],
    marker=dict(color="blue"),
    text=amount_by_type["VALOR_REEMBOLSADO"],
    textposition="auto",
)

# Set chart layout.
# The title font is nested under `title.font`; the layout-level `titlefont`
# shorthand is deprecated in Plotly.
layout = go.Layout(
    title=dict(
        text="Amount reimbursed by type of expense",
        font=dict(size=24, color="darkblue"),
    ),
    xaxis=dict(
        xaxis_layout,
        showticklabels=False,  # Turn off X axis values
    ),
    yaxis=yaxis_layout,
    hovermode="closest",
    plot_bgcolor="white",
)

py.iplot({"data": [trace], "layout": layout})

## Suppliers of the services (top 20)

In [55]:
# Define x and y axis layout.
# Plotly's axis `titlefont` shorthand is deprecated in favour of `title.font`,
# so the font stored under `titlefont` in `default_layout` is moved into the
# axis title here; every other default is carried over unchanged.
axis_style = {key: val for key, val in default_layout.items() if key != "titlefont"}
axis_title_font = default_layout.get("titlefont")

xaxis_layout = dict(axis_style, title=dict(text="Supplier", font=axis_title_font))
yaxis_layout = dict(axis_style, title=dict(text="Amount paid (R$)", font=axis_title_font))

In [56]:
# Define the data
# Total reimbursed value per supplier name, largest total first.
# NOTE(review): grouping is by the literal FORNECEDOR string, so spelling
# variants of one name (e.g. "ASAMAR S A" / "ASAMAR S/A" / "ASAMAR S.A." in the
# output below) form separate groups; grouping by CNPJ_CPF might merge them —
# confirm before relying on this ranking.
supplier_totals = df.groupby("FORNECEDOR")["VALOR_REEMBOLSADO"].sum()
amount_by_supplier = supplier_totals.reset_index().sort_values(
    "VALOR_REEMBOLSADO", ascending=False
)

# Show the 20 suppliers with the highest totals
amount_by_supplier.head(20)

Unnamed: 0,FORNECEDOR,VALOR_REEMBOLSADO
13,ASAMAR S A,536443.45
45,CONDOMINIO DO EDIFICIO ASAMAR,367094.88
17,ASAMAR S/A,107688.87
15,ASAMAR S.A,107200.34
16,ASAMAR S.A.,84002.3
192,TAM Linhas Aéreas S/A,57609.24
7,ADRIA VIAGENS E TURISMO LTDA ME,54160.22
122,JOĂO FRANCISCO MEIRELLES SILVA,49000.0
215,Telefônica Brasil S.A.,44371.09
251,VRG Linhas Aéreas S/A,44351.86


In [57]:
# Plot the data: the 20 suppliers with the highest reimbursed totals
# (`amount_by_supplier` is already sorted in descending order).
top_suppliers = amount_by_supplier[:20]
trace = go.Bar(
    x=top_suppliers["FORNECEDOR"],
    y=top_suppliers["VALOR_REEMBOLSADO"],
    marker=dict(color="blue"),
    text=top_suppliers["VALOR_REEMBOLSADO"],
    textposition="auto",
)

# Set chart layout.
# The title font is nested under `title.font`; the layout-level `titlefont`
# shorthand is deprecated in Plotly.
layout = go.Layout(
    title=dict(
        text="Amount reimbursed by supplier",
        font=dict(size=24, color="darkblue"),
    ),
    xaxis=dict(
        xaxis_layout,
        showticklabels=False,  # Turn off X axis values
    ),
    yaxis=yaxis_layout,
    hovermode="closest",
    plot_bgcolor="white",
)

py.iplot({"data": [trace], "layout": layout})