In [134]:
# 1º Step: Obtain data from PATH_DATA and load it into a DataFrame.

import pandas as pd 
import numpy as np

# Remote CSV that feeds the whole notebook; it is parsed below with ';' as the
# field separator and ISO-8859-1 as the text encoding.
PATH_DATA = 'https://www.ispdados.rj.gov.br/Arquivos/BaseDPEvolucaoMensalCisp.csv'

try:
    df_robs = pd.read_csv(PATH_DATA, sep=';', encoding='iso-8859-1')

except Exception as e:
    print(f'Error while obtaining data from PATH_DATA. {e}')
    # Re-raise instead of swallowing the error: every later cell needs
    # `df_robs`, and continuing would either hit a NameError further down or
    # silently reuse a stale frame left in the kernel by a previous run.
    raise

else:
    # Reached only when the download and parse succeeded.
    print(f'Data obtained from PATH_DATA sucessfully. DataFrame loaded with {df_robs.shape[0]} rows and {df_robs.shape[1]} columns.')


Data obtained from PATH_DATA sucessfully. DataFrame loaded with 36355 rows and 63 columns.


In [146]:
# 2º Step: Delimiting and grouping the DataFrame.

try:
    # Keep only the columns used by the analysis. The explicit copy gives an
    # independent frame, so the .loc assignment below never writes into a slice
    # of another DataFrame (no SettingWithCopyWarning when the cell is re-run).
    df_robs = df_robs[['cisp', 'regiao', 'munic', 'roubo_veiculo']].copy()

    # Collapse every region label starting with 'Grande Niter' into one spelling.
    # NOTE(review): presumably the raw file carries a mis-encoded 'ó' in some
    # rows — confirm against the source data.
    data_correction = df_robs['regiao'].str.startswith('Grande Niter', na=False)
    df_robs.loc[data_correction, 'regiao'] = 'Grande Niterói'

    # Select the value column explicitly before aggregating. The previous
    # `.sum(['roubo_veiculo'])` passed the list as the positional
    # `numeric_only` argument and only worked because a non-empty list is truthy.
    df_robs_gb = (
        df_robs
        .groupby(['cisp', 'regiao', 'munic'])['roubo_veiculo']
        .sum()
        .reset_index()
    )
    df_robs_gb_filtered = df_robs_gb.sort_values(by='munic', ascending=True)
    print(f'DataFrame grouped by by cisp, regiao, and munic with {df_robs_gb_filtered.shape[0]} rows and {df_robs_gb_filtered.shape[1]} columns.')

except Exception as e:
    print(f'Error while dellimiting and groupingby DataFrame. {e}')
    # Re-raise so later cells do not run against missing or stale grouped data.
    raise

DataFrame grouped by by cisp, regiao, and munic with 146 rows and 4 columns.


In [139]:
# 3º Step: Write the distinct values of the 'munic' column to a .txt file.

try:
    with open('municipalities.txt', 'w', encoding='utf-8-sig') as out_file:
        # Header first, then one municipality name per line.
        out_file.write('Municipalities:\n\n')
        unique_names = df_robs_gb_filtered['munic'].unique()
        out_file.write('\n'.join(unique_names))
    print('Municipalities written to municipalities.txt successfully.')

except Exception as e:
    print(f'Error while trying to write municipalities to municipalities.txt. {e}')

Municipalities written to municipalities.txt successfully.


In [138]:
# 4º Step: Write the DataFrame's column names (its variables) to a .txt file.

try:
    with open('variables.txt', 'w', encoding='iso-8859-1') as out_file:
        # Header first, then one column name per line.
        out_file.write('Variables:\n\n')
        column_names = df_robs_gb_filtered.columns
        out_file.write('\n'.join(column_names))
    print('Variables written to variables.txt successfully.')

except Exception as e:
    print(f'Error while trying to write variables to variables.txt. {e}')

Variables written to variables.txt successfully.


In [141]:
# 5º Step: Obtain measures of central tendency (plus total, minimum and maximum).

try:
    # NumPy view of the aggregated 'roubo_veiculo' column; the dispersion step
    # reuses `array_robs`.
    array_robs = np.array(df_robs_gb_filtered['roubo_veiculo'])
    total = np.sum(array_robs)
    mean = np.mean(array_robs)
    median = np.median(array_robs)
    # Named `minimum` / `maximum` rather than `min` / `max`: assigning to those
    # names at cell level would replace the Python builtins for the rest of the
    # kernel session and break any later `min(...)` / `max(...)` call.
    minimum = np.min(array_robs)
    maximum = np.max(array_robs)

    print('Measures of central tendency:\n')
    print(f'Total: {total}')
    print(f'Mean: {mean}')
    print(f'Median: {median}')
    print(f'Min: {minimum}')
    print(f'Max: {maximum}')

except Exception as e:
    print(f'Error while obtaining measures of central tendency. {e}')

Measures of central tendency:

Total: 697681.00
Mean: 3751.0
Median: 307.0
Min: 0.0
Max: 36274.0


In [143]:
# 6º Step: Obtain measures of dispersion.

try:
    # NumPy defaults are used throughout (np.std / np.var with ddof=0).
    std_dev = np.std(array_robs)
    var = np.var(array_robs)
    # Both quartiles in one call; `q1` and `q3` are reused by the flagging step.
    q1, q3 = np.percentile(array_robs, [25, 75])
    iqr = q3 - q1

    print('\nMeasures of dispersion:\n')
    for label, value in (
        ('Standard Deviation', std_dev),
        ('Variance', var),
        ('Q1', q1),
        ('Q3', q3),
        ('IQR', iqr),
    ):
        print(f'{label}: {value:.2f}')

except Exception as e:
    print(f'Error while obtaining measures of dispersion. {e}')


Measures of dispersion:

Standard Deviation: 6793.85
Variance: 46156448.18
Q1: 29.25
Q3: 3883.25
IQR: 3854.00


In [148]:
# 7º Step: Flag the rows above Q3 and below Q1.
# The flag values 'mayors' / 'minors' stand for the largest / smallest groups.

try:
    robs_values = df_robs_gb_filtered['roubo_veiculo']

    # Rows strictly above the third quartile. `.assign` returns a new frame, so
    # the filtered source frame is left untouched.
    df_rob_mayors = df_robs_gb_filtered[robs_values > q3].assign(flag='mayors')

    # Rows strictly below the first quartile.
    df_robs_minors = df_robs_gb_filtered[robs_values < q1].assign(flag='minors')

    # Stack both groups into one frame with a fresh 0..n-1 index.
    df_roubos_flags = pd.concat([df_rob_mayors, df_robs_minors], ignore_index=True)

    display(df_roubos_flags)
except Exception as e:
    print(f'Error while obtaining mayors and minors: {e}')


Unnamed: 0,cisp,regiao,munic,roubo_veiculo,flag
0,54,Baixada Fluminense,Belford Roxo,23967,mayors
1,60,Baixada Fluminense,Duque de Caxias,19513,mayors
2,59,Baixada Fluminense,Duque de Caxias,36274,mayors
3,62,Baixada Fluminense,Duque de Caxias,8667,mayors
4,71,Interior,Itaboraí,6169,mayors
5,50,Baixada Fluminense,Itaguaí,3999,mayors
6,53,Baixada Fluminense,Mesquita,8190,mayors
7,57,Baixada Fluminense,Nilópolis,12257,mayors
8,77,Grande Niterói,Niterói,4122,mayors
9,78,Grande Niterói,Niterói,8401,mayors
