# Dependências

In [None]:
# Install required Google Cloud packages (commented out as these are typically one-time setup commands)
# !pip install gcloud
# !gcloud auth application-default login

# Import necessary Python libraries
import pandas as pd                # Data manipulation and analysis
import numpy as np                 # Numerical computing
import time                        # Time-related functions
import os                          # Operating system interfaces
import pandas_gbq                  # Pandas integration with BigQuery
from google.cloud import bigquery  # BigQuery client library
import glob                        # File path pattern matching
import openpyxl                    # Excel file handling
import csv                         # CSV file handling
import re                          # Regular expressions

# Note: The actual imports remain exactly as in the original code

# Tratamento

In [None]:
# Load the 'Segurança Pública' sheet of the ESTADIC 2023 workbook.
estadic_raw = pd.read_excel("Base_Estadic_2023(2).xlsx", sheet_name='Segurança Pública')

# Keep only the state code, population and military-police headcount (ESEG273),
# renamed to the column names declared in the BigQuery schema.
df = estadic_raw[['Sigla UF', 'PopUF', 'ESEG273']].rename(columns={
    'ESEG273': 'policia_militar_efetivo_total',
    'PopUF': 'pop_projetada',
    'Sigla UF': 'sigla_uf'
})

# Cells holding 'Recusa' are coded as 0 so the column can be stored as an integer.
# NOTE(review): this also makes `taxa` 0.0 for those rows, which is
# indistinguishable from a genuine zero headcount - confirm this is intended.
efetivo = pd.to_numeric(df['policia_militar_efetivo_total'].replace('Recusa', 0))

# Parse numerically instead of round-tripping through str: a float-typed cell
# such as 1234.0 is accepted, while any other text marker still raises ValueError
# inside pd.to_numeric. Missing or fractional values are rejected explicitly so
# they are never silently truncated by the integer cast.
if not (efetivo % 1 == 0).all():
    raise ValueError(
        "policia_militar_efetivo_total contains missing or non-integer values"
    )
df['policia_militar_efetivo_total'] = efetivo.astype('int64')

# Military police officers per 1000 inhabitants.
df['taxa'] = df['policia_militar_efetivo_total'] / df['pop_projetada'] * 1000

# Constant metadata columns: year and theme identifier.
df['ano'] = 2023
df['tema'] = 'seguranca_publica'

In [None]:
# Print the column dtypes and non-null counts of the prepared frame.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 27 entries, 0 to 26
Data columns (total 6 columns):
 #   Column                         Non-Null Count  Dtype  
---  ------                         --------------  -----  
 0   sigla_uf                       27 non-null     object 
 1   pop_projetada                  27 non-null     int64  
 2   policia_militar_efetivo_total  27 non-null     int64  
 3   taxa                           27 non-null     float64
 4   ano                            27 non-null     int64  
 5   tema                           27 non-null     object 
dtypes: float64(1), int64(3), object(2)
memory usage: 1.4+ KB


# Upload

In [None]:
# BigQuery table schema; field descriptions are kept in Portuguese because they
# are stored as the table's column documentation.
schema = [
    # 'ano' holds the reference year of the data (set to 2023 for every row),
    # not a legislation date.
    bigquery.SchemaField('ano', 'INTEGER', description='Ano de referência dos dados.'),
    bigquery.SchemaField('tema', 'STRING', description='Tema de origem daquele dado na ESTADIC'),
    bigquery.SchemaField('sigla_uf', 'STRING', description='Sigla da Unidade da Federação.'),
    bigquery.SchemaField('policia_militar_efetivo_total', 'INTEGER',
                       description='Quantitativo total de efetivo de policiais militares.'),
    bigquery.SchemaField('pop_projetada', 'INTEGER',
                       description='População projetada pelo IBGE para o ano de referência'),
    bigquery.SchemaField('taxa', 'FLOAT',
                       description='taxa de policiais por 1000 habitantes')
]

# BigQuery client bound to the target project.
client = bigquery.Client(project='repositoriodedadosgpsp')

# Reference to the target dataset. Built with DatasetReference directly because
# Client.dataset() is deprecated in google-cloud-bigquery.
dataset_ref = bigquery.DatasetReference(client.project, 'perfil_remuneracao')

# Reference to the target table, following the naming convention
# FONTE_algo_intuitivo_dado (ESTADIC_policial_militar_habitantes_v1).
table_ref = dataset_ref.table('ESTADIC_policial_militar_habitantes_v1')

# Load-job configuration carrying the explicit schema above.
# NOTE(review): no write_disposition is set, so the load-job default applies
# (presumably append - confirm). Re-running this cell may then duplicate the
# rows already in the table.
job_config = bigquery.LoadJobConfig(schema=schema)

# Start the load job that uploads the DataFrame to the target table.
job = client.load_table_from_dataframe(
    dataframe=df,
    destination=table_ref,
    job_config=job_config
)

# Block until the job finishes; result() raises if the load failed.
job.result()