# Dependência

In [None]:
# Install required Google Cloud packages (commented out as these are typically one-time setup commands)
# !pip install gcloud
# !gcloud auth application-default login

# Import necessary Python libraries
import pandas as pd                # Data manipulation and analysis
import numpy as np                 # Numerical computing
import time                        # Time-related functions
import os                          # Operating system interfaces
import pandas_gbq                  # Pandas integration with BigQuery
from google.cloud import bigquery  # BigQuery client library
import glob                        # File path pattern matching
import openpyxl                    # Excel file handling
import csv                         # CSV file handling
import re                          # Regular expressions

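# Note: numpy, time, os, pandas_gbq, glob, csv and re are not referenced in the cells visible here; openpyxl is only needed indirectly (pandas uses it to read .xlsx files).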

# Tratamento

In [None]:
# Read the 'Segurança Pública' worksheet from the source workbook
df = pd.read_excel("Base_Estadic_2023(2).xlsx", sheet_name='Segurança Pública')

# Source column -> output column. The key order also fixes the column order
# of the resulting frame.
column_names = {
    'Sigla UF': 'sigla_uf',
    'PopUF': 'pop_projetada',
    'ESEG491': 'policia_civil_efetivo_homem',
    'ESEG492': 'policia_civil_efetivo_mulher',
    'ESEG493': 'policia_civil_efetivo_total',
}

# Keep only the mapped columns, then apply the new names in one pass
df = df[list(column_names)].rename(columns=column_names)

# Clean the male/female headcount columns and cast them to integers.
# 'Recusa' and 'Não informou' are non-response markers and are mapped to 0.
# NOTE(review): this makes a non-response indistinguishable from a true zero
# headcount downstream — confirm that is the intended treatment.
_NON_RESPONSE_MARKERS = ['Recusa', 'Não informou']

for _col in ('policia_civil_efetivo_homem', 'policia_civil_efetivo_mulher'):
    # pd.to_numeric parses ints, floats and numeric strings directly, instead
    # of round-tripping through str (which fails on values such as 12.0).
    # errors='raise' keeps any other unexpected text a hard failure.
    _values = pd.to_numeric(
        df[_col].replace(_NON_RESPONSE_MARKERS, 0),
        errors='raise',
    )

    # Blank cells (NaN) and fractional values cannot be cast to int without
    # losing information, so fail with an explicit message instead.
    _bad = _values.isna() | (_values % 1 != 0)
    if _bad.any():
        raise ValueError(
            f"Column {_col!r} has {int(_bad.sum())} blank or non-integer "
            "value(s); cannot convert to integer"
        )

    df[_col] = _values.astype('int64')

# Reshape the wide frame (one male and one female headcount column per row)
# into a long frame with one row per (row, gender) pair.

# Every row gets the same fixed year
df['ano'] = 2023

# Male slice: drop the female column, expose the male headcount under the
# common name 'quantidade_pessoas' and tag the rows with their gender
df_homens = (
    df.drop(columns='policia_civil_efetivo_mulher')
      .rename(columns={'policia_civil_efetivo_homem': 'quantidade_pessoas'})
      .assign(genero='Masculino')
)

# Female slice: same treatment, mirrored
df_mulheres = (
    df.drop(columns='policia_civil_efetivo_homem')
      .rename(columns={'policia_civil_efetivo_mulher': 'quantidade_pessoas'})
      .assign(genero='Feminino')
)

# Stack both slices. ignore_index=True gives the result a fresh 0..n-1 index;
# without it every index label would appear twice (once per gender), which
# makes label-based lookups and index-aligned assignments ambiguous.
# The theme column is added before the total column is dropped so that the
# final column order stays the same as before.
df = (
    pd.concat([df_homens, df_mulheres], ignore_index=True)
      .assign(tema='seguranca_publica')
      .drop(columns='policia_civil_efetivo_total')
)

# Upload

In [None]:
# Upload the prepared DataFrame `df` to BigQuery with an explicit schema.

# Field types and descriptions for the destination table.
# NOTE(review): `df` still carries a 'pop_projetada' column that is not
# declared here — confirm whether it should be documented in the schema or
# dropped before loading.
# NOTE(review): the 'ano' description mentions legislation, while the column is
# filled with a fixed 2023 upstream — confirm the wording.
schema = [
    bigquery.SchemaField('ano', 'INTEGER', description='Ano de implementação da legislação.'),
    bigquery.SchemaField('tema', 'STRING', description='Tema de origem daquele dado na ESTADIC'),
    bigquery.SchemaField('sigla_uf', 'STRING', description='Sigla da Unidade da Federação.'),
    bigquery.SchemaField('quantidade_pessoas', 'INTEGER', description='Quantitativo total de pessoas'),
    bigquery.SchemaField('genero', 'STRING', description='Genero daquele quantitativo'),
]

# BigQuery client bound to the target project
client = bigquery.Client(project='repositoriodedadosgpsp')

# Build the dataset reference with the DatasetReference constructor rather than
# the deprecated client.dataset() helper; it resolves to the same
# project/dataset pair.
dataset_ref = bigquery.DatasetReference(client.project, 'perfil_remuneracao')

# Destination table inside that dataset
table_ref = dataset_ref.table('ESTADIC_policial_civil_genero_v1')

# Load job configuration carrying the schema defined above.
# NOTE(review): no write_disposition is set, so the library default applies
# (presumably append — confirm); re-running this cell may then duplicate rows.
job_config = bigquery.LoadJobConfig(schema=schema)

# Start the load job for the DataFrame
job = client.load_table_from_dataframe(
    dataframe=df,
    destination=table_ref,
    job_config=job_config
)

# Block until the load job finishes (raises if the job failed)
job.result()