# Dependências

In [None]:
# One-time setup: install the `gcloud` Python package and log in with Application Default Credentials (comment these two lines out once the environment is configured)
!pip install gcloud
!gcloud auth application-default login

# Import necessary Python libraries
import pandas as pd                # Data manipulation and analysis
import numpy as np                 # Numerical computing
import time                        # Time-related functions
import os                          # Operating system interfaces
import pandas_gbq                  # Pandas integration with BigQuery
from google.cloud import bigquery  # BigQuery client library
import glob                        # File path pattern matching
import openpyxl                    # Excel file handling
import csv                         # CSV file handling
import re                          # Regular expressions

# Note: The actual imports remain exactly as in the original code

Collecting gcloud
  Downloading gcloud-0.18.3.tar.gz (454 kB)
[?25l     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/454.4 kB[0m [31m?[0m eta [36m-:--:--[0m[2K     [91m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m[91m╸[0m [32m450.6/454.4 kB[0m [31m19.4 MB/s[0m eta [36m0:00:01[0m[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m454.4/454.4 kB[0m [31m12.7 MB/s[0m eta [36m0:00:00[0m
[?25h  Preparing metadata (setup.py) ... [?25l[?25hdone
Building wheels for collected packages: gcloud
  Building wheel for gcloud (setup.py) ... [?25l[?25hdone
  Created wheel for gcloud: filename=gcloud-0.18.3-py3-none-any.whl size=602927 sha256=6fb57ab5f0a164a0f817042a71c9d11f734ae90c30ea231ba7a837aa50e0b4da
  Stored in directory: /root/.cache/pip/wheels/2a/62/75/3d74209bfebb8805823ae74afa28653aa1ea76d8b5a9d741ff
Successfully built gcloud
Installing collected packages: gcloud
Successfully installed gcloud-0.18.3
Go to the following link in your browser, and 

# Tratamento

In [None]:
# Import necessary libraries
import pandas as pd
import pandas_gbq

# Project passed to read_gbq; it is also the first component of every source table path.
PROJECT_ID = 'repositoriodedadosgpsp'

# Dataset holding the yearly "militares reserva/reforma" cadastro snapshots.
SOURCE_DATASET = f'{PROJECT_ID}.portal_transparencia_cgu'

# One snapshot table per year (2020-2025). To include another year, add its
# table name here; the combined query below is generated from this tuple.
SNAPSHOT_TABLES = (
    '2020_abr_militares_reserva_reforma_cadastro',
    '2021_abr_militares_reserva_reforma_cadastro',
    '2022_abr_militares_reserva_reforma_cadastro',
    '2023_abr_militares_reserva_reforma_cadastro',
    '2024_abr_militares_reserva_reforma_cadastro',
    '2025_4_militares_reserva_reforma_cadastro',
)

# Full dump of the 2025 snapshot.
# NOTE(review): nothing in the steps shown here reads df_reserva, yet SELECT *
# downloads the whole table on every run; drop this query if no other cell
# needs it.
query_reserva = """ SELECT * FROM `repositoriodedadosgpsp.portal_transparencia_cgu.2025_4_militares_reserva_reforma_cadastro`
            """
df_reserva = pandas_gbq.read_gbq(query_reserva, project_id=PROJECT_ID)

# Main query: the same projection over every snapshot table, stacked with
# UNION ALL. Columns are renamed to lower case and a constant `total` = 1 is
# added so that records can be counted by summing that column.
# The SELECT is written once and repeated per table instead of being
# copy-pasted, so all years are guaranteed to use identical columns.
query = '\nUNION ALL\n'.join(
    'SELECT ANO as ano, MES as mes, TIPO_APOSENTADORIA as tipo_aposentadoria, '
    'ORG_LOTACAO as org_lotacao, 1 as total '
    f'FROM `{SOURCE_DATASET}.{table}`'
    for table in SNAPSHOT_TABLES
)

# Run the combined query and load the result into the DataFrame `df`.
df = pandas_gbq.read_gbq(query, project_id=PROJECT_ID)

# Normalise the retirement-type labels in `tipo_aposentadoria`:
#   1. title-case the text (e.g. "RESERVA" becomes "Reserva");
#   2. restore words whose accented character was lost in the source data
#      (presumably replaced by a blank upstream), which after title-casing
#      show up as 'Doen A' and 'Decis O'.
#
# There is deliberately no `.astype(str)` step: the `.str` accessor already
# requires text values, and casting afterwards would turn missing values into
# the literal text 'None'/'nan', which would then be stored as if it were a
# real retirement type instead of staying null.
df['tipo_aposentadoria'] = (
    df['tipo_aposentadoria']
    .str.title()
    # 'Doen A' -> 'Doença' (disease); literal match, not a regex.
    .str.replace('Doen A', 'Doença', regex=False)
    # 'Decis O' -> 'Decisão' (decision); literal match, not a regex.
    .str.replace('Decis O', 'Decisão', regex=False)
)

# Upload

In [None]:
# Import the bigquery library from google.cloud
from google.cloud import bigquery

# BigQuery client bound to the destination Google Cloud project.
client = bigquery.Client(project='repositoriodedadosgpsp')

# Reference to the destination dataset 'perfil_remuneracao' in the client's
# project. Built with the DatasetReference constructor because
# Client.dataset() is deprecated in google-cloud-bigquery.
dataset_ref = bigquery.DatasetReference(client.project, 'perfil_remuneracao')

# Reference to the destination table inside that dataset.
table_ref = dataset_ref.table('MILITARES_reserva_reforma_total_v2')

# Explicit schema for the destination table: column name, BigQuery type and
# a description for each of the five columns of `df`.
schema = [
    bigquery.SchemaField('ano', 'INTEGER', description='Ano de referência da observação'),
    bigquery.SchemaField('mes', 'INTEGER', description='Mês de referência da observação'),
    bigquery.SchemaField('tipo_aposentadoria', 'STRING', description='Tipo de aposentadoria'),
    bigquery.SchemaField('org_lotacao', 'STRING', description='Órgão de lotação'),
    bigquery.SchemaField('total', 'INTEGER', description='Quantidade total da observação'),
]

# Load-job configuration carrying only the schema.
# NOTE(review): no write_disposition is set, so the job uses BigQuery's
# default for load jobs (append). Re-running this cell therefore adds the
# full 2020-2025 data again on top of what is already in the table; confirm
# whether appending is intended or the table should be replaced on each run.
job_config = bigquery.LoadJobConfig(schema=schema)

# Start loading the DataFrame `df` into the destination table.
job = client.load_table_from_dataframe(df, table_ref, job_config=job_config)

# Block until the load job finishes; result() raises if the job failed, so a
# failed upload does not pass silently.
job.result()