# Dependências

In [None]:
# Install required Google Cloud packages (commented out as these are typically one-time setup commands)
!pip install gcloud
!gcloud auth application-default login

# Import necessary Python libraries
import pandas as pd                # Data manipulation and analysis
import numpy as np                 # Numerical computing
import time                        # Time-related functions
import os                          # Operating system interfaces
import pandas_gbq                  # Pandas integration with BigQuery
from google.cloud import bigquery  # BigQuery client library
import glob                        # File path pattern matching
import openpyxl                    # Excel file handling
import csv                         # CSV file handling
import re                          # Regular expressions

# Note: The actual imports remain exactly as in the original code

Collecting gcloud
  Downloading gcloud-0.18.3.tar.gz (454 kB)
[?25l     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/454.4 kB[0m [31m?[0m eta [36m-:--:--[0m[2K     [91m━━━━━━━━━━━━━[0m[90m╺[0m[90m━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m153.6/454.4 kB[0m [31m4.3 MB/s[0m eta [36m0:00:01[0m[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m454.4/454.4 kB[0m [31m6.4 MB/s[0m eta [36m0:00:00[0m
[?25h  Preparing metadata (setup.py) ... [?25l[?25hdone
Building wheels for collected packages: gcloud
  Building wheel for gcloud (setup.py) ... [?25l[?25hdone
  Created wheel for gcloud: filename=gcloud-0.18.3-py3-none-any.whl size=602927 sha256=41c3de64b46d274718c2d8f429d6235acfabcda659f06fdf4cf950748047e752
  Stored in directory: /root/.cache/pip/wheels/2a/62/75/3d74209bfebb8805823ae74afa28653aa1ea76d8b5a9d741ff
Successfully built gcloud
Installing collected packages: gcloud
Successfully installed gcloud-0.18.3
Go to the following link in your brow

# Tratamento

In [None]:
# Load the raw ingress records from the comma-separated CSV export.
df = pd.read_csv("PEP_INGRESSOS_GDF_2025 - Document_CH338.csv", sep=",")

# Keep only the 'Gov. Federal' rows and the six columns used downstream, in the
# order they will be uploaded.
# .copy() makes the result an independent frame, so the column assignment below
# does not write into a slice of the raw frame (avoids SettingWithCopyWarning).
# NOTE(review): the trailing apostrophe in "Tipo de Ingresso'" is preserved
# exactly as written - confirm it matches the header in the CSV file.
df = df.loc[
    df['Ident. GDF'] == 'Gov. Federal',
    ['Ano', "Tipo de Ingresso'", 'Regime Jurídico', 'Sexo',
     'Grupo do Cargo', 'Quantidade de Ingressos'],
].copy()

# Expand the one-letter codes in 'Sexo' ('F' -> 'Feminino', 'M' -> 'Masculino');
# any other value is left unchanged.
df['Sexo'] = df['Sexo'].replace({'F': 'Feminino', 'M': 'Masculino'})

# Rename the columns to the lowercase, underscore-separated names used by the
# destination table.
df = df.rename(columns={
    'Ano': 'ano',
    "Tipo de Ingresso'": 'tipo_ingresso',
    'Regime Jurídico': 'regime_juridico',
    'Sexo': 'genero',
    'Grupo do Cargo': 'grupo_cargo',
    'Quantidade de Ingressos': 'quantidade_ingressos',
})

# Store the year as an integer.
df['ano'] = df['ano'].astype(int)

# Preview the treated frame. As the last expression of the cell it is actually
# rendered; a bare expression in the middle of a cell displays nothing.
df.head()

# Upload

In [None]:
# Create the BigQuery client bound to the target Google Cloud project.
# This object is the entry point for every BigQuery API call made below.
client = bigquery.Client(project='repositoriodedadosgpsp')

# Reference to the 'atracao_pre_selecao' dataset inside the client's project.
# Built with the DatasetReference constructor because Client.dataset() is
# deprecated in google-cloud-bigquery; the resulting object is the same type.
dataset_ref = bigquery.DatasetReference(client.project, 'atracao_pre_selecao')

# Schema of the destination BigQuery table, defined once.
# Each entry below is (column name, BigQuery type, column description) and is
# turned into a bigquery.SchemaField; the list order is the column order.
schema = [
    bigquery.SchemaField(field_name, field_type, description=field_description)
    for field_name, field_type, field_description in (
        ('ano', 'INTEGER', 'Ano de referencia da informacao'),
        ('tipo_ingresso', 'STRING', 'forma de ingresso no serviço público'),
        ('regime_juridico', 'STRING', 'regime jurídico'),
        ('genero', 'STRING', 'Qual gênero do ingressante'),
        ('grupo_cargo', 'STRING', 'a qual grupo de cargo pertence'),
        ('quantidade_ingressos', 'INTEGER', 'Quantidade de ingressos'),
    )
]