# Dependências

In [None]:
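# One-time setup: install the `gcloud` package and authenticate with Application Default Credentials.
# These commands run every time this cell is executed; comment them out once the environment is set up.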
!pip install gcloud
!gcloud auth application-default login

# Import necessary Python libraries
import pandas as pd                # Data manipulation and analysis
import numpy as np                 # Numerical computing
import time                        # Time-related functions
import os                          # Operating system interfaces
import pandas_gbq                  # Pandas integration with BigQuery
from google.cloud import bigquery  # BigQuery client library
import glob                        # File path pattern matching
import openpyxl                    # Excel file handling
import csv                         # CSV file handling
import re                          # Regular expressions

# NOTE(review): of the imports above, only `pd` and `bigquery` are used in the cells visible here — confirm before pruning.

Collecting gcloud
  Downloading gcloud-0.18.3.tar.gz (454 kB)
     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 454.4/454.4 kB 8.3 MB/s eta 0:00:00
  Preparing metadata (setup.py) ... done
Building wheels for collected packages: gcloud
  Building wheel for gcloud (setup.py) ... done
  Created wheel for gcloud: filename=gcloud-0.18.3-py3-none-any.whl size=602927 sha256=ae6031e93699fb3e292017ee68bd780f67b36525fdeda8430dd413d198d7afcb
  Stored in directory: /root/.cache/pip/wheels/2a/62/75/3d74209bfebb8805823ae74afa28653aa1ea76d8b5a9d741ff
Successfully built gcloud
Installing collected packages: gcloud
Successfully installed gcloud-0.18.3
Go to the following link in your browser, and complete the sign-in prompts:

    https://accounts.google.com/o/oauth2/auth?response_type=code&client_id=764086051850-6qr4p6gpi6hn506pt8ejuq83di341hur.apps.googleusercontent.com&redirect_uri=https%3A%2F%2Fsdk.cloud.google.com%2Fapplicationdefau

# Tratamento

In [None]:
# Build the cleaned DataFrame `df` from the CSV export.
#
# Parsing options: fields are comma-delimited, "," is the decimal mark and
# "." is the thousands separator for numeric values.
# NOTE(review): the comma serves as both field delimiter and decimal mark, so
# decimal values only parse correctly if they are quoted in the file — confirm.
column_names = {
    'Ano': 'ano',
    'Agrupamento do desligamento': 'tipo_desligamento',
    'Tipo de desligamento': 'tipo_detalhado_desligamento',
    'Estabilidade': 'status_estabilidade',
    'Quantidade de Desligamentos': 'quantidade_desligamentos',
}

df = (
    pd.read_csv(
        "PEP_DESLIGAMENTOS_2025 - Document_CH955.csv",
        sep=",",
        decimal=",",
        thousands=".",
    )
    # Switch the source headers to lowercase, underscore-separated names.
    .rename(columns=column_names)
    # Discard rows whose 'status_estabilidade' value is missing (NaN).
    .dropna(subset=['status_estabilidade'])
    # Store the year as an integer.
    .astype({'ano': int})
)

# Upload

In [None]:
# Upload cell: load the DataFrame `df` into the BigQuery table
# atracao_pre_selecao.PEP_GF_desligamentos_v3 with an explicit schema.
from google.cloud import bigquery

# Client bound to the Google Cloud project that owns the destination dataset.
client = bigquery.Client(project='repositoriodedadosgpsp')

# Reference to the destination dataset 'atracao_pre_selecao'.
# Built with DatasetReference directly because Client.dataset() is deprecated
# in google-cloud-bigquery; the resulting reference is the same
# (client's project + dataset id).
dataset_ref = bigquery.DatasetReference(client.project, 'atracao_pre_selecao')

# Explicit schema for the destination table: each SchemaField gives the
# column name, its BigQuery type and a human-readable description.
schema = [
    bigquery.SchemaField('ano', 'INTEGER', description='Ano de referencia da informacao'),
    bigquery.SchemaField('tipo_desligamento', 'STRING', description='Qual o agrupamento do tipo de desligamento'),
    bigquery.SchemaField('tipo_detalhado_desligamento', 'STRING', description='Detalhes sobre o total de desligamentos'),
    bigquery.SchemaField('status_estabilidade', 'STRING', description='Se esses servidores possuem estabilidade ou não'),
    bigquery.SchemaField('quantidade_desligamentos', 'FLOAT', description='Quantidade de desligamentos'),
]

# Reference to the target table inside the dataset.
table_ref = dataset_ref.table('PEP_GF_desligamentos_v3')  # table name in the format SOURCE_something_intuitive_data

# Load-job configuration carrying the schema above, so the table columns get
# the declared types and descriptions.
# NOTE(review): no write_disposition is set, so the load-job default applies
# (WRITE_APPEND per the BigQuery docs); re-running this cell would then append
# the same rows again — confirm whether WRITE_TRUNCATE is wanted instead.
job_config = bigquery.LoadJobConfig(schema=schema)

# Start the load job that sends `df` to the target table.
job = client.load_table_from_dataframe(df, table_ref, job_config=job_config)

# Block until the load job finishes; result() raises if the job failed, so
# an unsuccessful upload does not pass silently.
job.result()