In [1]:
#  Importando Bibliotecas
import os
from google.cloud import bigquery
from dotenv import load_dotenv
import pandas as pd

In [2]:
# Carrega variáveis do .env
load_dotenv()

credencial = os.getenv("GOOGLE_APPLICATION_CREDENTIALS")
project_id = os.getenv("PROJECT_ID")
table_id = os.getenv("TABLE_PEDIDO")
table_stg = os.getenv("TABLE_STG")



In [3]:
# Inicializa o cliente do BigQuery usando credenciais de serviço
# O parâmetro 'project_id' especifica o projeto GCP onde as queries serão executadas
client = bigquery.Client.from_service_account_json(credencial, project=project_id)


query = f"""
SELECT *
FROM `{table_stg}`
"""

In [4]:
# Executa e converte pra DataFrame
resultado = client.query(query)
df = resultado.to_dataframe()



In [5]:
# Mostra as primeiras 5 linhas da tabela de datas
df.head()

Unnamed: 0,pizza_id,order_id,pizza_name_id,quantity,order_date,order_time,unit_price,total_price,pizza_size,pizza_category,pizza_ingredients,pizza_name
0,3777.0,1678.0,pepperoni_s,1.0,2015-01-29,13:06:51,9.75,9.75,S,Classic,"Mozzarella Cheese, Pepperoni",The Pepperoni Pizza
1,6746.0,2976.0,pepperoni_s,1.0,2015-02-19,15:29:10,9.75,9.75,S,Classic,"Mozzarella Cheese, Pepperoni",The Pepperoni Pizza
2,7800.0,3430.0,pepperoni_s,1.0,2015-02-27,16:25:26.999999,9.75,9.75,S,Classic,"Mozzarella Cheese, Pepperoni",The Pepperoni Pizza
3,18660.0,8198.0,pepperoni_s,1.0,2015-05-17,18:03:19,9.75,9.75,S,Classic,"Mozzarella Cheese, Pepperoni",The Pepperoni Pizza
4,19687.0,8656.0,pepperoni_s,1.0,2015-05-25,15:16:37,9.75,9.75,S,Classic,"Mozzarella Cheese, Pepperoni",The Pepperoni Pizza


In [6]:
# Exibe as informações sobre o dataframe principal
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 48620 entries, 0 to 48619
Data columns (total 12 columns):
 #   Column             Non-Null Count  Dtype  
---  ------             --------------  -----  
 0   pizza_id           48620 non-null  float64
 1   order_id           48620 non-null  float64
 2   pizza_name_id      48620 non-null  object 
 3   quantity           48620 non-null  float64
 4   order_date         48620 non-null  dbdate 
 5   order_time         48620 non-null  dbtime 
 6   unit_price         48620 non-null  float64
 7   total_price        48620 non-null  float64
 8   pizza_size         48620 non-null  object 
 9   pizza_category     48620 non-null  object 
 10  pizza_ingredients  48620 non-null  object 
 11  pizza_name         48620 non-null  object 
dtypes: dbdate(1), dbtime(1), float64(5), object(5)
memory usage: 4.5+ MB


In [7]:
# Cria df com colunas específicas para dimensão de pedidos
df_dim_pedido = df[['order_id', 'order_date', 'order_time','quantity','unit_price']]

In [8]:
# Converte a coluna order_id para número inteiro
df_dim_pedido["order_id"] = df_dim_pedido["order_id"].astype("int64")

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_dim_pedido["order_id"] = df_dim_pedido["order_id"].astype("int64")


In [9]:
# Calcula o preço total de cada item
df['total_price'] = df['quantity'] * df['unit_price']

# Agrupa por pedido: soma quantidades e valores
df_dim_pedido = df.groupby(['order_id','order_date','order_time'], as_index=False).agg({
    'quantity': 'sum',
    'total_price': 'sum'
})


In [10]:
# Mostra as primeiras 5 linhas da tabela de datas
df_dim_pedido.head()

Unnamed: 0,order_id,order_date,order_time,quantity,total_price
0,1.0,2015-01-01,11:38:36,1.0,13.25
1,2.0,2015-01-01,11:57:40,5.0,92.0
2,3.0,2015-01-01,12:12:28,2.0,37.25
3,4.0,2015-01-01,12:16:31,1.0,16.5
4,5.0,2015-01-01,12:21:30,1.0,16.5


In [11]:
# Converte order_id para número inteiro
df_dim_pedido["order_id"] = df_dim_pedido["order_id"].astype("int64")

In [12]:
# Mostra as primeiras 5 linhas da tabela de datas
df_dim_pedido.head()

Unnamed: 0,order_id,order_date,order_time,quantity,total_price
0,1,2015-01-01,11:38:36,1.0,13.25
1,2,2015-01-01,11:57:40,5.0,92.0
2,3,2015-01-01,12:12:28,2.0,37.25
3,4,2015-01-01,12:16:31,1.0,16.5
4,5,2015-01-01,12:21:30,1.0,16.5


In [13]:
# Converte quantity para número inteiro
df_dim_pedido["quantity"] = df["quantity"].astype("int64")

In [14]:
# Mostra informações da dimensão de pedidos
df_dim_pedido.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 21350 entries, 0 to 21349
Data columns (total 5 columns):
 #   Column       Non-Null Count  Dtype  
---  ------       --------------  -----  
 0   order_id     21350 non-null  int64  
 1   order_date   21350 non-null  dbdate 
 2   order_time   21350 non-null  dbtime 
 3   quantity     21350 non-null  int64  
 4   total_price  21350 non-null  float64
dtypes: dbdate(1), dbtime(1), float64(1), int64(2)
memory usage: 834.1 KB


In [15]:
#df_dim_pedido= df_dim_pedido.drop_duplicates(subset=['order_id'])

In [16]:
# Mostra as primeiras 5 linhas da tabela de datas
df_dim_pedido.head()

Unnamed: 0,order_id,order_date,order_time,quantity,total_price
0,1,2015-01-01,11:38:36,1,13.25
1,2,2015-01-01,11:57:40,1,92.0
2,3,2015-01-01,12:12:28,1,37.25
3,4,2015-01-01,12:16:31,1,16.5
4,5,2015-01-01,12:21:30,1,16.5


In [17]:
# Ordenar pelo order_id e recriar o sk_order sequencial
df_dim_pedido = df_dim_pedido.drop_duplicates(subset=['order_id']).sort_values('order_id').reset_index(drop=True)
df_dim_pedido['sk_order'] = df_dim_pedido.index + 1

In [18]:
# Seleciona e organiza as colunas da dimensão de pedidos
df_dim_pedido = df_dim_pedido[['sk_order','order_id','total_price']]

In [19]:
# Mostra as primeiras 5 linhas da tabela de datas
df_dim_pedido.head()

Unnamed: 0,sk_order,order_id,total_price
0,1,1,13.25
1,2,2,92.0
2,3,3,37.25
3,4,4,16.5
4,5,5,16.5


In [20]:
# Mostra informações da dimensão de pedidos
df_dim_pedido.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 21350 entries, 0 to 21349
Data columns (total 3 columns):
 #   Column       Non-Null Count  Dtype  
---  ------       --------------  -----  
 0   sk_order     21350 non-null  int64  
 1   order_id     21350 non-null  int64  
 2   total_price  21350 non-null  float64
dtypes: float64(1), int64(2)
memory usage: 500.5 KB


In [21]:


# Configuração do job: substitui a tabela existente
job_config = bigquery.LoadJobConfig(
    write_disposition="WRITE_TRUNCATE"
)

# Carrega o DataFrame no BigQuery
job = client.load_table_from_dataframe(
    df_dim_pedido,
    table_id,
    job_config=job_config
)
job.result()  # espera o job terminar

print("dim_pedido carregada com sucesso!")


dim_pedido carregada com sucesso!


In [22]:
# Conta quantos registros tem na tabela de pedidos
query_check = f"SELECT COUNT(*) AS total FROM `{table_id}`"
df_check = client.query(query_check).to_dataframe()
print(df_check)




   total
0  21350
