In [3]:
# Import necessary libraries
import pandas as pd
from google.colab import files
from google.cloud import bigquery
from google.colab import auth

In [4]:
# Parameters for Project, Dataset, and Table.
# Together they form the fully qualified BigQuery table name
# `PROJECT_ID.DATASET_ID.TABLE_ID` that the cells below write to and query.
PROJECT_ID = "meu-projeto-dbt"  # project passed to the BigQuery client
DATASET_ID = "sample_data"  # dataset; created below if it does not exist yet
TABLE_ID = "marketing_data"  # destination table; overwritten on every load

In [5]:
# Upload the CSV file from your computer.
# Defines `csv_filename`, which the CSV-loading cell reads. Failures are
# reported and then re-raised so that "Run all" stops here instead of carrying
# on with an undefined (or stale, from a previous run) `csv_filename`.
try:
    uploaded = files.upload()  # Opens a window to select the file
except Exception as e:
    print(f"⚠️ Error during file upload: {e}")
    raise

if not uploaded:
    # Nothing came back from the upload widget, so there is nothing to load.
    raise ValueError("No file uploaded. Please upload a CSV file.")

# Only the first uploaded file is used; say so when several were selected.
csv_filename = next(iter(uploaded))
if len(uploaded) > 1:
    print(f"⚠️ {len(uploaded)} files uploaded; only '{csv_filename}' will be used.")
print(f"File '{csv_filename}' successfully uploaded!")

Saving mmm_sample_data.csv to mmm_sample_data.csv
File 'mmm_sample_data.csv' successfully uploaded!


In [6]:
# Authenticate with Google Cloud (using manual OAuth login).
# Run this before the BigQuery client cell; presumably the client picks up the
# credentials of the user who logs in here -- TODO confirm.
auth.authenticate_user()

In [7]:
# Load the CSV file into a pandas dataframe.
# Defines `df`, the frame that is later written to BigQuery. A read failure is
# reported and re-raised so later cells never run with a missing or stale `df`.
try:
    df = pd.read_csv(csv_filename)
except Exception as e:
    print(f"⚠️ Error reading the CSV file: {e}")
    raise

# The load below overwrites the destination table, so pushing an empty frame
# would silently wipe existing data. Refuse to continue in that case.
if df.empty:
    raise ValueError(f"File '{csv_filename}' contains no data rows.")

print(f"✅ File '{csv_filename}' loaded into dataframe.")

✅ File 'mmm_sample_data.csv' loaded into dataframe.


In [8]:
# Create BigQuery client bound to PROJECT_ID; the dataset creation, load job
# and verification queries in the cells below all go through this client.
client = bigquery.Client(project=PROJECT_ID)

In [9]:
# Create dataset if it doesn't exist.
# `exists_ok=True` makes the call safe to re-run: an already existing dataset
# is not treated as an error. Any other failure is reported and re-raised so
# the load cell does not run against a dataset that may not exist.
full_dataset_id = f"{PROJECT_ID}.{DATASET_ID}"
dataset = bigquery.Dataset(full_dataset_id)
# NOTE(review): the location presumably only takes effect when the dataset is
# newly created; an existing dataset keeps its own location -- confirm.
dataset.location = "US"

try:
    dataset = client.create_dataset(dataset, exists_ok=True)
except Exception as e:
    print(f"⚠️ Error creating dataset: {e}")
    raise

print(f"✅ Dataset '{full_dataset_id}' created or already exists.")

✅ Dataset 'meu-projeto-dbt.sample_data' created or already exists.


In [10]:
# Load-job settings used when the dataframe is written to BigQuery.
job_config = bigquery.LoadJobConfig()
# Let BigQuery work out the column types on its own.
job_config.autodetect = True
# Replace the table contents if the table already exists.
job_config.write_disposition = "WRITE_TRUNCATE"

In [11]:
# Load data into BigQuery.
# Defines `table_ref` (fully qualified table name) and runs the load job for
# `df` with the settings from `job_config`. A failure is reported and
# re-raised; otherwise the verification cells would run anyway and report on
# whatever data the table held before this run.
table_ref = f"{PROJECT_ID}.{DATASET_ID}.{TABLE_ID}"
try:
    job = client.load_table_from_dataframe(df, table_ref, job_config=job_config)
    job.result()  # Wait for the job to complete; raises if the job failed
except Exception as e:
    print(f"⚠️ Error loading data into BigQuery: {e}")
    raise

print(f"✅ Data successfully loaded into the table `{table_ref}`!")

✅ Data successfully loaded into the table `meu-projeto-dbt.sample_data.marketing_data`!


In [12]:
# Verify if the data was loaded correctly.
# Counts the rows in the destination table and compares the count with the
# dataframe that was loaded. Because the load overwrites the table, the two
# numbers must match; a mismatch or a failed query stops the notebook.
query = f"SELECT COUNT(*) AS total_rows FROM `{table_ref}`"
try:
    result = client.query(query).to_dataframe()
except Exception as e:
    print(f"⚠️ Error querying the table: {e}")
    raise

# Positional access: take the single result row regardless of its index label.
total_rows = int(result["total_rows"].iloc[0])
print(f"📊 Total rows loaded: {total_rows}")

if total_rows != len(df):
    raise ValueError(
        f"Row count mismatch: table `{table_ref}` has {total_rows} rows, "
        f"but the dataframe has {len(df)}."
    )

print("")

📊 Total rows loaded: 804



In [13]:
# Fetch and display 5 rows of the loaded table as a quick visual check.
# The query has no ORDER BY, so which 5 rows come back is up to BigQuery;
# they are a sample, not necessarily the first rows of the CSV.
# Reuses `table_ref` from the load cell so the preview always targets the
# table that was just written.
query = f"SELECT * FROM `{table_ref}` LIMIT 5"
try:
    result = client.query(query).to_dataframe()
except Exception as e:
    print(f"⚠️ Error fetching the first 5 rows: {e}")
    raise

print("📊 First 5 rows of the loaded table:")
print(result)

📊 First 5 rows of the loaded table:
         date       tv  radio      ooh     meta    google   tiktok  digital  \
0  2023-04-27     0.00    NaN  1084.20  3796.61   7159.81  4019.28  2106.97   
1  2023-08-25  4055.11    NaN  1012.66  4035.08   4607.99  2801.70  1784.53   
2  2023-10-14  3640.98    NaN   934.71  2275.59   4254.67  2727.94  1627.36   
3  2023-11-17  6816.80    NaN   752.11  3965.98   9265.49  4155.98  2098.45   
4  2024-03-19  9435.95    NaN  1296.58  5003.28  10658.48  8675.10  2588.44   

         sales  holiday  
0   7693449.97      0.0  
1   7512609.44      0.0  
2   5274272.74      0.0  
3  49283224.10      0.0  
4  24741935.76      0.0  
