In [1]:
import csv
from faker import Faker
from dotenv import load_dotenv
import os
import pandas as pd
from pandas_gbq import to_gbq
from google.oauth2 import service_account

# Initialize Faker
fake = Faker()

# Populate os.environ from a .env file (if one is found) before reading settings.
load_dotenv()

service_acc = os.getenv('SERVICE_ACCOUNT')
project = os.getenv('PROJECT_ID')
dataset = os.getenv('DATASET_ID')

# Fail fast with a readable message. Without this check a missing variable
# is passed along as None and only surfaces later, either as an obscure error
# from the credentials loader or as a "None.<table>" BigQuery table id.
_missing_settings = [
    name
    for name, value in (
        ('SERVICE_ACCOUNT', service_acc),
        ('PROJECT_ID', project),
        ('DATASET_ID', dataset),
    )
    if not value
]
if _missing_settings:
    raise RuntimeError(
        "Missing required environment variable(s): " + ", ".join(_missing_settings)
    )

# Configure Google Cloud credentials and project details
credentials = service_account.Credentials.from_service_account_file(service_acc)
project_id = project
dataset_id = dataset

# Function to save data to CSV
def save_to_csv(filename, data, headers):
    """Write ``data`` to ``filename`` as a CSV file with a header row.

    Args:
        filename: Path of the CSV file to create; an existing file is overwritten.
        data: Iterable of dicts, one per row, keyed by the names in ``headers``.
        headers: Iterable of column names, in the order they appear in the file.

    Raises:
        ValueError: If a row contains a key that is not listed in ``headers``
            (raised by ``csv.DictWriter``).
    """
    # An explicit encoding makes the output identical on every platform; the
    # locale default (for example cp1252 on Windows) cannot encode every
    # character that may show up in generated text.
    with open(filename, 'w', newline='', encoding='utf-8') as file:
        # list() lets callers pass any iterable (dict views, generators, ...).
        writer = csv.DictWriter(file, fieldnames=list(headers))
        writer.writeheader()
        writer.writerows(data)

# Function to upload data to BigQuery
def upload_to_bigquery(table_name, data):
    """Load ``data`` into the BigQuery table ``<dataset_id>.<table_name>``.

    The rows are converted to a pandas DataFrame and sent with
    ``pandas_gbq.to_gbq`` using ``if_exists='replace'``, together with the
    module-level ``project_id`` and ``credentials``.

    Args:
        table_name: Name of the destination table inside ``dataset_id``.
        data: Row data accepted by ``pandas.DataFrame`` (here: a list of dicts).
    """
    frame = pd.DataFrame(data)
    destination = f"{dataset_id}.{table_name}"
    to_gbq(
        frame,
        destination,
        project_id=project_id,
        if_exists='replace',
        credentials=credentials,
    )

# Function to generate dummy data for dim_plant_categories
def insert_dim_plant_categories(num_records):
    """Generate fake ``dim_plant_categories`` rows, write them to CSV and upload them.

    The rows are written to ``dim_plant_categories.csv`` and, when at least one
    row was generated, uploaded to the ``dim_plant_categories`` table.

    Args:
        num_records: Number of rows to generate.

    Returns:
        The list of generated row dicts (empty when ``num_records`` is 0).
    """
    # Declared up front instead of being read from data[0], so a request for
    # zero records yields a header-only CSV rather than an IndexError.
    headers = ['plant_categories_id', 'name', 'created_at', 'updated_at']
    data = []
    for _ in range(num_records):
        # NOTE(review): ids are drawn independently from 1..1000, so the same
        # id can appear on more than one row.
        record = {
            'plant_categories_id': fake.random_int(min=1, max=1000),
            'name': fake.word(),
            'created_at': fake.date_this_decade(),
        }
        # Drawn from [created_at, today] so a row is never "updated" before
        # it was created.
        record['updated_at'] = fake.date_between(
            start_date=record['created_at'], end_date='today'
        )
        data.append(record)
    save_to_csv('dim_plant_categories.csv', data, headers)
    if data:
        # An empty row list produces a DataFrame without columns; skip the upload.
        upload_to_bigquery('dim_plant_categories', data)
    return data

# Function to generate dummy data for dim_plant_images
def insert_dim_plant_images(num_records):
    """Generate fake ``dim_plant_images`` rows, write them to CSV and upload them.

    The rows are written to ``dim_plant_images.csv`` and, when at least one row
    was generated, uploaded to the ``dim_plant_images`` table.

    Args:
        num_records: Number of rows to generate.

    Returns:
        The list of generated row dicts (empty when ``num_records`` is 0).
    """
    # Declared up front instead of being read from data[0], so a request for
    # zero records yields a header-only CSV rather than an IndexError.
    headers = ['plant_images_id', 'src', 'created_at', 'updated_at']
    data = []
    for _ in range(num_records):
        # NOTE(review): ids are drawn independently from 1..1000, so the same
        # id can appear on more than one row.
        record = {
            'plant_images_id': fake.random_int(min=1, max=1000),
            'src': fake.image_url(),
            'created_at': fake.date_this_decade(),
        }
        # Drawn from [created_at, today] so a row is never "updated" before
        # it was created.
        record['updated_at'] = fake.date_between(
            start_date=record['created_at'], end_date='today'
        )
        data.append(record)
    save_to_csv('dim_plant_images.csv', data, headers)
    if data:
        # An empty row list produces a DataFrame without columns; skip the upload.
        upload_to_bigquery('dim_plant_images', data)
    return data

# Function to generate dummy data for dim_plant_instructions
def insert_dim_plant_instructions(num_records):
    """Generate fake ``dim_plant_instructions`` rows, write them to CSV and upload them.

    The rows are written to ``dim_plant_instructions.csv`` and, when at least
    one row was generated, uploaded to the ``dim_plant_instructions`` table.

    Args:
        num_records: Number of rows to generate.

    Returns:
        The list of generated row dicts (empty when ``num_records`` is 0).
    """
    # Declared up front instead of being read from data[0], so a request for
    # zero records yields a header-only CSV rather than an IndexError.
    headers = [
        'plant_instructions_id',
        'step_number',
        'step_desc',
        'step_image_url',
        'additional_tips',
        'created_at',
        'updated_at',
    ]
    data = []
    for _ in range(num_records):
        # NOTE(review): ids are drawn independently from 1..1000, so the same
        # id can appear on more than one row.
        record = {
            'plant_instructions_id': fake.random_int(min=1, max=1000),
            'step_number': fake.random_int(min=1, max=10),
            'step_desc': fake.sentence(),
            'step_image_url': fake.image_url(),
            'additional_tips': fake.sentence(),
            'created_at': fake.date_this_decade(),
        }
        # Drawn from [created_at, today] so a row is never "updated" before
        # it was created.
        record['updated_at'] = fake.date_between(
            start_date=record['created_at'], end_date='today'
        )
        data.append(record)
    save_to_csv('dim_plant_instructions.csv', data, headers)
    if data:
        # An empty row list produces a DataFrame without columns; skip the upload.
        upload_to_bigquery('dim_plant_instructions', data)
    return data

# Function to generate dummy data for dim_plant_characteristic
def insert_dim_plant_characteristic(num_records):
    """Generate fake ``dim_plant_characteristic`` rows, write them to CSV and upload them.

    The rows are written to ``dim_plant_characteristic.csv`` and, when at least
    one row was generated, uploaded to the ``dim_plant_characteristic`` table.
    ``height`` and ``wide`` are integers in 10..200 and both unit columns are
    the constant ``'cm'``.

    Args:
        num_records: Number of rows to generate.

    Returns:
        The list of generated row dicts (empty when ``num_records`` is 0).
    """
    # Declared up front instead of being read from data[0], so a request for
    # zero records yields a header-only CSV rather than an IndexError.
    headers = [
        'plant_characteristic_id',
        'height',
        'height_unit',
        'wide',
        'wide_unit',
        'leaf_color',
        'created_at',
        'updated_at',
    ]
    data = []
    for _ in range(num_records):
        # NOTE(review): ids are drawn independently from 1..1000, so the same
        # id can appear on more than one row.
        record = {
            'plant_characteristic_id': fake.random_int(min=1, max=1000),
            'height': fake.random_int(min=10, max=200),
            'height_unit': 'cm',
            'wide': fake.random_int(min=10, max=200),
            'wide_unit': 'cm',
            'leaf_color': fake.color_name(),
            'created_at': fake.date_this_decade(),
        }
        # Drawn from [created_at, today] so a row is never "updated" before
        # it was created.
        record['updated_at'] = fake.date_between(
            start_date=record['created_at'], end_date='today'
        )
        data.append(record)
    save_to_csv('dim_plant_characteristic.csv', data, headers)
    if data:
        # An empty row list produces a DataFrame without columns; skip the upload.
        upload_to_bigquery('dim_plant_characteristic', data)
    return data

# Function to generate dummy data for dim_plant_faqs
def insert_dim_plant_faqs(num_records):
    """Generate fake ``dim_plant_faqs`` rows, write them to CSV and upload them.

    The rows are written to ``dim_plant_faqs.csv`` and, when at least one row
    was generated, uploaded to the ``dim_plant_faqs`` table.

    Args:
        num_records: Number of rows to generate.

    Returns:
        The list of generated row dicts (empty when ``num_records`` is 0).
    """
    # Declared up front instead of being read from data[0], so a request for
    # zero records yields a header-only CSV rather than an IndexError.
    headers = ['plant_faqs_id', 'question', 'answer', 'created_at', 'updated_at']
    data = []
    for _ in range(num_records):
        # NOTE(review): ids are drawn independently from 1..1000, so the same
        # id can appear on more than one row.
        record = {
            'plant_faqs_id': fake.random_int(min=1, max=1000),
            'question': fake.sentence(),
            'answer': fake.sentence(),
            'created_at': fake.date_this_decade(),
        }
        # Drawn from [created_at, today] so a row is never "updated" before
        # it was created.
        record['updated_at'] = fake.date_between(
            start_date=record['created_at'], end_date='today'
        )
        data.append(record)
    save_to_csv('dim_plant_faqs.csv', data, headers)
    if data:
        # An empty row list produces a DataFrame without columns; skip the upload.
        upload_to_bigquery('dim_plant_faqs', data)
    return data

# Function to generate dummy data for dim_users
def insert_dim_users(num_records):
    """Generate fake ``dim_users`` rows, write them to CSV and upload them.

    The rows are written to ``dim_users.csv`` and, when at least one row was
    generated, uploaded to the ``dim_users`` table. ``otp`` is a number with
    exactly six digits.

    Args:
        num_records: Number of rows to generate.

    Returns:
        The list of generated row dicts (empty when ``num_records`` is 0).
    """
    # Declared up front instead of being read from data[0], so a request for
    # zero records yields a header-only CSV rather than an IndexError.
    headers = [
        'user_id',
        'name',
        'email',
        'password',
        'otp',
        'is_active',
        'created_at',
        'updated_at',
    ]
    data = []
    for _ in range(num_records):
        # NOTE(review): ids are drawn independently from 1..1000, so the same
        # id can appear on more than one row.
        record = {
            'user_id': fake.random_int(min=1, max=1000),
            'name': fake.name(),
            'email': fake.email(),
            'password': fake.password(),
            'otp': fake.random_number(digits=6, fix_len=True),
            'is_active': fake.boolean(),
            'created_at': fake.date_this_decade(),
        }
        # Drawn from [created_at, today] so a row is never "updated" before
        # it was created.
        record['updated_at'] = fake.date_between(
            start_date=record['created_at'], end_date='today'
        )
        data.append(record)
    save_to_csv('dim_users.csv', data, headers)
    if data:
        # An empty row list produces a DataFrame without columns; skip the upload.
        upload_to_bigquery('dim_users', data)
    return data

# Function to generate dummy data for dim_plants
def insert_dim_plants(num_records):
    """Generate fake ``dim_plants`` rows, write them to CSV and upload them.

    The rows are written to ``dim_plants.csv`` and, when at least one row was
    generated, uploaded to the ``dim_plants`` table.

    Args:
        num_records: Number of rows to generate.

    Returns:
        The list of generated row dicts (empty when ``num_records`` is 0).
    """
    # Declared up front instead of being read from data[0], so a request for
    # zero records yields a header-only CSV rather than an IndexError.
    # NOTE(review): this table names the column 'plant_category_id' while the
    # other tables in this file use 'plant_categories_id' - confirm which
    # spelling the schema expects before renaming anything.
    headers = [
        'plants_id',
        'plant_images_id',
        'name',
        'description',
        'is_toxic',
        'harvest_duration',
        'plant_category_id',
        'climates',
        'created_at',
        'updated_at',
    ]
    data = []
    for _ in range(num_records):
        # NOTE(review): every id here is an independent random draw from
        # 1..1000; plants_id can repeat, and the image/category ids are not
        # taken from the ids generated for those tables.
        record = {
            'plants_id': fake.random_int(min=1, max=1000),
            'plant_images_id': fake.random_int(min=1, max=1000),
            'name': fake.word(),
            'description': fake.sentence(),
            'is_toxic': fake.boolean(),
            'harvest_duration': fake.random_int(min=1, max=365),
            'plant_category_id': fake.random_int(min=1, max=1000),
            'climates': fake.word(),
            'created_at': fake.date_this_decade(),
        }
        # Drawn from [created_at, today] so a row is never "updated" before
        # it was created.
        record['updated_at'] = fake.date_between(
            start_date=record['created_at'], end_date='today'
        )
        data.append(record)
    save_to_csv('dim_plants.csv', data, headers)
    if data:
        # An empty row list produces a DataFrame without columns; skip the upload.
        upload_to_bigquery('dim_plants', data)
    return data

# Function to generate dummy data for fact_plants_data
def insert_fact_plants_data(num_records):
    """Generate fake ``fact_plants_data`` rows, write them to CSV and upload them.

    The rows are written to ``fact_plants_data.csv`` and, when at least one row
    was generated, uploaded to the ``fact_plants_data`` table.

    Args:
        num_records: Number of rows to generate.

    Returns:
        The list of generated row dicts (empty when ``num_records`` is 0).
    """
    # Declared up front instead of being read from data[0], so a request for
    # zero records yields a header-only CSV rather than an IndexError.
    headers = [
        'plant_categories_id',
        'plant_faqs_id',
        'plants_id',
        'user_id',
        'plant_characteristic_id',
        'plant_instructions_id',
        'total_plants',
        'plants_by_category',
    ]
    data = []
    for _ in range(num_records):
        # NOTE(review): the *_id values are independent random draws from
        # 1..1000; they are not taken from the ids generated for the dim tables.
        data.append({
            'plant_categories_id': fake.random_int(min=1, max=1000),
            'plant_faqs_id': fake.random_int(min=1, max=1000),
            'plants_id': fake.random_int(min=1, max=1000),
            'user_id': fake.random_int(min=1, max=1000),
            'plant_characteristic_id': fake.random_int(min=1, max=1000),
            'plant_instructions_id': fake.random_int(min=1, max=1000),
            'total_plants': fake.random_int(min=1, max=100),
            'plants_by_category': fake.random_int(min=1, max=100),
        })
    save_to_csv('fact_plants_data.csv', data, headers)
    if data:
        # An empty row list produces a DataFrame without columns; skip the upload.
        upload_to_bigquery('fact_plants_data', data)
    return data

# Function to generate dummy data for fact_fertilizer
def insert_fact_fertilizer(num_records):
    """Generate fake ``fact_fertilizer`` rows, write them to CSV and upload them.

    The rows are written to ``fact_fertilizer.csv`` and, when at least one row
    was generated, uploaded to the ``fact_fertilizer`` table.

    Args:
        num_records: Number of rows to generate.

    Returns:
        The list of generated row dicts (empty when ``num_records`` is 0).
    """
    # Declared up front instead of being read from data[0], so a request for
    # zero records yields a header-only CSV rather than an IndexError.
    headers = [
        'user_id',
        'plants_id',
        'plant_categories_id',
        'fertilizer_amount',
        'fertilizer_frequency',
    ]
    data = []
    for _ in range(num_records):
        # NOTE(review): the *_id values are independent random draws from
        # 1..1000; they are not taken from the ids generated for the dim tables.
        data.append({
            'user_id': fake.random_int(min=1, max=1000),
            'plants_id': fake.random_int(min=1, max=1000),
            'plant_categories_id': fake.random_int(min=1, max=1000),
            'fertilizer_amount': fake.random_int(min=1, max=100),
            'fertilizer_frequency': fake.random_int(min=1, max=365),
        })
    save_to_csv('fact_fertilizer.csv', data, headers)
    if data:
        # An empty row list produces a DataFrame without columns; skip the upload.
        upload_to_bigquery('fact_fertilizer', data)
    return data

# Function to generate dummy data for fact_watering_reminders
def insert_fact_watering_reminders(num_records):
    """Generate fake ``fact_watering_reminders`` rows, write them to CSV and upload them.

    The rows are written to ``fact_watering_reminders.csv`` and, when at least
    one row was generated, uploaded to the ``fact_watering_reminders`` table.

    Args:
        num_records: Number of rows to generate.

    Returns:
        The list of generated row dicts (empty when ``num_records`` is 0).
    """
    # Declared up front instead of being read from data[0], so a request for
    # zero records yields a header-only CSV rather than an IndexError.
    headers = [
        'user_id',
        'plants_id',
        'plant_categories_id',
        'watering_frequency',
        'watering_amount',
    ]
    data = []
    for _ in range(num_records):
        # NOTE(review): the *_id values are independent random draws from
        # 1..1000; they are not taken from the ids generated for the dim tables.
        data.append({
            'user_id': fake.random_int(min=1, max=1000),
            'plants_id': fake.random_int(min=1, max=1000),
            'plant_categories_id': fake.random_int(min=1, max=1000),
            'watering_frequency': fake.random_int(min=1, max=365),
            'watering_amount': fake.random_int(min=1, max=100),
        })
    save_to_csv('fact_watering_reminders.csv', data, headers)
    if data:
        # An empty row list produces a DataFrame without columns; skip the upload.
        upload_to_bigquery('fact_watering_reminders', data)
    return data

# Map each table name to its generator. The key is the generator's function
# name with the leading "insert_" removed, and the order below is the order
# in which the tables are generated and uploaded.
table_functions = {
    generator.__name__[len('insert_'):]: generator
    for generator in (
        insert_dim_plant_categories,
        insert_dim_plant_images,
        insert_dim_plant_instructions,
        insert_dim_plant_characteristic,
        insert_dim_plant_faqs,
        insert_dim_users,
        insert_dim_plants,
        insert_fact_plants_data,
        insert_fact_fertilizer,
        insert_fact_watering_reminders,
    )
}

# Produce 100 rows per table; every generator also writes its CSV file and
# uploads the rows itself.
for table, func in table_functions.items():
    data = func(100)

print("Data insertion and CSV creation completed!")

Forbidden: 403 GET https://bigquery.googleapis.com/bigquery/v2/projects/plantopia-capstone/datasets/plantopia/tables/dim_plant_categories?prettyPrint=false: Access Denied: Table plantopia-capstone:plantopia.dim_plant_categories: Permission bigquery.tables.get denied on table plantopia-capstone:plantopia.dim_plant_categories (or it may not exist).