In [5]:
import os
import mysql.connector
import pandas as pd
from dotenv import load_dotenv
from datetime import datetime
import firebase_admin
from firebase_admin import credentials, storage

# Module-level accumulator for raw data; starts out empty
raw_data = list()

def ingestion(table):
    """Read every row of one MySQL table into a DataFrame.

    The connection uses HOST and PASSWORD from the environment (after the
    .env file has been loaded), the user 'root' and the database 'capstone5'.

    Args:
        table: Name of the table to read. The name is interpolated directly
            into the SQL text, so pass only trusted, hard-coded names.

    Returns:
        pandas.DataFrame holding the result of ``SELECT * FROM <table>``.

    Raises:
        Whatever the connector or ``pd.read_sql`` raises; the connection is
        closed before the exception propagates.
    """
    load_dotenv()
    connection = mysql.connector.connect(
        host=os.getenv('HOST'),
        user='root',
        password=os.getenv('PASSWORD'),
        database='capstone5'
    )
    try:
        query = f"SELECT * FROM {table}"
        return pd.read_sql(query, connection)
    finally:
        # Close even when the query fails so the connection is never leaked
        connection.close()

# Source tables to export, listed in the order they are ingested
tables = [
    'applications',
    'articles',
    'comments',
    'donation_manual_comments',
    'donation_manuals',
    'fundraising_categories',
    'fundraisings',
    'likes_comments',
    'organizations',
    'testimoni_volunteers',
    'user_bookmark_fundraisings',
    'user_bookmark_volunteer_vacancies',
    'volunteers',
    'users',
    'admins',
    'like_donation_comments',
    'user_bookmark_articles',
]

# Read each listed table into its own DataFrame, keeping the order of `tables`
data_frame = list(map(ingestion, tables))

def initialize_firebase():
    """Ensure the default Firebase app exists and return a storage bucket.

    Loads the .env file, initialises the default Firebase app on first use
    with the service-account file named by FIREBASE_CREDENTIALS_PATH, and
    returns the bucket named by BUCKET_NAME.

    Returns:
        The object returned by ``storage.bucket(<BUCKET_NAME>)``.

    Raises:
        ValueError: If the app has to be initialised and
            FIREBASE_CREDENTIALS_PATH is not set.
    """
    load_dotenv()

    # Use the public get_app() check rather than the private
    # firebase_admin._apps registry; it raises ValueError when no default
    # app has been initialised yet.
    try:
        firebase_admin.get_app()
        already_initialized = True
    except ValueError:
        already_initialized = False

    if already_initialized:
        print("Firebase is already initialized")
    else:
        credentials_path = os.getenv('FIREBASE_CREDENTIALS_PATH')
        if not credentials_path:
            # Fail with an explicit message instead of handing None to Certificate()
            raise ValueError(
                "FIREBASE_CREDENTIALS_PATH is not set in the environment or .env file"
            )
        cred = credentials.Certificate(credentials_path)
        firebase_admin.initialize_app(cred)
        print("Firebase has been initialized")

    bucket_name = os.getenv('BUCKET_NAME')
    return storage.bucket(bucket_name)

def create_folder_in_bucket():
    """Create an empty ``YYYYMMDD/`` placeholder object and return the date.

    The bucket comes from ``initialize_firebase()``; the date is today's
    local date formatted as ``%Y%m%d``.

    Returns:
        The date string used as the folder name.
    """
    target_bucket = initialize_firebase()
    date_stamp = f"{datetime.now():%Y%m%d}"

    # A zero-length object whose name ends in '/' serves as the folder marker
    target_bucket.blob(f"{date_stamp}/").upload_from_string('')

    print(f"Folder '{date_stamp}' created successfully.")
    return date_stamp

def upload_dataframes_to_firebase(data_frames, table_names=None):
    """Upload each DataFrame as a CSV file into today's dated bucket folder.

    Each frame is written to ``<YYYYMMDD>/<table>_<YYYYMMDD>.csv`` with
    content type ``text/csv``.

    Args:
        data_frames: Iterable of DataFrames to upload.
        table_names: Names paired positionally with ``data_frames``.
            Defaults to the module-level ``tables`` list.

    Raises:
        ValueError: If the number of frames and names differ. The check runs
            before anything is written, so a mismatch uploads nothing.
    """
    if table_names is None:
        table_names = tables

    data_frames = list(data_frames)
    table_names = list(table_names)
    if len(data_frames) != len(table_names):
        # zip() would silently drop the surplus and could mislabel files
        raise ValueError(
            f"Got {len(data_frames)} dataframes but {len(table_names)} table names"
        )

    current_date = create_folder_in_bucket()
    bucket = initialize_firebase()

    for df, table_name in zip(data_frames, table_names):
        # Convert dataframe to CSV string
        csv_str = df.to_csv(index=False)

        # Create the blob reference with folder name
        file_name_with_date = f"{table_name}_{current_date}.csv"
        file_path_in_bucket = f"{current_date}/{file_name_with_date}"
        file_ref = bucket.blob(file_path_in_bucket)

        # Upload CSV string to Firebase
        file_ref.upload_from_string(csv_str, content_type='text/csv')
        print(f"Dataframe {table_name} uploaded successfully as {file_name_with_date}!")

# Upload every ingested DataFrame to Firebase Storage. Inside the function the
# frames are paired positionally with the module-level `tables` list, so
# `data_frame` must be in the same order as `tables`.
upload_dataframes_to_firebase(data_frame)


  df = pd.read_sql(query, connection)
  df = pd.read_sql(query, connection)


Firebase is already initialized
Folder '20240619' created successfully.
Firebase is already initialized
Dataframe applications uploaded successfully as applications_20240619.csv!
Dataframe articles uploaded successfully as articles_20240619.csv!
Dataframe comments uploaded successfully as comments_20240619.csv!
Dataframe donation_manual_comments uploaded successfully as donation_manual_comments_20240619.csv!
Dataframe donation_manuals uploaded successfully as donation_manuals_20240619.csv!
Dataframe fundraising_categories uploaded successfully as fundraising_categories_20240619.csv!
Dataframe fundraisings uploaded successfully as fundraisings_20240619.csv!
Dataframe likes_comments uploaded successfully as likes_comments_20240619.csv!
Dataframe organizations uploaded successfully as organizations_20240619.csv!
Dataframe testimoni_volunteers uploaded successfully as testimoni_volunteers_20240619.csv!
Dataframe user_bookmark_fundraisings uploaded successfully as user_bookmark_fundraisings

In [None]:
import os
import pandas as pd
from google.cloud import bigquery
from io import StringIO
from dotenv import load_dotenv

# Pull settings from the .env file into the process environment
load_dotenv()

# Stop immediately when the Google Cloud credentials path is missing;
# otherwise re-export it through os.environ
credentials_path = os.getenv("GOOGLE_APPLICATION_CREDENTIALS")
if not credentials_path:
    raise Exception("GOOGLE_APPLICATION_CREDENTIALS is not set in the .env file")
os.environ["GOOGLE_APPLICATION_CREDENTIALS"] = credentials_path

# Shared BigQuery client
client = bigquery.Client()

# Dataset IDs for the fact and dimension datasets, read from the environment
dataset_id_fact, dataset_id_dim = (
    os.getenv("dataset_id_fact"),
    os.getenv("dataset_id_dim"),
)

def load_df_to_bigquery(client, dataset_id, table_name, df):
    """Load a DataFrame into ``<dataset_id>.<table_name>`` via a CSV load job.

    The frame is serialised to CSV (header row, no index), uploaded with
    schema autodetection, and replaces existing table contents
    (WRITE_TRUNCATE). When the frame has a ``created_at`` column the load job
    is configured with time partitioning on that field.

    Args:
        client: BigQuery client used to run the load job and fetch the table.
        dataset_id: Dataset part of the destination table ID.
        table_name: Table part of the destination table ID.
        df: DataFrame to load.

    Returns:
        None. Prints the row and column counts reported for the table.
    """
    # Local import: the surrounding import cell only brings in StringIO
    from io import BytesIO

    # load_table_from_file takes a binary file object, so upload UTF-8 bytes
    # rather than a text-mode StringIO
    csv_buffer = BytesIO(df.to_csv(index=False).encode("utf-8"))

    table_id = f"{dataset_id}.{table_name}"

    # Configure partitioning if 'created_at' field exists in the DataFrame
    partition_by = None
    if 'created_at' in df.columns:
        partition_by = bigquery.TimePartitioning(field="created_at")

    job_config = bigquery.LoadJobConfig(
        source_format=bigquery.SourceFormat.CSV,
        skip_leading_rows=1,  # skip the CSV header row
        autodetect=True,
        write_disposition=bigquery.WriteDisposition.WRITE_TRUNCATE,
        time_partitioning=partition_by
    )

    # Load CSV data from the in-memory binary buffer
    job = client.load_table_from_file(csv_buffer, table_id, job_config=job_config)

    # Wait for the load job to complete
    job.result()

    # Get table information
    table = client.get_table(table_id)
    print(
        "Loaded {} rows and {} columns to {}".format(
            table.num_rows, len(table.schema), table_id
        )
    )

In [None]:
import pymysql
import os
from sqlalchemy import create_engine
import pandas as pd

def load_db_local(df):
    """Write ``df`` to the local MySQL table ``peduli_pintar.data_table``.

    Creates the ``peduli_pintar`` database if needed, replaces ``data_table``
    with the DataFrame contents, converts ``created_at`` to DATETIME and
    partitions the table by ``YEAR(created_at)``.

    The MySQL port is read from the ``port`` environment variable (default
    3307) and is used for both the pymysql connection and the SQLAlchemy
    engine, so both always reach the same server.

    Args:
        df: DataFrame to store; the ALTER statements reference a
            ``created_at`` column.

    Raises:
        ValueError: If ``port`` is set but is not an integer.
        Whatever pymysql / SQLAlchemy raise on database errors; the cursor,
        connection and engine are released before the exception propagates.
    """
    # Resolve the port once; 3307 was the value hard-coded in the engine URL
    port = int(os.getenv('port', '3307'))

    # Establish the connection
    connection = pymysql.connect(
        host='localhost',
        user='root',
        password='',
        port=port
    )
    try:
        cursor = connection.cursor()
        try:
            # Create database if it doesn't exist
            cursor.execute('CREATE DATABASE IF NOT EXISTS peduli_pintar')
            cursor.execute('USE peduli_pintar')

            # Create the engine on the same port as the raw connection
            engine = create_engine(
                f'mysql+pymysql://root:@localhost:{port}/peduli_pintar'
            )
            try:
                # Convert the dataframe to SQL
                df.to_sql(name='data_table', con=engine, if_exists='replace', index=False)
            finally:
                # Release pooled connections held by the engine
                engine.dispose()

            # Ensure created_at is in the correct datetime format in MySQL
            cursor.execute('ALTER TABLE data_table MODIFY COLUMN created_at DATETIME')

            # Partition by year. The trailing MAXVALUE partition accepts rows
            # from 2026 onwards, which would otherwise have no partition.
            cursor.execute('''
                ALTER TABLE data_table
                PARTITION BY RANGE (YEAR(created_at)) (
                    PARTITION p0 VALUES LESS THAN (2021),
                    PARTITION p1 VALUES LESS THAN (2022),
                    PARTITION p2 VALUES LESS THAN (2023),
                    PARTITION p3 VALUES LESS THAN (2024),
                    PARTITION p4 VALUES LESS THAN (2025),
                    PARTITION p5 VALUES LESS THAN (2026),
                    PARTITION pmax VALUES LESS THAN MAXVALUE
                )
            ''')
        finally:
            cursor.close()

        # Commit the changes
        connection.commit()
    finally:
        # Always close the connection, including on failure
        connection.close()

# NOTE(review): load_db_local is defined with one required parameter (df) but
# is called here with no arguments, so this line raises TypeError when run.
# Pass the DataFrame that should be loaded — TODO confirm which one.
load_db_local()
