# This is to pull data from the Carbitrage GBQ processed table

In [1]:
from google.cloud import bigquery
import sqlite3
import os

def fetch_data_from_gbq(query, project_id):
    client = bigquery.Client(project="umt-msba")
    query_job = client.query(query)
    results = query_job.result()
    return results

def store_data_in_db(data, db_path, table_name):
    conn = sqlite3.connect(db_path)
    cursor = conn.cursor()
    
    # Convert iterator to list so we can use it multiple times
    data_list = list(data)
    
    if not data_list:
        print("No data returned from BigQuery.")
        conn.close()
        return
    
    # Get schema from the first row
    first_row = data_list[0]
    schema = list(first_row.keys())
    
    # Create table with dynamic columns
    columns = ", ".join([f"{col} TEXT" for col in schema])
    cursor.execute(f"""
        CREATE TABLE IF NOT EXISTS {table_name} (
            {columns}
        )
    """)
    
    # Insert all data (including first row)
    placeholders = ','.join(['?' for _ in range(len(schema))])
    for row in data_list:
        cursor.execute(f"INSERT INTO {table_name} VALUES ({placeholders})", tuple(row.values()))
    
    conn.commit()
    conn.close()




In [2]:
gbq_query = "SELECT * FROM umt-msba.carbitrage.processed_listing_pages"
project_id = "your_project_id"
db_path = "./data/carbitrage.db"
table_name = "carbitrage_table"



In [3]:
# Add this before connecting to the database
os.makedirs(os.path.dirname(db_path), exist_ok=True)



In [4]:
data = fetch_data_from_gbq(gbq_query, project_id)



In [5]:
store_data_in_db(data, db_path, table_name)

  cursor.execute(f"INSERT INTO {table_name} VALUES ({placeholders})", tuple(row.values()))
