In [None]:
%%capture
!pip install pandas
!pip install --upgrade google-cloud-bigquery
!pip install --upgrade google-cloud-storage
!pip install sqlite3

In [None]:
# Sign in with a Google account from inside Colab; the Google Cloud clients
# created in the following cells are constructed after this step.
from google.colab import auth

auth.authenticate_user()


In [None]:
from google.cloud import bigquery

# Set your Google Cloud Project ID here.
# Project IDs may only contain lowercase letters, digits and hyphens, so the ID
# is written in lowercase; this same value is reused by the storage client.
project_id = 'text-sql-project'

# BigQuery client bound to the project above.
client = bigquery.Client(project=project_id)


In [None]:
from google.cloud import storage

# Initialize Cloud Storage client
storage_client = storage.Client(project=project_id)

# Name of the bucket that will hold the exported CSV files.
bucket_name = 'northwind_dataset_bucket'  # Change this to a unique name

# create_bucket() raises a Conflict error when the bucket already exists, which
# would make this cell fail on every re-run. lookup_bucket() returns None for a
# missing bucket, so the bucket is only created the first time.
bucket = storage_client.lookup_bucket(bucket_name)
if bucket is None:
    bucket = storage_client.create_bucket(bucket_name)
    print(f"Bucket {bucket_name} created.")
else:
    print(f"Bucket {bucket_name} already exists; reusing it.")


Bucket northwind_dataset_bucket created.


In [None]:
import sqlite3
from contextlib import closing
from pathlib import Path

import pandas as pd

# Location of the SQLite database to export (relative to the working directory).
db_path = Path('northwind.db')

# sqlite3.connect() silently creates a new, empty database when the file does
# not exist, which would let this cell finish without exporting anything.
if not db_path.is_file():
    raise FileNotFoundError(f"SQLite database not found: {db_path}")

# closing() releases the connection even if one of the queries below raises.
with closing(sqlite3.connect(db_path)) as conn:
    # Get the list of all tables in the database. This includes SQLite's own
    # sqlite_sequence bookkeeping table when the database has one.
    tables_query = "SELECT name FROM sqlite_master WHERE type='table';"
    tables = pd.read_sql_query(tables_query, conn)

    # Iterate over each table and export it as a CSV
    for table in tables['name']:
        # Quote the identifier with standard SQL double quotes (doubling any
        # embedded quote) so names with spaces or special characters are safe.
        table_name_escaped = '"' + table.replace('"', '""') + '"'

        try:
            # Read each table into a pandas DataFrame
            df = pd.read_sql_query(f"SELECT * FROM {table_name_escaped}", conn)

            # Export the table to a CSV file named after the table
            df.to_csv(f'{table}.csv', index=False)
            print(f"Exported {table} to {table}.csv")

        except Exception as e:
            # Best effort: report the failure and continue with the remaining tables.
            print(f"Error exporting {table}: {e}")


Exported Categories to Categories.csv
Exported sqlite_sequence to sqlite_sequence.csv
Exported CustomerCustomerDemo to CustomerCustomerDemo.csv
Exported CustomerDemographics to CustomerDemographics.csv
Exported Customers to Customers.csv
Exported Employees to Employees.csv
Exported EmployeeTerritories to EmployeeTerritories.csv
Exported Order Details to Order Details.csv
Exported Orders to Orders.csv
Exported Products to Products.csv
Exported Regions to Regions.csv
Exported Shippers to Shippers.csv
Exported Suppliers to Suppliers.csv
Exported Territories to Territories.csv


In [None]:
import os

# Build a Cloud Storage client for the project and fetch the existing bucket.
storage_client = storage.Client(project=project_id)
bucket = storage_client.get_bucket(bucket_name)

# CSV files to upload, one per exported SQLite table.
csv_files = [
    'Categories.csv',
    'sqlite_sequence.csv',
    'CustomerCustomerDemo.csv',
    'CustomerDemographics.csv',
    'Customers.csv',
    'Employees.csv',
    'EmployeeTerritories.csv',
    'Order Details.csv',
    'Orders.csv',
    'Products.csv',
    'Regions.csv',
    'Shippers.csv',
    'Suppliers.csv',
    'Territories.csv',
]

# Each file is read from the current working directory and stored in the
# bucket under an object name identical to its file name.
for csv_file in csv_files:
    bucket.blob(csv_file).upload_from_filename(csv_file)
    print(f"Uploaded {csv_file} to GCS bucket {bucket_name}")


Uploaded Categories.csv to GCS bucket northwind_dataset_bucket
Uploaded sqlite_sequence.csv to GCS bucket northwind_dataset_bucket
Uploaded CustomerCustomerDemo.csv to GCS bucket northwind_dataset_bucket
Uploaded CustomerDemographics.csv to GCS bucket northwind_dataset_bucket
Uploaded Customers.csv to GCS bucket northwind_dataset_bucket
Uploaded Employees.csv to GCS bucket northwind_dataset_bucket
Uploaded EmployeeTerritories.csv to GCS bucket northwind_dataset_bucket
Uploaded Order Details.csv to GCS bucket northwind_dataset_bucket
Uploaded Orders.csv to GCS bucket northwind_dataset_bucket
Uploaded Products.csv to GCS bucket northwind_dataset_bucket
Uploaded Regions.csv to GCS bucket northwind_dataset_bucket
Uploaded Shippers.csv to GCS bucket northwind_dataset_bucket
Uploaded Suppliers.csv to GCS bucket northwind_dataset_bucket
Uploaded Territories.csv to GCS bucket northwind_dataset_bucket


In [None]:
from google.cloud import bigquery

# Initialize BigQuery client with your project ID
project_id = 'text-sql-project'
client = bigquery.Client(project=project_id)

# Specify the dataset where you want to load your tables
dataset_id = 'northwind_dataset'  # Replace with your BigQuery dataset name

# List of your CSV files in GCS
csv_files = ['Categories.csv', 'sqlite_sequence.csv', 'CustomerCustomerDemo.csv',
             'CustomerDemographics.csv', 'Customers.csv', 'Employees.csv',
             'EmployeeTerritories.csv', 'Order Details.csv', 'Orders.csv',
             'Products.csv', 'Regions.csv', 'Shippers.csv',
             'Suppliers.csv', 'Territories.csv']

# Name of your GCS bucket
bucket_name = 'northwind_dataset_bucket'

# The load configuration is identical for every file, so build it once.
job_config = bigquery.LoadJobConfig(
    source_format=bigquery.SourceFormat.CSV,
    skip_leading_rows=1,  # Skip header row
    autodetect=True,  # Automatically detect schema based on CSV content
    max_bad_records=10,  # Allow BigQuery to skip up to 10 bad records per load job
    quote_character='"',  # Handle quoted strings properly
    # Text columns may contain line breaks inside quoted fields; without this
    # flag such rows are rejected and counted against max_bad_records.
    allow_quoted_newlines=True,
    # Load jobs append by default, so re-running this cell would duplicate
    # every row. Replace the table contents instead.
    write_disposition=bigquery.WriteDisposition.WRITE_TRUNCATE,
)

# Loop through each CSV file and load it into BigQuery
for csv_file in csv_files:
    # Table name derived from the CSV file name: drop ".csv" and replace
    # spaces with underscores.
    table_id = csv_file.replace('.csv', '').replace(' ', '_')

    # Specify the URI of the CSV file in GCS
    gcs_uri = f'gs://{bucket_name}/{csv_file}'

    # Start the load job into <dataset>.<table>
    load_job = client.load_table_from_uri(
        gcs_uri,
        f'{dataset_id}.{table_id}',
        job_config=job_config
    )

    # Wait for the job to complete; result() raises if the load failed.
    load_job.result()

    print(f"Loaded {table_id} into BigQuery.")

Loaded Categories into BigQuery.
Loaded sqlite_sequence into BigQuery.
Loaded CustomerCustomerDemo into BigQuery.
Loaded CustomerDemographics into BigQuery.
Loaded Customers into BigQuery.
Loaded Employees into BigQuery.
Loaded EmployeeTerritories into BigQuery.
Loaded Order_Details into BigQuery.
Loaded Orders into BigQuery.
Loaded Products into BigQuery.
Loaded Regions into BigQuery.
Loaded Shippers into BigQuery.
Loaded Suppliers into BigQuery.
Loaded Territories into BigQuery.
