<a href="https://colab.research.google.com/github/Sagaust/DH-Computational-Methodologies/blob/main/sqlite_csv_BigQuery.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [2]:
import sqlite3
conn = sqlite3.connect('/content/db2.sqlite3')
cur = conn.cursor()

In [3]:
# Query the sqlite_master table to view the schema
cur.execute("SELECT type, name, sql FROM sqlite_master WHERE type='table'")
schema = cur.fetchall()

for table_type, table_name, sql in schema:
    print(f"Table: {table_name}\nCreation SQL:\n{sql}\n")

Table: django_migrations
Creation SQL:
CREATE TABLE "django_migrations" ("id" integer NOT NULL PRIMARY KEY AUTOINCREMENT, "app" varchar(255) NOT NULL, "name" varchar(255) NOT NULL, "applied" datetime NOT NULL)

Table: sqlite_sequence
Creation SQL:
CREATE TABLE sqlite_sequence(name,seq)

Table: auth_group_permissions
Creation SQL:
CREATE TABLE "auth_group_permissions" ("id" integer NOT NULL PRIMARY KEY AUTOINCREMENT, "group_id" integer NOT NULL REFERENCES "auth_group" ("id") DEFERRABLE INITIALLY DEFERRED, "permission_id" integer NOT NULL REFERENCES "auth_permission" ("id") DEFERRABLE INITIALLY DEFERRED)

Table: auth_user_groups
Creation SQL:
CREATE TABLE "auth_user_groups" ("id" integer NOT NULL PRIMARY KEY AUTOINCREMENT, "user_id" integer NOT NULL REFERENCES "auth_user" ("id") DEFERRABLE INITIALLY DEFERRED, "group_id" integer NOT NULL REFERENCES "auth_group" ("id") DEFERRABLE INITIALLY DEFERRED)

Table: auth_user_user_permissions
Creation SQL:
CREATE TABLE "auth_user_user_permissions" 

In [4]:
cur.execute("SELECT name FROM sqlite_master WHERE type='table'")
tables = cur.fetchall()
for table in tables:
    print(table[0])


django_migrations
sqlite_sequence
auth_group_permissions
auth_user_groups
auth_user_user_permissions
django_admin_log
django_content_type
auth_permission
auth_group
auth_user
autos_make
autos_auto
bookmany_book
bookmany_authored
bookmany_author
bookone_book
bookone_instance
bookone_lang
cats_breed
cats_cat
chat_message
favs_thing
favs_fav
favsql_thing
favsql_fav
form_cat
forums_forum
forums_comment
gview_car
gview_cat
gview_dog
gview_horse
many_person
many_membership
many_course
myarts_article
rest_breed
rest_cat
django_session
social_auth_association
social_auth_code
social_auth_nonce
social_auth_partial
social_auth_usersocialauth
taggit_taggeditem
taggit_tag
tagme_forum
tagme_comment
tracks_album
tracks_artist
tracks_genre
tracks_track
users_user
well_post
pics_pic
photogallery_photo
photogallery_comment


In [6]:
cur.execute('SELECT COUNT(*) FROM auth_user')
count = cur.fetchone()[0]
print("Number of users:", count)

Number of users: 1


In [8]:
import pandas as pd

In [13]:
# Retrieve the list of all tables in the database
cursor = conn.cursor()
cursor.execute("SELECT name FROM sqlite_master WHERE type='table';")
tables = [row[0] for row in cursor.fetchall()]

In [14]:
# Print available tables for verification
print("Available tables:", tables)

Available tables: ['django_migrations', 'sqlite_sequence', 'auth_group_permissions', 'auth_user_groups', 'auth_user_user_permissions', 'django_admin_log', 'django_content_type', 'auth_permission', 'auth_group', 'auth_user', 'autos_make', 'autos_auto', 'bookmany_book', 'bookmany_authored', 'bookmany_author', 'bookone_book', 'bookone_instance', 'bookone_lang', 'cats_breed', 'cats_cat', 'chat_message', 'favs_thing', 'favs_fav', 'favsql_thing', 'favsql_fav', 'form_cat', 'forums_forum', 'forums_comment', 'gview_car', 'gview_cat', 'gview_dog', 'gview_horse', 'many_person', 'many_membership', 'many_course', 'myarts_article', 'rest_breed', 'rest_cat', 'django_session', 'social_auth_association', 'social_auth_code', 'social_auth_nonce', 'social_auth_partial', 'social_auth_usersocialauth', 'taggit_taggeditem', 'taggit_tag', 'tagme_forum', 'tagme_comment', 'tracks_album', 'tracks_artist', 'tracks_genre', 'tracks_track', 'users_user', 'well_post', 'pics_pic', 'photogallery_photo', 'photogallery_co

In [15]:
# Export each table to a CSV file
for table in tables:
    df = pd.read_sql_query(f"SELECT * FROM {table}", conn)
    df.to_csv(f"{table}.csv", index=False)

In [16]:
!pip install google-cloud-bigquery




In [18]:
from google.cloud import bigquery
from google.colab import auth
import os

# Authenticate and create a client
auth.authenticate_user()
project_id = 'arn-administration'  # Replace with your project ID
client = bigquery.Client(project=project_id)

# Define your BigQuery Dataset
dataset_id = 'DjangoTemps'  # Replace with your dataset ID
dataset_ref = client.dataset(dataset_id)

# Directory where your CSV files are stored
directory = "/content/"  # Assuming all CSVs are in the '/content/' directory

# Iterate over each file in the directory and load it into BigQuery
for filename in os.listdir(directory):
    if filename.endswith('.csv'):
        table_id = filename.split('.')[0]  # Assuming table name is same as filename
        table_ref = dataset_ref.table(table_id)

        job_config = bigquery.LoadJobConfig()
        job_config.source_format = bigquery.SourceFormat.CSV
        job_config.skip_leading_rows = 1  # Set to 0 if no header row
        job_config.autodetect = True
        job_config.max_bad_records = 10  # Allows up to 10 bad records before failing


        with open(os.path.join(directory, filename), "rb") as source_file:
            job = client.load_table_from_file(source_file, table_ref, job_config=job_config)

        job.result()  # Waits for the job to complete

        print(f"Loaded {filename} into {dataset_id}.{table_id}")


Loaded favsql_fav.csv into DjangoTemps.favsql_fav
Loaded tracks_genre.csv into DjangoTemps.tracks_genre
Loaded many_person.csv into DjangoTemps.many_person
Loaded favs_thing.csv into DjangoTemps.favs_thing
Loaded tracks_album.csv into DjangoTemps.tracks_album
Loaded bookmany_author.csv into DjangoTemps.bookmany_author
Loaded myarts_article.csv into DjangoTemps.myarts_article
Loaded social_auth_code.csv into DjangoTemps.social_auth_code
Loaded photogallery_photo.csv into DjangoTemps.photogallery_photo
Loaded autos_make.csv into DjangoTemps.autos_make
Loaded bookone_instance.csv into DjangoTemps.bookone_instance
Loaded sqlite_sequence.csv into DjangoTemps.sqlite_sequence
Loaded django_session.csv into DjangoTemps.django_session
Loaded favs_fav.csv into DjangoTemps.favs_fav
Loaded social_auth_usersocialauth.csv into DjangoTemps.social_auth_usersocialauth
Loaded gview_car.csv into DjangoTemps.gview_car
Loaded auth_permission.csv into DjangoTemps.auth_permission
Loaded social_auth_nonce.csv