Step 1. Connect to PostgreSQL

In [1]:
# Connect to postgresql db using sqlalchemy library and read data from it to pandas dataframe

import pandas as pd
from sqlalchemy import create_engine

# PostgreSQL connection settings (no password is embedded in the URI below).
postgresql_user, postgresql_host = 'postgres', 'localhost'
postgresql_port, postgresql_dbname = '5432', 'postgres'

# SQLAlchemy connection URI that selects the psycopg2 driver.
database_uri = f'postgresql+psycopg2://{postgresql_user}@{postgresql_host}:{postgresql_port}/{postgresql_dbname}'

# Engine object that pandas uses to talk to the database.
engine = create_engine(database_uri)

# Fetch every row of the 2023 qualifying-results table into a DataFrame.
query1 = 'SELECT * FROM formula1.qualifyingResults2023;'
qualifyingResults_df = pd.read_sql(sql=query1, con=engine)

# A bare trailing expression makes the notebook render the DataFrame.
qualifyingResults_df

Unnamed: 0,Track,Position,No,Driver,Team,Q1,Q2,Q3,Laps
0,Bahrain,1,1,Max Verstappen,Red Bull Racing Honda RBPT,1:31.295,1:30.503,1:29.708,15
1,Bahrain,2,11,Sergio Perez,Red Bull Racing Honda RBPT,1:31.479,1:30.746,1:29.846,15
2,Bahrain,3,16,Charles Leclerc,Ferrari,1:31.094,1:30.282,1:30.000,17
3,Bahrain,4,55,Carlos Sainz,Ferrari,1:30.993,1:30.515,1:30.154,18
4,Bahrain,5,14,Fernando Alonso,Aston Martin Aramco Mercedes,1:31.158,1:30.645,1:30.336,15
...,...,...,...,...,...,...,...,...,...
415,Las Vegas,16,4,Lando Norris,McLaren Mercedes,1:34.703,,,9
416,Las Vegas,17,31,Esteban Ocon,Alpine Renault,1:34.834,,,10
417,Las Vegas,18,24,Guanyu Zhou,Alfa Romeo Ferrari,1:34.849,,,10
418,Las Vegas,19,81,Oscar Piastri,McLaren Mercedes,1:34.850,,,10


In [2]:
# Fetch every row of the 2023 race-results table through the existing `engine`.
query2 = 'SELECT * FROM formula1.raceresults2023;'
raceresults_df = pd.read_sql(sql=query2, con=engine)

# A bare trailing expression makes the notebook render the DataFrame.
raceresults_df

Unnamed: 0,Track,Position,No,Driver,Team,Starting_Grid,Laps,Time_Retired,Points,Set_Fastest_Lap,Fastest_Lap_Time
0,Bahrain,1,1,Max Verstappen,Red Bull Racing Honda RBPT,1,57,1:33:56.736,25,No,1:36.236
1,Bahrain,2,11,Sergio Perez,Red Bull Racing Honda RBPT,2,57,+11.987,18,No,1:36.344
2,Bahrain,3,14,Fernando Alonso,Aston Martin Aramco Mercedes,5,57,+38.637,15,No,1:36.156
3,Bahrain,4,55,Carlos Sainz,Ferrari,4,57,+48.052,12,No,1:37.130
4,Bahrain,5,44,Lewis Hamilton,Mercedes,7,57,+50.977,10,No,1:36.546
...,...,...,...,...,...,...,...,...,...,...,...
415,Las Vegas,16,2,Logan Sargeant,Williams Mercedes,6,50,+50.882,0,No,1:37.740
416,Las Vegas,17,77,Valtteri Bottas,Alfa Romeo Ferrari,7,50,+85.350,0,No,1:37.562
417,Las Vegas,18,22,Yuki Tsunoda,AlphaTauri Honda RBPT,20,46,DNF,0,No,1:37.587
418,Las Vegas,19,27,Nico Hulkenberg,Haas Ferrari,13,45,DNF,0,No,1:37.565


Step 2. Connect to Snowflake and load the data 

In [3]:
# Migrate data from a PostgreSQL table to Snowflake, using a pandas DataFrame as the intermediary.
#
# Snowflake credentials are NOT stored in this notebook. They are taken from
# environment variables:
#   SNOWFLAKE_PASSWORD  - required; prompted for interactively when unset
#   SNOWFLAKE_USER      - optional override of the default user below
#   SNOWFLAKE_ACCOUNT   - optional override of the default account below
# NOTE(review): a password was previously committed in plaintext in this cell;
# treat it as compromised and rotate it.

import getpass
import os

import pandas as pd
from sqlalchemy import create_engine
import snowflake.connector
from snowflake.snowpark import Session
from snowflake.connector.pandas_tools import write_pandas

# Packages required by this cell:
# pip install snowflake-connector-python
# pip install "snowflake-snowpark-python[pandas]"
# pip install notebook

postgresql_user = 'postgres'
postgresql_host = 'localhost'
postgresql_port = '5432'
postgresql_dbname = 'postgres'

# Create the database URI
database_uri = f'postgresql+psycopg2://{postgresql_user}@{postgresql_host}:{postgresql_port}/{postgresql_dbname}'

# Create the SQLAlchemy engine
engine = create_engine(database_uri)

# Define the sql query
query1 = 'SELECT * FROM formula1.qualifyingResults2023;'

# Use Pandas to read the SQL query into a DataFrame
qualifyingResults_df = pd.read_sql(query1, engine)
print("# of rows in qualifyingResults_df = ", len(qualifyingResults_df))

# Resolve Snowflake credentials at run time so no secret is saved with the notebook.
snowflake_user = os.environ.get('SNOWFLAKE_USER', 'Hanna')
snowflake_account = os.environ.get('SNOWFLAKE_ACCOUNT', 'lkhpmcc-cn69015')
snowflake_password = os.environ.get('SNOWFLAKE_PASSWORD') or getpass.getpass('Snowflake password: ')

# Configure connection to Snowflake.
# The connection is intentionally left open: the following cell reuses
# `snowflake_conn` to load the race-results table.
snowflake_conn = snowflake.connector.connect(
          user=snowflake_user,
          password=snowflake_password,
          account=snowflake_account,
          warehouse='COMPUTE_WH',
          database='F1', #Put your db name here
          schema='RAW')

# Upload the DataFrame; auto_create_table=True lets write_pandas create QRESULTS if it does not exist.
success, nchunks, nrows, _ = write_pandas(snowflake_conn, qualifyingResults_df, 'QRESULTS',  auto_create_table=True)
print(f'success = {success}, nchunks = {nchunks}, nrows = {nrows}')

# of rows in qualifyingResults_df =  420
success = True, nchunks = 1, nrows = 420


In [6]:
# Load the 2023 race-results table from PostgreSQL through the existing `engine`.
query2 = 'SELECT * FROM formula1.raceresults2023;'
raceresults_df = pd.read_sql(sql=query2, con=engine)
print("# of rows in raceresults_df = ", len(raceresults_df))

# Upload the DataFrame to the RRESULTS table over the already-open
# `snowflake_conn`, then report the outcome returned by write_pandas.
success, nchunks, nrows, _ = write_pandas(
    conn=snowflake_conn,
    df=raceresults_df,
    table_name='RRESULTS',
    auto_create_table=True,
)
print(f'success = {success}, nchunks = {nchunks}, nrows = {nrows}')

# of rows in raceresults_df =  420
success = True, nchunks = 1, nrows = 420
