In [1]:
import os
import pathlib
from urllib.parse import quote, urlencode

import pandas as pd
from dotenv import dotenv_values
from snowflake import connector
from sqlalchemy import create_engine

In [2]:
# Locate configuration.env in the parent of the current working directory.
# NOTE: this is resolved from the kernel's working directory, so it changes
# if the notebook is launched from a different folder.
script_path = pathlib.Path.cwd().parent
config_file = script_path / "configuration.env"

# dotenv_values() yields an empty mapping when the file does not exist, which
# would only show up later as "None" values inside the connection URL.
# Fail here instead, with the path that was actually tried.
if not config_file.is_file():
    raise FileNotFoundError(f"Configuration file not found: {config_file}")

# Mapping of setting name -> value parsed from the .env file.
config = dotenv_values(config_file)

In [3]:
# Build the Snowflake SQLAlchemy URL and the engine used by the cells below.
#
# Target shape (no default database/schema is set here):
#   snowflake://<user>:<password>@<account>/?warehouse=<wh>&role=<role>
#
# The "?" separator matters: without it, "warehouse=...&role=..." is parsed as
# the database path and the warehouse/role settings are not sent as
# connection parameters.
required_settings = ("snowflake_user", "snowflake_password", "snowflake_account")
missing_settings = [key for key in required_settings if not config.get(key)]
if missing_settings:
    raise ValueError(
        f"Missing required settings in configuration.env: {missing_settings}"
    )

# Warehouse and role are only added to the query string when configured, so
# an unset value is never sent as the literal text "None".
optional_params = {
    "warehouse": config.get("snowflake_warehouse"),
    "role": config.get("snowflake_role"),
}
query_string = urlencode(
    {name: value for name, value in optional_params.items() if value}
)

# Percent-encode the credentials so characters such as "@", "/", ":" or "%"
# in the user name or password cannot change how the URL is split.
connection_url = (
    f"snowflake://{quote(config['snowflake_user'], safe='')}:"
    f"{quote(config['snowflake_password'], safe='')}@"
    f"{config['snowflake_account']}/"
)
if query_string:
    connection_url += f"?{query_string}"

engine = create_engine(connection_url)

In [6]:
# Load the review fact table, joined to its date, customer, location and
# aircraft dimensions, into a pandas DataFrame.
#
# All tables are referenced as british_airways_db.marts.<table>, so the query
# does not rely on a default database or schema on the connection.
#
# flight_route is built from the origin and destination city so that the
# result has the same columns as the recorded output of this notebook
# (e.g. "London to Dublin").
# NOTE(review): if fct_review already stores a route column, select that
# instead -- TODO confirm against the mart schema.
#
# NOTE(review): the flight_* columns are read from the same dim_date join as
# the review_* columns (keyed on date_submitted_id), so they always equal the
# review date parts. If the fact table has a separate flight-date key,
# dim_date should be joined a second time on it -- TODO confirm.
query = """
SELECT
    f.review_id AS review_id,
    f.date_submitted_id AS review_date_id,
    d.day_of_week_name AS review_day_of_week,
    d.cal_mon_name AS review_month_name,
    d.cal_month AS review_month_number,
    d.cal_year AS review_year,
    f.verified AS review_verified,
    c.customer_name AS customer_name,
    d.cal_mon_name AS flight_month_name,
    d.cal_month AS flight_month_number,
    d.cal_year AS flight_year,
    CONCAT(d.cal_mon_name, '-', d.cal_year) AS flight_month_year,
    c.nationality AS customer_country,
    a.aircraft_model AS aircraft_model,
    a.aircraft_manufacturer AS aircraft_manufacturer,
    a.seat_capacity AS aircraft_seat_capacity,
    f.seat_type AS review_seat_type,
    CONCAT(ol.city, ' to ', dl.city) AS flight_route,
    ol.city AS origin_city,
    dl.city AS destination_city
FROM
    british_airways_db.marts.fct_review f
JOIN
    british_airways_db.marts.dim_date d ON f.date_submitted_id = d.date_id
JOIN
    british_airways_db.marts.dim_customer c ON f.customer_id = c.customer_id
JOIN
    british_airways_db.marts.dim_location ol ON f.origin_location_id = ol.location_id
JOIN
    british_airways_db.marts.dim_location dl ON f.destination_location_id = dl.location_id
JOIN
    british_airways_db.marts.dim_aircraft a ON f.aircraft_id = a.aircraft_id
WHERE
    f.date_submitted_id IS NOT NULL;
"""
df = pd.read_sql(query, engine)

# Preview the first rows only; the full frame is not printed.
df.head()

Unnamed: 0,review_id,review_date_id,review_day_of_week,review_month_name,review_month_number,review_year,review_verified,customer_name,flight_month_name,flight_month_number,flight_year,flight_month_year,customer_country,aircraft_model,aircraft_manufacturer,aircraft_seat_capacity,review_seat_type,flight_route,origin_city,destination_city
0,1,2015-08-23,Sun,August,8,2015,False,Colin Pay,August,8,2015,August-2015,United Kingdom,A319,Airbus,134.0,Economy Class,London to Dublin,London,Dublin
1,2,2015-08-25,Tue,August,8,2015,False,Desmond Jones,August,8,2015,August-2015,United Kingdom,Unknown,Unknown,,Economy Class,London to Copenhagen,London,Copenhagen
2,3,2015-08-25,Tue,August,8,2015,False,Kenneth Barton,August,8,2015,August-2015,Canada,A319,Airbus,134.0,Economy Class,London to Toulouse,London,Toulouse
3,4,2015-08-26,Wed,August,8,2015,False,A Coogans,August,8,2015,August-2015,United Kingdom,A320,Airbus,180.0,Economy Class,Glasgow to Prague,Glasgow,Prague
4,5,2015-08-26,Wed,August,8,2015,False,C Johnson,August,8,2015,August-2015,United Kingdom,Unknown,Unknown,,Economy Class,Denver to London,Denver,London


In [7]:
# Show the column labels of the DataFrame loaded by the query cell.
df.columns

Index(['review_id', 'review_date_id', 'review_day_of_week',
       'review_month_name', 'review_month_number', 'review_year',
       'review_verified', 'customer_name', 'flight_month_name',
       'flight_month_number', 'flight_year', 'flight_month_year',
       'customer_country', 'aircraft_model', 'aircraft_manufacturer',
       'aircraft_seat_capacity', 'review_seat_type', 'flight_route',
       'origin_city', 'destination_city'],
      dtype='object')