In [1]:

import sqlalchemy as sqla
from sqlalchemy import text
import os
from dotenv import load_dotenv
import pandas as pd

In [2]:
# Connect to the database.

load_dotenv()  # load variables from a local .env file into the process environment

# Connection settings, read from environment variables.
DB_HOST = os.getenv("DB_HOST")
DB_PORT = os.getenv("DB_PORT")
DB_NAME2 = os.getenv("DB_NAME2")
DB_USER = os.getenv("DB_USER")
DB_PASSWORD = os.getenv("DB_PASSWORD")

# Fail fast with a readable message: os.getenv() returns None for an unset
# variable, which would otherwise be formatted into the URL as the text "None".
_db_settings = {
    "DB_HOST": DB_HOST,
    "DB_PORT": DB_PORT,
    "DB_NAME2": DB_NAME2,
    "DB_USER": DB_USER,
    "DB_PASSWORD": DB_PASSWORD,
}
_missing_settings = [name for name, value in _db_settings.items() if value is None]
if _missing_settings:
    raise RuntimeError(
        "Missing database environment variable(s): " + ", ".join(_missing_settings)
    )

# Build the URL from its components instead of an f-string so that special
# characters in the user name or password (e.g. "@", ":", "/") are escaped
# correctly rather than being parsed as URL delimiters.
engine = sqla.create_engine(
    sqla.engine.URL.create(
        drivername="postgresql+psycopg2",
        username=DB_USER,
        password=DB_PASSWORD,
        host=DB_HOST,
        port=int(DB_PORT),  # environment values are strings; the port must be numeric
        database=DB_NAME2,
    )
)

In [3]:
# TC1: How many rows are in the bookings table | EASY
# Reference answer: a one-row frame holding count(*) over bookings.bookings.
query = text("""select count(*) from bookings.bookings""")
with engine.begin() as connection:
    df = pd.read_sql_query(sql=query, con=connection)
df

Unnamed: 0,count
0,593433


In [4]:
# TC2: How many rows are in the aircrafts table | EASY
# Reference answer: a one-row frame holding count(*) over bookings.aircrafts_data.
query = text("""select count(*) from bookings.aircrafts_data""")
with engine.begin() as connection:
    df = pd.read_sql_query(sql=query, con=connection)
df

Unnamed: 0,count
0,9


In [5]:
# TC3: find the top ten most expensive flights and order total amounts in descending order | EASY
# NOTE: the prompt says "flights", but this reference query ranks rows of the
# bookings table by total_amount.
# The table is schema-qualified (as in TC1) so the query does not depend on the
# session's search_path, and book_ref is added as a tie-breaker so rows with
# equal total_amount come back in a stable order under LIMIT.
with engine.begin() as conn:
    query = text("""SELECT   *
                    FROM     bookings.bookings
                    ORDER BY total_amount DESC, book_ref
                    LIMIT 10;""")
    df = pd.read_sql_query(query, conn)
df

Unnamed: 0,book_ref,book_date,total_amount
0,3B54BB,2017-07-05 14:08:00+00:00,1204500.0
1,53B75C,2017-05-29 06:20:00+00:00,1116700.0
2,3AC131,2017-07-30 22:06:00+00:00,1087100.0
3,4B58DB,2017-05-14 12:39:00+00:00,1065600.0
4,65A6EA,2017-07-03 03:28:00+00:00,1065600.0
5,D7061C,2017-06-06 08:49:00+00:00,1065600.0
6,D7E9AA,2017-08-08 02:29:00+00:00,1062800.0
7,EF479E,2017-08-02 12:58:00+00:00,1035100.0
8,03BCC9,2017-06-26 03:26:00+00:00,1022500.0
9,1BE923,2017-06-24 10:29:00+00:00,1016500.0


In [6]:
# TC4: How many ticket bookings does passenger Antonina Kuznecova have?  | EASY
# The name literal is written in upper case here; the filter is on tickets.passenger_name.
query = text("""select count(*) from bookings.tickets where passenger_name in ('ANTONINA KUZNECOVA')""")
with engine.begin() as connection:
    df = pd.read_sql_query(sql=query, con=connection)
df

Unnamed: 0,count
0,117


In [7]:
# TC5: How many ticket bookings does passenger Antonina Kuznecova have (testing for case sensitivity)?  | EASY
# Same filter as TC4 but with a mixed-case name literal, to show how letter case
# affects the match (the recorded result for this cell is 0).
query = text("""select count(*) from bookings.tickets where passenger_name in ('Antonina Kuznecova')""")
with engine.begin() as connection:
    df = pd.read_sql_query(sql=query, con=connection)
df

Unnamed: 0,count
0,0


In [8]:
# TC6: How many distinct flights departed from Moscow? (testing to determine if langchain can Identify db views)  | EASY
# Counts distinct flight_no values in routes whose departure_city is 'Moscow'.
query = text("""select count(distinct flight_no) from routes where departure_city in ('Moscow')""")
with engine.begin() as connection:
    df = pd.read_sql_query(sql=query, con=connection)
df

Unnamed: 0,count
0,154


In [9]:
# TC7: Get me the count of each flight status grouped by status and also provide the minimum and maximum departure ordered by the minimum scheduled
#      departure | MEDIUM
# One row per status: row count plus earliest and latest scheduled_departure,
# sorted by the earliest scheduled_departure.
with engine.begin() as connection:
    query = text("""SELECT   status,
                            count(*) as count,
                            min(scheduled_departure) as min_scheduled_departure,
                            max(scheduled_departure) as max_scheduled_departure
                    FROM     flights
                    GROUP BY status 
                    ORDER BY min_scheduled_departure;""")
    df = pd.read_sql_query(sql=query, con=connection)
df

Unnamed: 0,status,count,min_scheduled_departure,max_scheduled_departure
0,Arrived,49235,2017-05-16 23:00:00+00:00,2017-08-15 14:25:00+00:00
1,Cancelled,429,2017-05-17 16:10:00+00:00,2017-09-14 17:55:00+00:00
2,Departed,58,2017-08-15 06:55:00+00:00,2017-08-15 14:50:00+00:00
3,Delayed,41,2017-08-15 12:15:00+00:00,2017-08-16 14:25:00+00:00
4,On Time,518,2017-08-15 14:55:00+00:00,2017-08-16 15:00:00+00:00
5,Scheduled,15383,2017-08-16 15:05:00+00:00,2017-09-14 17:40:00+00:00


In [10]:
# TC8: Get me the count of each flight status grouped by status. Return only the status and count for each status type | MEDIUM
# One row per status with its row count; the query specifies no ORDER BY, so
# row order is whatever the database returns.
with engine.begin() as connection:
    query = text("""SELECT   status,
                            count(*) as count
                    FROM     flights
                    GROUP BY status ;""")
    df = pd.read_sql_query(sql=query, con=connection)
df

Unnamed: 0,status,count
0,Departed,58
1,Arrived,49235
2,On Time,518
3,Cancelled,429
4,Delayed,41
5,Scheduled,15383
