In [2]:
import sqlalchemy
import pandas as pd
import psycopg2
from sqlalchemy import create_engine, inspect

In [7]:
# Read the airline reference CSV; the path is relative to the notebook's
# working directory.
df_airlines = pd.read_csv(filepath_or_buffer='data/airlines.csv')
# Bare last expression so the notebook renders the frame as the cell output.
df_airlines

Unnamed: 0,Airline ID,Name,ICAO,Country,Active
0,2,135 Airways,GNL,United States,N
1,3,1Time Airline,RNX,South Africa,Y
2,4,2 Sqn No 1 Elementary Flying Training School,WYT,United Kingdom,N
3,5,213 Flight Unit,TFU,Russia,N
4,6,223 Flight Unit State Airline,CHD,Russia,N
...,...,...,...,...,...
5867,21181,Air Andaman (2Y),AOW,Thailand,N
5868,21240,TDA Toa Domestic Airlines,TDA,Japan,N
5869,21248,GX Airlines,CBG,China,Y
5870,21251,Lynx Aviation (L3/SSX),SSX,United States,N


In [13]:
# Get DB credentials
# Settings are read from a local dotenv file so that no secret is hardcoded
# in the notebook itself.
import os
import warnings
from pathlib import Path

from dotenv import load_dotenv

dotenv_path = Path('db_credentials.env')
load_dotenv(dotenv_path=dotenv_path)

DB_HOST = os.getenv('DB_HOST')
DB_PORT = os.getenv('DB_PORT')
DB_NAME = os.getenv('DB_NAME')
DB_USER = os.getenv('DB_USER')
DB_PASSWORD = os.getenv('DB_PASSWORD')
DATABRICKS_TOKEN = os.getenv('DATABRICKS_TOKEN')
DATABRICKS_INSTANCE = os.getenv('DATABRICKS_INSTANCE')

# os.getenv returns None for a variable that is not set, and a None value
# would later be formatted into a connection string as the text "None".
# Report unset names here (names only, never values) without aborting the cell.
_unset_settings = [
    name
    for name, value in (
        ('DB_HOST', DB_HOST),
        ('DB_PORT', DB_PORT),
        ('DB_NAME', DB_NAME),
        ('DB_USER', DB_USER),
        ('DB_PASSWORD', DB_PASSWORD),
        ('DATABRICKS_TOKEN', DATABRICKS_TOKEN),
        ('DATABRICKS_INSTANCE', DATABRICKS_INSTANCE),
    )
    if not value
]
if _unset_settings:
    warnings.warn(
        "Settings not found in the environment or in {}: {}".format(
            dotenv_path, ", ".join(_unset_settings)
        )
    )

In [20]:
#Connect to RDS
from urllib.parse import quote

# Raw connection settings. These are kept unescaped on purpose so the dict
# stays usable as-is by anything else that consumes `conf`.
conf = {
    'host': DB_HOST,
    'port': DB_PORT,
    'database': DB_NAME,
    'user': DB_USER,
    'password': DB_PASSWORD,
}

# Copy used only for building the URL: user and password are percent-encoded
# so reserved characters such as '@', ':' or '/' cannot break URL parsing.
# str() keeps the previous formatting behaviour when a value is None.
_url_parts = dict(
    conf,
    user=quote(str(conf['user']), safe=''),
    password=quote(str(conf['password']), safe=''),
)

# The path component must be the database name. It used to be '{user}', which
# ignored DB_NAME and connected to a database named after the login user.
engine = create_engine(
    "postgresql://{user}:{password}@{host}:{port}/{database}".format(**_url_parts)
)

# Write the airlines frame to the bronze table without the DataFrame index.
# if_exists='replace' drops and recreates the table on every run.
df_airlines.to_sql('bronze_airline_codes', engine, index=False, if_exists='replace')

872

In [54]:
# Read the uploaded airline codes table back through the same engine to
# check its contents. The alias C is declared but not referenced in the query.
sql = """
        SELECT *
        FROM bronze_airline_codes AS C        
"""
df_airline_codes = pd.read_sql_query(sql=sql, con=engine)
# Bare last expression so the notebook renders the result.
df_airline_codes

Unnamed: 0,Airline ID,Name,ICAO,Country,Active
0,2,135 Airways,GNL,United States,N
1,3,1Time Airline,RNX,South Africa,Y
2,4,2 Sqn No 1 Elementary Flying Training School,WYT,United Kingdom,N
3,5,213 Flight Unit,TFU,Russia,N
4,6,223 Flight Unit State Airline,CHD,Russia,N
...,...,...,...,...,...
5867,21181,Air Andaman (2Y),AOW,Thailand,N
5868,21240,TDA Toa Domestic Airlines,TDA,Japan,N
5869,21248,GX Airlines,CBG,China,Y
5870,21251,Lynx Aviation (L3/SSX),SSX,United States,N


In [17]:
# Project Reddit posts onto an airline-review style column layout.
# - bronze_reddit_reviews (R) is inner-joined to labeling_values (L) on post_id.
# - Only rows with aviation_related = 1 and a responsible_airline other than
#   'unknown' are kept.
# - Columns the query has no source for are emitted as NULL placeholders.
# - is_lost_luggage_flag is true only when L.luggage_issue = 1.
# - The created/lastmodified audit columns are filled with 'From_Reddit' and
#   the post's created_utc.
sql = """
        SELECT
        	R.post_id AS airline_review_id,
            L.responsible_airline AS airline_name,
        	NULL AS rating_overall,
        	R.title AS review_title,
        	R.created_utc AS review_date,
        	NULL AS verified,
        	R.selftext AS review_text,
        	NULL AS aircraft,
        	NULL AS type_of_traveller,
        	NULL AS seat_type,
        	L.mentioned_airlines_routes AS route,
        	NULL AS flight_date,
        	NULL AS rating_seat_comfort,
        	NULL AS rating_cabin_staff_service,
        	NULL AS rating_food_and_beverages,
        	NULL AS rating_ground_service,
        	NULL AS rating_inflight_entertainment,
        	NULL AS rating_wifi_and_connectivity,
        	NULL AS rating_value_for_money,
        	NULL AS recommended,
            CASE WHEN L.luggage_issue = 1 THEN true ELSE false END AS is_lost_luggage_flag,
            'From_Reddit' AS created_by,
            R.created_utc AS created_date,
            'From_Reddit' AS lastmodified_by,
            R.created_utc AS lastmodified_date
        FROM bronze_reddit_reviews AS R
        INNER JOIN labeling_values AS L ON L.post_id = R.post_id
        WHERE L.responsible_airline != 'unknown'
        AND L.aviation_related = 1
"""
df_reddit = pd.read_sql_query(sql=sql, con=engine)
# Bare last expression so the notebook renders the result.
df_reddit

Unnamed: 0,airline_review_id,airline_name,rating_overall,review_title,review_date,verified,review_text,aircraft,type_of_traveller,seat_type,...,rating_ground_service,rating_inflight_entertainment,rating_wifi_and_connectivity,rating_value_for_money,recommended,is_lost_luggage_flag,created_by,created_date,lastmodified_by,lastmodified_date
0,reddit_1j312r7,UAL,,Lost luggage,2025-03-04,,"Flying from BNA to IAH this morning, I checked...",,,,...,,,,,,True,From_Reddit,2025-03-04,From_Reddit,2025-03-04
1,reddit_1iwlkhb,WJA,,WestJet lost luggage,2025-02-23,,\n\nI flew with WestJet from Toronto Pearson (...,,,,...,,,,,,True,From_Reddit,2025-02-23,From_Reddit,2025-02-23
2,reddit_1jcpbiq,DAL,,Delta sold my lost item to “unclaimed Baggage ...,2025-03-16,,Left my AirPod max in seat compartment on inte...,,,,...,,,,,,True,From_Reddit,2025-03-16,From_Reddit,2025-03-16
3,reddit_1j4mu2n,SWA,,"Damaged luggage, employee changing her mind ab...",2025-03-06,,"Today I flew MCO->MDW on WN898. However, when ...",,,,...,,,,,,True,From_Reddit,2025-03-06,From_Reddit,2025-03-06
4,reddit_1isqn95,UAL,,Delayed Baggage Claim Paid This Week,2025-02-18,,My skis were delayed heading into Montana. I h...,,,,...,,,,,,True,From_Reddit,2025-02-18,From_Reddit,2025-02-18
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
85,reddit_1iwv894,DAL,,Can a second personal item count as the “carry...,2025-02-24,,I got a lecture from a flight attendant today ...,,,,...,,,,,,True,From_Reddit,2025-02-24,From_Reddit,2025-02-24
86,reddit_1iwzdub,JNA,,Jin Air Transfer at Seoul Incheon,2025-02-24,,"Hi everyone,\n\nI have a flight from Phuket to...",,,,...,,,,,,True,From_Reddit,2025-02-24,From_Reddit,2025-02-24
87,reddit_1iwa4px,WZZ,,Wizzair scheduled my flight back on without no...,2025-02-23,,This was back in September and I just want to ...,,,,...,,,,,,False,From_Reddit,2025-02-23,From_Reddit,2025-02-23
88,reddit_1ivz56g,DKH,,extra baggage juneyao airlines,2025-02-23,,Hi so i’m in japan and doing a flight with jun...,,,,...,,,,,,True,From_Reddit,2025-02-23,From_Reddit,2025-02-23
