In [5]:
import pandas as pd
import psycopg2

In [7]:
#Get DB credentials
from dotenv import load_dotenv
import os

from dotenv import load_dotenv
from pathlib import Path
import os

# Populate the process environment from the local credentials file.
dotenv_path = Path('db_credentials.env')
load_dotenv(dotenv_path=dotenv_path)

# Read every setting in one pass; os.getenv yields None for any variable
# that is not present in the environment.
(
    DB_HOST,
    DB_PORT,
    DB_NAME,
    DB_USER,
    DB_PASSWORD,
    DATABRICKS_TOKEN,
    DATABRICKS_INSTANCE,
) = [
    os.getenv(env_key)
    for env_key in (
        'DB_HOST',
        'DB_PORT',
        'DB_DATABASE',  # note: the database name is stored under DB_DATABASE, not DB_NAME
        'DB_USER',
        'DB_PASSWORD',
        'DATABRICKS_TOKEN',
        'DATABRICKS_INSTANCE',
    )
]

In [19]:
# (Re)create the view `silver_consolidated_airline_reviews`, which stacks the
# rows of `silver_airline_quality_reviews` on top of the Reddit posts
# (`bronze_reddit_reviews` joined to `labeling_values` and
# `bronze_airline_codes`) under one shared column layout.

# Bind both handles up front so the cleanup in `finally` never touches an
# unbound name (fresh kernel) or a stale object left over from another cell
# when connecting or creating the cursor fails.
connection = None
cursor = None

try:
    # Connect to the PostgreSQL database
    connection = psycopg2.connect(
        dbname=DB_NAME,
        user=DB_USER,
        password=DB_PASSWORD,
        host=DB_HOST,
        port=DB_PORT
    )

    cursor = connection.cursor()

    # Remove any previous definition so the CREATE below cannot collide with it.
    cursor.execute("""
        DROP VIEW IF EXISTS silver_consolidated_airline_reviews
    """)

    # First SELECT: renames the review-site columns to snake_case.
    # Second SELECT: Reddit posts; columns that have no Reddit counterpart are
    # filled with NULL, and `route` is derived by stripping braces and
    # 'unknown' markers from L.mentioned_airlines_routes.
    cursor.execute("""
        CREATE VIEW silver_consolidated_airline_reviews AS
        SELECT
        	CONCAT('K', "Id") AS airline_review_id,
            "Airline Name" AS airline_name,
        	"Overall_Rating" AS rating_overall,
        	"Review_Title" AS review_title,
        	"Review Date" AS review_date,
        	"Verified" AS verified,
        	"Review" AS review_text,
        	"Aircraft" AS aircraft,
        	"Type Of Traveller" AS type_of_traveller,
        	"Seat Type" AS seat_type,
        	"Route" AS route,
        	"Date Flown" AS flight_date,
        	"Seat Comfort" AS rating_seat_comfort,
        	"Cabin Staff Service" AS rating_cabin_staff_service,
        	"Food & Beverages" AS rating_food_and_beverages,
        	"Ground Service" AS rating_ground_service,
        	"Inflight Entertainment" AS rating_inflight_entertainment,
        	"Wifi & Connectivity" AS rating_wifi_and_connectivity,
        	"Value For Money" AS rating_value_for_money,
        	"Recommended" AS recommended,
            "is_lost_luggage_flag",
            "is_lost_luggage_score",
            "sentiment_label",
            "sentiment_scores",
            "created_by",
            "created_date",
            "lastmodified_by",
            "lastmodified_date"
        FROM silver_airline_quality_reviews AS K

        UNION ALL

        SELECT
        	R.post_id AS airline_review_id,
            AC."Name" AS airline_name,
            --CONCAT(AC."Name", ' (', L.responsible_airline, ')') AS airline_name,
        	NULL AS rating_overall,
        	R.title AS review_title,
        	R.created_utc AS review_date,
        	NULL AS verified,
        	R.selftext AS review_text,
        	NULL AS aircraft,
        	NULL AS type_of_traveller,
        	NULL AS seat_type,
        	CASE
                WHEN L.mentioned_airlines_routes = '{}' THEN null
                ELSE
                    NULLIF(
                    REPLACE(
                    REPLACE(
                    REPLACE(
                    REPLACE(
                    REPLACE(
                    REPLACE(
                    REPLACE( 
                        REPLACE(L.mentioned_airlines_routes, '},{', ' to ')
                        , ',', ' to ')
                        , '{{', '')
                        , '}}', '')
                        , '{', '')
                        , '}', '')
                        , 'unknown to ', '')
                        , 'unknown', '')
                        ,'')
            END AS route,
        	NULL AS flight_date,
        	NULL AS rating_seat_comfort,
        	NULL AS rating_cabin_staff_service,
        	NULL AS rating_food_and_beverages,
        	NULL AS rating_ground_service,
        	NULL AS rating_inflight_entertainment,
        	NULL AS rating_wifi_and_connectivity,
        	NULL AS rating_value_for_money,
        	NULL AS recommended,
            CASE WHEN L.luggage_issue = 1 THEN true ELSE false END AS is_lost_luggage_flag,
            1.0 AS is_lost_luggage_score,
            NULL AS "sentiment_label",
            NULL AS "sentiment_scores",
            'From_Reddit' AS created_by,
            R.created_utc AS created_date,
            'From_Reddit' AS lastmodified_by,
            R.created_utc AS lastmodified_date
        FROM bronze_reddit_reviews AS R
        INNER JOIN labeling_values AS L ON L.post_id = R.post_id
        INNER JOIN bronze_airline_codes AS AC ON AC."ICAO" = L.responsible_airline
        WHERE L.responsible_airline != 'unknown'
        AND L.aviation_related = 1
    """)
    # Make the DROP + CREATE permanent; nothing is committed if either failed.
    connection.commit()
    print("View 'silver_consolidated_airline_reviews' created successfully.")

except (Exception, psycopg2.Error) as error:
    # Best-effort cell: report the failure instead of raising.
    print("Error during drop-create view operation:", error)
finally:
    # Close only what was actually opened.
    if cursor is not None:
        cursor.close()
    if connection is not None:
        connection.close()

View 'silver_consolidated_airline_reviews' created successfully.


In [25]:
# Read the whole `silver_consolidated_airline_reviews` view into the pandas
# DataFrame `df` and show its size, columns and first rows.

results = None
colnames = None
# Default to an empty frame so the summary below still runs (showing 0 rows)
# after a reported failure, instead of raising NameError or silently showing
# a stale `df` from an earlier execution.
df = pd.DataFrame()

# Bind both handles up front so the cleanup in `finally` is safe even when
# connecting or creating the cursor fails.
connection = None
cursor = None

try:
    # Connect to the PostgreSQL database
    connection = psycopg2.connect(
        dbname=DB_NAME,
        user=DB_USER,
        password=DB_PASSWORD,
        host=DB_HOST,
        port=DB_PORT
    )

    cursor = connection.cursor()

    query = 'SELECT * FROM "silver_consolidated_airline_reviews"'

    cursor.execute(query)

    results = cursor.fetchall()

    # Get the column names
    colnames = [desc[0] for desc in cursor.description]

    # Convert the results to a pandas DataFrame
    df = pd.DataFrame(results, columns=colnames)

except (Exception, psycopg2.Error) as error:
    # Best-effort cell: report the failure instead of raising.
    print("Error during select operation:", error)
finally:
    # Close only what was actually opened.
    if cursor is not None:
        cursor.close()
    if connection is not None:
        connection.close()

print(len(df))
print(df.columns)
df.head()

24464
Index(['airline_review_id', 'airline_name', 'rating_overall', 'review_title',
       'review_date', 'verified', 'review_text', 'aircraft',
       'type_of_traveller', 'seat_type', 'route', 'flight_date',
       'rating_seat_comfort', 'rating_cabin_staff_service',
       'rating_food_and_beverages', 'rating_ground_service',
       'rating_inflight_entertainment', 'rating_wifi_and_connectivity',
       'rating_value_for_money', 'recommended', 'is_lost_luggage_flag',
       'is_lost_luggage_score', 'sentiment_label', 'sentiment_scores',
       'created_by', 'created_date', 'lastmodified_by', 'lastmodified_date'],
      dtype='object')


Unnamed: 0,airline_review_id,airline_name,rating_overall,review_title,review_date,verified,review_text,aircraft,type_of_traveller,seat_type,...,rating_value_for_money,recommended,is_lost_luggage_flag,is_lost_luggage_score,sentiment_label,sentiment_scores,created_by,created_date,lastmodified_by,lastmodified_date
0,Kd5735d894b4092e822168ba53a488a01fea59947c7b59...,AB Aviation,9.0,"""pretty decent airline""",2019-11-11,True,Moroni to Moheli. Turned out to be a pretty ...,,Solo Leisure,Economy Class,...,3.0,yes,False,0.809639,positive,0.918486,From_AirlineQuality,2025-03-29 00:11:21.944102,From_AirlineQuality,2025-03-29 00:11:21.944102
1,K7966382a7e6a7378845e6e7606deb05c3cd22ed48eb13...,AB Aviation,1.0,"""Not a good airline""",2019-06-25,True,Moroni to Anjouan. It is a very small airline...,E120,Solo Leisure,Economy Class,...,2.0,no,False,0.806561,negative,0.56924,From_AirlineQuality,2025-03-29 00:11:21.944102,From_AirlineQuality,2025-03-29 00:11:21.944102
2,Kb7272f236f9dd03ff3ddc7890a8c56ef10e4b453f8762...,AB Aviation,1.0,"""flight was fortunately short""",2019-06-25,True,Anjouan to Dzaoudzi. A very small airline an...,Embraer E120,Solo Leisure,Economy Class,...,2.0,no,False,0.815163,neutral,0.54281,From_AirlineQuality,2025-03-29 00:11:21.944102,From_AirlineQuality,2025-03-29 00:11:21.944102
3,K2a1ca52e66cecb27b117aaa639137b03a903771bdc358...,Adria Airways,1.0,"""I will never fly again with Adria""",2019-09-28,False,Please do a favor yourself and do not fly wi...,,Solo Leisure,Economy Class,...,1.0,no,True,0.848293,negative,0.90333,From_AirlineQuality,2025-03-29 00:11:21.944102,From_AirlineQuality,2025-03-29 00:11:21.944102
4,K0919fc2c8d770679071d9c4626486d411c386f1dcbcf1...,Adria Airways,1.0,"""it ruined our last days of holidays""",2019-09-24,True,Do not book a flight with this airline! My fr...,,Couple Leisure,Economy Class,...,1.0,no,False,0.811869,negative,0.902472,From_AirlineQuality,2025-03-29 00:11:21.944102,From_AirlineQuality,2025-03-29 00:11:21.944102
