# Loads csv files to postgres

In [24]:
import pandas as pd
from sqlalchemy import create_engine
from dotenv import load_dotenv
import os
import langdetect
from tqdm import tqdm
from openai import OpenAI

load_dotenv()
LOCAL_POSTGRES = os.getenv('LOCAL_POSTGRES')
OPENAI_API_KEY = os.getenv('OPENAI_API_KEY')
client = OpenAI(api_key=OPENAI_API_KEY)

## Load reviews table

In [25]:
# Database connection
engine = create_engine(LOCAL_POSTGRES)

# Load CSV into DataFrame
df = pd.read_csv("output/garden_court_sandton.csv")
df

# import psycopg2
# from psycopg2.extras import execute_values

# conn = engine.raw_connection()
# cursor = conn.cursor()

# # Define SQL with conflict handling
# sql = """
# INSERT INTO reviews (column1, column2, column3) 
# VALUES %s
# ON CONFLICT DO NOTHING
# """

# try:
#     execute_values(cursor, sql, df.values.tolist())
#     conn.commit()
#     print(f'Inserted {cursor.rowcount} rows into the reviews table')
# except Exception as e:
#     print(f"Error inserting data: {e}")
# finally:
#     cursor.close()
#     conn.close()


Unnamed: 0,hotel_id,hotel_name,source_name,positive_review,negative_review,review_rating,reviewer_name,country,sentiment,reviewer_check_in_date,review_created_date,apartment_type,length_nights_stay,group_type,review_feedback,seen,review_text
0,GARCOUR1,Garden Court Sandton City,booking.com,Very helpful stuff and professional.,,10.0,Dina,Egypt,joy,September 2024,"September 19, 2024",Standard Double Room with 2 Double Beds - Non ...,4,Family,"Dear Dina, eg\n\nWe thank you for taking the t...",False,Positive: Very helpful stuff and professional....
1,GARCOUR1,Garden Court Sandton City,booking.com,Convenient place,Nothing,9.0,Michelle,Zimbabwe,joy,December 2024,"March 18, 2025",Standard Double Room with 2 Double Beds - Non ...,8,Family,,False,Positive: Convenient place negative: Nothing
2,GARCOUR1,Garden Court Sandton City,booking.com,"Near Sandton City mall, convenient shopping.",Bed was a bit uncomfortable.,7.0,Tlhobogang,Botswana,neutral,March 2025,"March 17, 2025",Standard Double Room with 2 Double Beds - Non ...,2,Family,"Dear Tlhobogang, bw\n\nWe thank you for taking...",False,"Positive: Near Sandton City mall, convenient s..."
3,GARCOUR1,Garden Court Sandton City,booking.com,clean staff excellent service,rooms are small\ndesk very impractical and sm...,8.0,Randy,South Africa,neutral,March 2025,"March 17, 2025",Standard Queen Room - Smoking,2,Group,,False,Positive: clean staff excellent service negati...
4,GARCOUR1,Garden Court Sandton City,booking.com,Convenient,,7.0,Breitenstein,Zimbabwe,joy,March 2025,"March 17, 2025",Standard Queen Room - Non Smoking,1,Solo traveler,,False,Positive: Convenient negative: None
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
485,GARCOUR1,Garden Court Sandton City,booking.com,Clean rooms,,8.0,Morwa,South Africa,joy,June 2024,"June 17, 2024",Standard Queen Room - Non Smoking,1,Solo traveler,"Dear Morwa Anastecia, za\n\nWe thank you for t...",False,Positive: Clean rooms negative: None
486,GARCOUR1,Garden Court Sandton City,booking.com,"Location, room size n breakfast","No facility of tooth brush, paste and no enter...",8.0,Sanjay,India,neutral,June 2024,"June 16, 2024",Standard Queen Room - Non Smoking,7,Solo traveler,"Dear SANJAY, in\n\nWe thank you for taking the...",False,"Positive: Location, room size n breakfast nega..."
487,GARCOUR1,Garden Court Sandton City,booking.com,"This is my first trip to South Africa, Johanne...",Nothing,8.0,Vijay,Seychelles,neutral,June 2024,"June 15, 2024",Standard Queen Room - Non Smoking,3,Solo traveler,"Dear Vijay, sc\n\nWe thank you for taking the ...",False,Positive: This is my first trip to South Afric...
488,GARCOUR1,Garden Court Sandton City,booking.com,Perfect location if you want to shop in Sandton,The room was close to the lift area hence the ...,7.0,Pamela,Zimbabwe,neutral,June 2024,"June 14, 2024",Standard Queen Room - Non Smoking,1,Couple,"Dear Pamela, zw\n\nWe thank you for taking the...",False,Positive: Perfect location if you want to shop...


## Convert date strings to date objects

In [26]:
# convert dates to date objects
df["review_created_date"] = pd.to_datetime(df["review_created_date"], errors="coerce").dt.date

# Function to convert check in date to correct date format
def convert_to_date(date_str):
    month, year = date_str.split()  # Split into month and year
    year = f"20{year}"  # Convert two-digit year to four-digit (assumes 20xx)
    return pd.to_datetime(f"1 {month} {year}").strftime("%Y-%m-%d")

# Apply function to the column
# df["reviewer_check_in_date"] = df["reviewer_check_in_date"].apply(convert_to_date)
df["reviewer_check_in_date"] = pd.to_datetime(df["reviewer_check_in_date"])


df.head()

  df["reviewer_check_in_date"] = pd.to_datetime(df["reviewer_check_in_date"])


Unnamed: 0,hotel_id,hotel_name,source_name,positive_review,negative_review,review_rating,reviewer_name,country,sentiment,reviewer_check_in_date,review_created_date,apartment_type,length_nights_stay,group_type,review_feedback,seen,review_text
0,GARCOUR1,Garden Court Sandton City,booking.com,Very helpful stuff and professional.,,10.0,Dina,Egypt,joy,2024-09-01,2024-09-19,Standard Double Room with 2 Double Beds - Non ...,4,Family,"Dear Dina, eg\n\nWe thank you for taking the t...",False,Positive: Very helpful stuff and professional....
1,GARCOUR1,Garden Court Sandton City,booking.com,Convenient place,Nothing,9.0,Michelle,Zimbabwe,joy,2024-12-01,2025-03-18,Standard Double Room with 2 Double Beds - Non ...,8,Family,,False,Positive: Convenient place negative: Nothing
2,GARCOUR1,Garden Court Sandton City,booking.com,"Near Sandton City mall, convenient shopping.",Bed was a bit uncomfortable.,7.0,Tlhobogang,Botswana,neutral,2025-03-01,2025-03-17,Standard Double Room with 2 Double Beds - Non ...,2,Family,"Dear Tlhobogang, bw\n\nWe thank you for taking...",False,"Positive: Near Sandton City mall, convenient s..."
3,GARCOUR1,Garden Court Sandton City,booking.com,clean staff excellent service,rooms are small\ndesk very impractical and sm...,8.0,Randy,South Africa,neutral,2025-03-01,2025-03-17,Standard Queen Room - Smoking,2,Group,,False,Positive: clean staff excellent service negati...
4,GARCOUR1,Garden Court Sandton City,booking.com,Convenient,,7.0,Breitenstein,Zimbabwe,joy,2025-03-01,2025-03-17,Standard Queen Room - Non Smoking,1,Solo traveler,,False,Positive: Convenient negative: None


## Convert non-English reviews to English

In [27]:
# Function to detect if a text is non-English
def is_non_english(text):
    try:
        return langdetect.detect(text) != "en"
    except:
        return False  # Default to English if detection fails

# Function to translate text using GPT-3.5
def translate_text(text):
    try:
        response = client.chat.completions.create(
            model="gpt-3.5-turbo",
            messages=[
                {"role": "system", "content": "You are a helpful AI that translates text to English."},
                {"role": "user", "content": f"Translate the following text to English:\n\n{text}"}
            ],
            temperature=0.3,
            max_tokens=100,
        )
        # return response["choices"][0]["message"]["content"].strip()
        translation = response.choices[0].message.content.strip()
        # print('Translation: ', translation)
        return translation
    except Exception as e:
        print(f"Translation error: {e}")
        return text  # Return original text in case of an error

# Apply translation only to non-English reviews
tqdm.pandas()  # Enable progress bar

df["review_text"] = df["review_text"].progress_apply(
    lambda x: translate_text(x) if is_non_english(x) else x
)

# df["negative_review"] = df["negative_review"].progress_apply(
#     lambda x: translate_text(x) if is_non_english(x) else x
# )


  1%|          | 6/490 [00:00<00:29, 16.18it/s]

Translation error: Error code: 401 - {'error': {'message': 'Incorrect API key provided: sk-proj-********************************************************************************************************************************************************YWIA. You can find your API key at https://platform.openai.com/account/api-keys.', 'type': 'invalid_request_error', 'param': None, 'code': 'invalid_api_key'}}


  7%|▋         | 32/490 [00:00<00:12, 35.99it/s]

Translation error: Error code: 401 - {'error': {'message': 'Incorrect API key provided: sk-proj-********************************************************************************************************************************************************YWIA. You can find your API key at https://platform.openai.com/account/api-keys.', 'type': 'invalid_request_error', 'param': None, 'code': 'invalid_api_key'}}


 17%|█▋        | 81/490 [00:01<00:05, 78.44it/s]

Translation error: Error code: 401 - {'error': {'message': 'Incorrect API key provided: sk-proj-********************************************************************************************************************************************************YWIA. You can find your API key at https://platform.openai.com/account/api-keys.', 'type': 'invalid_request_error', 'param': None, 'code': 'invalid_api_key'}}


 20%|██        | 99/490 [00:01<00:05, 67.01it/s]

Translation error: Error code: 401 - {'error': {'message': 'Incorrect API key provided: sk-proj-********************************************************************************************************************************************************YWIA. You can find your API key at https://platform.openai.com/account/api-keys.', 'type': 'invalid_request_error', 'param': None, 'code': 'invalid_api_key'}}


 26%|██▌       | 125/490 [00:02<00:05, 66.88it/s]

Translation error: Error code: 401 - {'error': {'message': 'Incorrect API key provided: sk-proj-********************************************************************************************************************************************************YWIA. You can find your API key at https://platform.openai.com/account/api-keys.', 'type': 'invalid_request_error', 'param': None, 'code': 'invalid_api_key'}}


 35%|███▌      | 172/490 [00:02<00:03, 91.56it/s]

Translation error: Error code: 401 - {'error': {'message': 'Incorrect API key provided: sk-proj-********************************************************************************************************************************************************YWIA. You can find your API key at https://platform.openai.com/account/api-keys.', 'type': 'invalid_request_error', 'param': None, 'code': 'invalid_api_key'}}


 50%|████▉     | 244/490 [00:03<00:01, 125.05it/s]

Translation error: Error code: 401 - {'error': {'message': 'Incorrect API key provided: sk-proj-********************************************************************************************************************************************************YWIA. You can find your API key at https://platform.openai.com/account/api-keys.', 'type': 'invalid_request_error', 'param': None, 'code': 'invalid_api_key'}}


 61%|██████▏   | 301/490 [00:03<00:01, 132.28it/s]

Translation error: Error code: 401 - {'error': {'message': 'Incorrect API key provided: sk-proj-********************************************************************************************************************************************************YWIA. You can find your API key at https://platform.openai.com/account/api-keys.', 'type': 'invalid_request_error', 'param': None, 'code': 'invalid_api_key'}}


 72%|███████▏  | 352/490 [00:04<00:01, 128.26it/s]

Translation error: Error code: 401 - {'error': {'message': 'Incorrect API key provided: sk-proj-********************************************************************************************************************************************************YWIA. You can find your API key at https://platform.openai.com/account/api-keys.', 'type': 'invalid_request_error', 'param': None, 'code': 'invalid_api_key'}}


 78%|███████▊  | 384/490 [00:04<00:00, 158.57it/s]

Translation error: Error code: 401 - {'error': {'message': 'Incorrect API key provided: sk-proj-********************************************************************************************************************************************************YWIA. You can find your API key at https://platform.openai.com/account/api-keys.', 'type': 'invalid_request_error', 'param': None, 'code': 'invalid_api_key'}}


 89%|████████▉ | 435/490 [00:04<00:00, 104.43it/s]

Translation error: Error code: 401 - {'error': {'message': 'Incorrect API key provided: sk-proj-********************************************************************************************************************************************************YWIA. You can find your API key at https://platform.openai.com/account/api-keys.', 'type': 'invalid_request_error', 'param': None, 'code': 'invalid_api_key'}}


100%|██████████| 490/490 [00:05<00:00, 96.13it/s] 


In [28]:
# checking string length for each string type column so it doesn't give LLM error
for col in ["hotel_name", "reviewer_name", "source_name", "sentiment", "country", "group_type"]:
    max_len = df[col].astype(str).apply(len).max()
    print(f"Max length in column {col}: {max_len}")

Max length in column hotel_name: 25
Max length in column reviewer_name: 12
Max length in column source_name: 11
Max length in column sentiment: 8
Max length in column country: 32
Max length in column group_type: 13


In [30]:
# Append data to the PostgreSQL table
num_rows_inserted = df.to_sql("reviews", engine, if_exists="append", index=False)
print(f'inserted {num_rows_inserted} rows into the reviews table')


inserted 490 rows into the reviews table
