In [26]:
import pandas as pd
from sqlalchemy import create_engine
from sqlalchemy.sql import text
import psycopg2
import logging

In [27]:
def create_tables(engine, logging):
    """Drop and recreate the Hotel, Amenity and Hotel_Amenity tables.

    All statements run on a single connection and are committed together.
    If any statement fails, the error is logged, the transaction is rolled
    back and the exception is NOT re-raised; the return value tells the
    caller whether the schema is actually in place.

    WARNING: any existing Hotel, Amenity and Hotel_Amenity tables are dropped
    first (with CASCADE), so previously stored rows are lost.

    Args:
        engine: SQLAlchemy engine used to open the connection.
        logging: Object exposing ``error(msg, exc_info=...)``. The standard
            ``logging`` module and ``logging.Logger`` instances both qualify.

    Returns:
        bool: True when every statement ran and the transaction was
        committed, False when an error occurred and it was rolled back.
    """
    # Executed in order: the junction table must come last because it
    # references both Hotel(id) and Amenity(id).
    ddl_statements = (
        """DROP TABLE IF EXISTS Hotel, Amenity, Hotel_Amenity CASCADE""",
        """
                CREATE TABLE IF NOT EXISTS Hotel (
                    id SERIAL PRIMARY KEY,
                    city varchar(255) NOT NULL,
                    link varchar(255) NOT NULL,
                    name varchar(255) NOT NULL,
                    rating float8 DEFAULT -1,
                    price float8 DEFAULT -1.0,
                    longitude float8,
                    latitude float8
                )
            """,
        """
                CREATE TABLE IF NOT EXISTS Amenity (
                    id SERIAL PRIMARY KEY,
                    name TEXT NOT NULL
                )
            """,
        """
                CREATE TABLE IF NOT EXISTS Hotel_Amenity (
                    hotel_id INTEGER,
                    amenity_id INTEGER,
                    PRIMARY KEY (hotel_id, amenity_id),
                    FOREIGN KEY (hotel_id) REFERENCES Hotel(id),
                    FOREIGN KEY (amenity_id) REFERENCES Amenity(id)
                )
            """,
    )

    with engine.connect() as connection:
        try:
            for ddl in ddl_statements:
                connection.execute(text(ddl))
            connection.commit()
        except Exception as e:
            # exc_info=True keeps the traceback in the log file; the short
            # message alone rarely identifies which statement failed.
            logging.error(f"Error during creating tables: {e}", exc_info=True)
            print("Error during creating tables, see the insert_data.log file")
            connection.rollback()
            return False

    print("Table creation successful!")
    return True

In [28]:
def _parse_amenities(raw):
    """Split a comma-separated amenity cell into unique, non-empty names (order kept)."""
    if not isinstance(raw, str):
        # A missing CSV cell arrives as NaN/None; str() would turn it into the
        # bogus amenity name "nan".
        if raw is None or pd.isna(raw):
            return []
        raw = str(raw)
    stripped = (part.strip() for part in raw.split(','))
    # dict.fromkeys de-duplicates while preserving first-seen order.
    return list(dict.fromkeys(name for name in stripped if name))


def insert_data(engine, data, logging):
    """Insert hotels, their amenities and the hotel-amenity links.

    Every row of ``data`` becomes one Hotel row. The row's ``Amenity`` cell is
    a comma-separated list: each distinct name is inserted into Amenity once
    per call, and a Hotel_Amenity row links it to the hotel. Missing amenity
    cells and empty names (e.g. from a trailing comma) are skipped.

    Everything runs in one transaction: on any error the failure is logged,
    the whole import is rolled back and the exception is NOT re-raised.

    Note: amenity names are de-duplicated only within a single call (the
    name -> id map is local), so calling this twice against the same tables
    inserts the shared amenity names again.

    Args:
        engine: SQLAlchemy engine used to open the connection.
        data: pandas DataFrame with the columns ``City``, ``Link``, ``Name``,
            ``Rating``, ``Price``, ``Longitude``, ``Latitude`` and ``Amenity``.
        logging: Object exposing ``error(msg, exc_info=...)``. The standard
            ``logging`` module and ``logging.Logger`` instances both qualify.

    Returns:
        bool: True when all rows were inserted and committed, False when an
        error occurred and the transaction was rolled back.
    """
    # Statements are loop-invariant, so build them once.
    hotel_stmt = text("""INSERT INTO Hotel(city, link, name, rating, price, longitude, latitude) VALUES(:c, :l, :n, :r, :p, :lon, :lat) RETURNING id""")
    amenity_stmt = text("""INSERT INTO Amenity (name) VALUES (:amenity) RETURNING id""")
    relation_stmt = text("""INSERT INTO Hotel_Amenity (hotel_id, amenity_id) VALUES (:hid, :aid)""")

    amenity_id_mapping = {}

    with engine.connect() as connection:
        try:
            for _, row in data.iterrows():
                # NOTE(review): missing Rating/Price values are passed through
                # as-is, so the columns' DEFAULT -1 is never applied here -
                # confirm whether they should be mapped before insertion.
                hotel_params = {
                    "c": row['City'],
                    "l": row['Link'],
                    "n": row['Name'],
                    "r": row['Rating'],
                    "p": row['Price'],
                    "lon": row['Longitude'],
                    "lat": row['Latitude'],
                }
                hotel_id = connection.execute(hotel_stmt, hotel_params).fetchone()[0]

                # hotel_id was generated just above, so no link rows can exist
                # for it yet; de-duplicating the names within the row is enough
                # to respect the (hotel_id, amenity_id) primary key without a
                # SELECT round trip per link.
                for amenity in _parse_amenities(row['Amenity']):
                    if amenity not in amenity_id_mapping:
                        result = connection.execute(amenity_stmt, {"amenity": amenity})
                        amenity_id_mapping[amenity] = result.fetchone()[0]

                    connection.execute(
                        relation_stmt,
                        {"hid": hotel_id, "aid": amenity_id_mapping[amenity]},
                    )

            connection.commit()
        except Exception as e:
            # exc_info=True keeps the traceback in the log file.
            logging.error(f"Error during data insertion: {e}", exc_info=True)
            print("Error during data insertion, see the insert_data.log file")
            connection.rollback()
            return False

    # The connection is closed by the ``with`` block; no explicit close needed.
    print("Data insertion successful!")
    return True

In [30]:
# Placeholders: fill these in before running, and avoid committing real
# credentials with the notebook.
# For a Windows path use a raw string (r"D:\...") or forward slashes;
# backslashes in a normal string literal are parsed as escape sequences.
csv_file_path = "<csv_path>"
postgresql_url = 'postgresql://<username>:<password>@<host>:<port>/<db_name>'

# Errors raised while creating tables or inserting data end up in this file.
logging.basicConfig(filename='insert_data.log', level=logging.ERROR, format='%(asctime)s - %(levelname)s: %(message)s')

# utf-8-sig strips a leading byte-order mark so the first column name is clean.
data = pd.read_csv(csv_file_path, encoding='utf-8-sig')

engine = create_engine(postgresql_url)
try:
    # Recreates the schema from scratch, then loads the CSV into it.
    create_tables(engine, logging)

    insert_data(engine, data, logging)
finally:
    # Release pooled connections so re-running this cell in a live kernel
    # does not accumulate open database connections.
    engine.dispose()

  csv_file_path = "D:\TOBB\Bil496\scripts\hotels_canakkale_v1_for_db.csv"


Table creation successful!
Data insertion successful!
