In [120]:
import pandas as pd
from sqlalchemy import create_engine
from sqlalchemy.sql import text
import psycopg2
import logging

In [121]:
def create_tables(engine, logging):
    """Drop and recreate the Restaurant, Cuisine and RestaurantCuisine tables.

    All statements run on a single connection and are committed together.
    Any exception is reported through ``logging.error``, a short notice is
    printed, and the transaction is rolled back; the exception is not re-raised.

    Args:
        engine: Object whose ``connect()`` returns a context-managed connection
            offering ``execute``, ``commit`` and ``rollback``.
        logging: Logger-like object; only its ``error`` method is used.
    """
    # Executed in order: the drop first, then parents before the link table
    # that references them through foreign keys.
    ddl_statements = (
        """DROP TABLE IF EXISTS Restaurant, Cuisine, RestaurantCuisine CASCADE""",
        """
                CREATE TABLE IF NOT EXISTS Restaurant (
                    id SERIAL PRIMARY KEY,
                    city varchar(255) NOT NULL,
                    link varchar(255) NOT NULL,
                    name varchar(255) NOT NULL,
                    rating float8 DEFAULT -1,
                    price_lower float8 DEFAULT -1.0,
                    price_higher float8 DEFAULT -1.0
                )
            """,
        """
                CREATE TABLE IF NOT EXISTS Cuisine (
                    id SERIAL PRIMARY KEY,
                    name TEXT NOT NULL
                )
            """,
        """
                CREATE TABLE IF NOT EXISTS RestaurantCuisine (
                    restaurant_id INTEGER,
                    cuisine_id INTEGER,
                    PRIMARY KEY (restaurant_id, cuisine_id),
                    FOREIGN KEY (restaurant_id) REFERENCES Restaurant(id),
                    FOREIGN KEY (cuisine_id) REFERENCES Cuisine(id)
                )
            """,
    )

    with engine.connect() as connection:
        try:
            for ddl in ddl_statements:
                connection.execute(text(ddl))

            # Commit stays inside the try so a failing commit is also logged
            # and rolled back.
            connection.commit()
            print("Table creation successful!")
        except Exception as err:
            logging.error(f"Error during creating tables: {err}")
            print("Error during creating tables, see the insert_data.log file")
            connection.rollback()

In [122]:
def _parse_cuisines(raw_value):
    """Split a comma-separated cuisine cell into unique, non-empty names, keeping first-seen order."""
    if pd.isna(raw_value):
        # A missing cell would otherwise be stringified and stored as a
        # cuisine literally named "nan".
        return []
    stripped = (part.strip() for part in str(raw_value).split(','))
    # dict.fromkeys removes in-row duplicates while preserving order; blank
    # fragments (e.g. from a trailing comma) are dropped.
    return list(dict.fromkeys(name for name in stripped if name))


def insert_data(engine, data, logging):
    """Insert restaurants, their cuisines and the restaurant-cuisine links.

    Every row of ``data`` becomes one Restaurant row. Each distinct cuisine
    name seen during this call is inserted into Cuisine once, and each
    restaurant is linked to its cuisines through RestaurantCuisine. All work
    happens in one transaction that is committed at the end. On any exception
    the error is reported through ``logging.error``, a short notice is
    printed, and the transaction is rolled back; the exception is not
    re-raised.

    Args:
        engine: Object whose ``connect()`` returns a context-managed connection
            offering ``execute``, ``commit`` and ``rollback``.
        data: Object whose ``iterrows()`` yields ``(index, row)`` pairs where
            ``row`` is indexable by 'City', 'Link', 'Name', 'Rating',
            'Price_Lower', 'Price_Higher' and 'Cuisine' (a comma-separated
            string, possibly missing).
        logging: Logger-like object; only its ``error`` method is used.
    """
    # Statements are loop-invariant, so build them once.
    restaurant_stmt = text("""INSERT INTO Restaurant(city, link, name, rating, price_lower, price_higher) VALUES(:c, :l, :n, :r, :pl, :ph) RETURNING id""")
    cuisine_stmt = text("""INSERT INTO Cuisine (name) VALUES (:cuisine) RETURNING id""")
    relation_stmt = text("""INSERT INTO RestaurantCuisine (restaurant_id, cuisine_id) VALUES (:rid, :cid)""")

    # Cuisine name -> generated id, for cuisines inserted during this call.
    cuisine_id_mapping = {}

    # The context manager closes the connection on exit; no explicit close needed.
    with engine.connect() as connection:
        try:
            for _, row in data.iterrows():
                restaurant_id = connection.execute(
                    restaurant_stmt,
                    {
                        "c": row['City'],
                        "l": row['Link'],
                        "n": row['Name'],
                        "r": row['Rating'],
                        "pl": row['Price_Lower'],
                        "ph": row['Price_Higher'],
                    },
                ).scalar_one()

                # restaurant_id was generated just above, so the only possible
                # duplicate (restaurant_id, cuisine_id) pairs come from repeats
                # within this row; _parse_cuisines already removed those, which
                # makes a per-link existence query unnecessary.
                for cuisine in _parse_cuisines(row['Cuisine']):
                    if cuisine not in cuisine_id_mapping:
                        cuisine_id_mapping[cuisine] = connection.execute(
                            cuisine_stmt, {"cuisine": cuisine}
                        ).scalar_one()

                    connection.execute(
                        relation_stmt,
                        {"rid": restaurant_id, "cid": cuisine_id_mapping[cuisine]},
                    )

            connection.commit()
            print("Data insertion successful!")

        except Exception as e:
            logging.error(f"Error during data insertion: {e}")
            print("Error during data insertion, see the insert_data.log file")
            connection.rollback()

In [123]:
# Run configuration. The <...> values are placeholders and must be replaced
# with a real CSV path and PostgreSQL connection details before running.
csv_file_path = "<path_to_csv>"
postgresql_url = 'postgresql://<username>:<password>@<host>:<port>/<db_name>'
# Only records at ERROR level or above are written to insert_data.log, the
# file that the failure messages printed by create_tables/insert_data point to.
logging.basicConfig(filename='insert_data.log', level=logging.ERROR, format='%(asctime)s - %(levelname)s: %(message)s')
    
# 'utf-8-sig' decodes UTF-8 and skips a leading byte-order mark if one is present.
# insert_data reads the columns City, Link, Name, Rating, Price_Lower,
# Price_Higher and Cuisine from this frame.
data = pd.read_csv(csv_file_path, encoding='utf-8-sig')

engine = create_engine(postgresql_url)

# Destructive: create_tables drops Restaurant, Cuisine and RestaurantCuisine
# (if they exist) before recreating them.
# The logging module itself is passed as the logger; both functions only call
# its error() function.
create_tables(engine, logging)

insert_data(engine, data, logging)

Table creation successful!
Data insertion successful!
