### 0. Imports

In [470]:
import pandas as pd

# work with environment variables
import os
from dotenv import load_dotenv

# Load environment variables from .env file
load_dotenv()

# Retrieve database credentials
USERNAME = os.getenv("DATABASE_USERNAME")
PASSWORD = os.getenv("DATABASE_PASSWORD")

# append parent folder to path
import sys
sys.path.append("..")

# import data load support functions
import src.data_load_support as dls

# import database connection support functions
import src.database_connection_support as dcs

# 1. Introduction to this notebook

# 2. Database design

In [471]:
airports = pd.read_parquet("../data/airport_codes/transformed/countries_airports.parquet")
airports.head()

Unnamed: 0,country,city_name,city_entityid,airport_skyid,airport_entityid,airport_name
0,spain,madrid,27544850,MAD,95565077,madrid
1,spain,barcelona,27548283,BCN,95565085,barcelona
2,spain,port of spain,27546011,POS,104120358,port of spain
3,spain,malaga,27547484,AGP,95565095,malaga
4,spain,seville,27547022,SVQ,95565089,seville


## 2.1 Cities

```sql
-- Create cities table
CREATE TABLE cities (
    city_entityid SERIAL PRIMARY KEY,
    city_name VARCHAR(255) NOT NULL,
    country VARCHAR(100)
);
```

In [472]:
cities = airports[["country","city_name","city_entityid"]]

## 2.2 Flights

### 2.2.1 Flight tables - Airports

```sql
-- Create airports table
CREATE TABLE airports (
    airport_entityid SERIAL PRIMARY KEY,
    airport_skyid VARCHAR(10) NOT NULL,
    airport_name VARCHAR(255) NOT NULL,
    city_entityid INT REFERENCES cities(city_entityid) ON DELETE SET NULL
);
```

- airport_entityid PK
- airport_skyid
- airport_name
- city_entityid


In [473]:
airports = airports[["airport_entityid","airport_skyid","airport_name","city_entityid"]]

### 2.2.2 Flight tables - Flights

In [474]:
itineraries = pd.read_parquet("../data/flights/transformed/itineraries.parquet")
itineraries.head()

Unnamed: 0,itinerary_id,query_date,score,duration,price,price_currency,stops,departure,arrival,company,...,fare_is_change_allowed,fare_is_partially_changeable,fare_is_cancellation_allowed,fare_is_partially_refundable,origin_airport,destination_airport,origin_airport_code,destination_airport_code,origin_airport_entityid,destination_airport_entityid
0,9772-2411031955--32222-0-13870-2411032120,2024-11-03 16:45:48.489825,0.999,85,164,€,0,2024-11-03 19:55:00,2024-11-03 21:20:00,Iberia,...,False,False,False,False,Barcelona,Madrid,BCN,MAD,95565085,95565077
1,9772-2411031625--31685-0-13870-2411031745,2024-11-03 16:45:48.491871,0.784844,80,175,€,0,2024-11-03 16:25:00,2024-11-03 17:45:00,Vueling Airlines,...,False,False,False,False,Barcelona,Madrid,BCN,MAD,95565085,95565077
2,9772-2411031800--32222-0-13870-2411031925,2024-11-03 16:45:48.493592,0.533734,85,234,€,0,2024-11-03 18:00:00,2024-11-03 19:25:00,Iberia,...,False,False,False,False,Barcelona,Madrid,BCN,MAD,95565085,95565077
3,9772-2411032105--32222-0-13870-2411032230,2024-11-03 16:45:48.493891,0.382849,85,175,€,0,2024-11-03 21:05:00,2024-11-03 22:30:00,Iberia,...,False,False,False,False,Barcelona,Madrid,BCN,MAD,95565085,95565077
4,9772-2411031625--32222-0-13870-2411031745,2024-11-03 16:45:48.495897,0.375225,80,359,€,0,2024-11-03 16:25:00,2024-11-03 17:45:00,Iberia,...,False,False,False,False,Barcelona,Madrid,BCN,MAD,95565085,95565077


- itinerary_id PK
- origin_airport_entityid FK
- destination_airport_entityid FK
- departure_datetime
- arrival_datetime
- company
- self_transfer
- fare_is_change_allowed
- fare_is_partially_changeable
- fare_is_cancellation_allowed
- fare_is_partially_refundable


In [475]:
flights = itineraries[["itinerary_id","origin_airport_entityid","destination_airport_entityid","departure","arrival","company","self_transfer","fare_is_change_allowed","fare_is_partially_changeable", "fare_is_cancellation_allowed","fare_is_partially_refundable"]]

### 2.2.3 Flight tables - Flight_prices

- price_id PK
- itinerary_id FK
- query_date
- price
- price_currency
- score

In [476]:
flight_prices = itineraries[["itinerary_id","query_date","price","price_currency","score"]]

## 2.2 Accommodations

In [477]:
accommodations_df = pd.read_parquet("../data/accommodations/transformed/booking.parquet")
accommodations_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 233 entries, 0 to 232
Data columns (total 28 columns):
 #   Column                   Non-Null Count  Dtype         
---  ------                   --------------  -----         
 0   query_date               233 non-null    datetime64[ns]
 1   city                     233 non-null    object        
 2   checkin                  233 non-null    datetime64[ns]
 3   checkout                 233 non-null    datetime64[ns]
 4   n_adults_search          233 non-null    object        
 5   n_children_search        233 non-null    object        
 6   n_rooms_search           233 non-null    object        
 7   name                     233 non-null    object        
 8   url                      233 non-null    object        
 9   price_currency           233 non-null    object        
 10  total_price_amount       233 non-null    object        
 11  distance_city_center_km  233 non-null    float64       
 12  score                    223 non-nul

Booking places

- place_id PK
- city_name FK
- name
- url
- distance_city_center_km
- score
- n_comments
- close_to_metro
- sustainability_cert
- location_score

In [478]:
booking_places = accommodations_df[["city","name","url","distance_city_center_km","score","n_comments","close_to_metro","sustainability_cert","location_score"]]

Accommodations

- accommodation_id PK
- place_id PK
- room_type
- standardized_room_type
- double_bed
- single_bed
- shared_bathroom
- balcony

In [479]:
accommodations = accommodations_df[["room_type","standardized_room_type","double_bed","single_bed","shared_bathroom","balcony"]]

Prices
- price_id PK
- accommodation_id FK
- query_date
- checkin
- checkout
- n_adults
- n_children
- n_rooms
- price_night
- price_currency
- free_cancellation
- pay_at_hotel
- free_taxi

In [480]:
accommodations_df.columns

Index(['query_date', 'city', 'checkin', 'checkout', 'n_adults_search',
       'n_children_search', 'n_rooms_search', 'name', 'url', 'price_currency',
       'total_price_amount', 'distance_city_center_km', 'score', 'n_comments',
       'close_to_metro', 'sustainability_cert', 'room_type', 'double_bed',
       'single_bed', 'free_cancellation', 'breakfast_included', 'pay_at_hotel',
       'location_score', 'free_taxi', 'standardized_room_type',
       'shared_bathroom', 'balcony', 'price_night'],
      dtype='object')

In [481]:
accommodation_prices = accommodations_df[["query_date","checkin","checkout","n_adults_search","n_children_search","n_rooms_search","price_night","price_currency","free_cancellation","pay_at_hotel","free_taxi"]]

## 2.3 Activities and availability

In [482]:
activities = pd.read_parquet("../data/activities/transformed/activities.parquet")
availabilities = pd.read_parquet("../data/activities/transformed/availabilities.parquet")
activities.head(3)

Unnamed: 0,query_date,city,activity_date_range_start,activity_date_range_end,activity_name,description,url,image,available_days,available_times,duration,latitude,longitude,price,currency,category,spanish,address
0,2024-11-03 12:47:29.458558,barcelona,2024-11-28,2024-12-04,Tour privado por Barcelona ¡Tú eliges!,Reservando nuestra visita privada tendréis un ...,www.civitatis.com/es/barcelona/tour-privado-ba...,www.civitatis.com/f/espana/barcelona/tour-priv...,"[30, 29, 02, 03, 01, 28, 04]","[[9:00, 10:00, 11:00, 12:00, 13:00, 14:00, 15:...",2h 30m -5h,0.0,0.0,40.0,EUR,Visitas guiadas y free tours,Español,Address not found
1,2024-11-03 12:47:29.464586,barcelona,2024-11-28,2024-12-04,Excursión a Montserrat + Visita a una bodega,En esta excursión a Montserrat no solo disfrut...,www.civitatis.com/es/barcelona/tour-tapas-vino...,www.civitatis.com/f/espana/barcelona/tour-tapa...,"[04, 30, 28, 03, 02, 01, 29]","[[8:45], [8:45], [8:45], [8:45], [8:45], [8:45...",7h 30m,41.3940236912484,2.181866082214644,19.98,EUR,Gastronomía y enoturismo,Español,"Barcelona (Bus Terminal Nord), Eixample, 08013..."
2,2024-11-03 12:47:29.468587,barcelona,2024-11-28,2024-12-04,Paseo en catamarán al atardecer con música en ...,Contempla el skyline de Barcelona mientras dis...,www.civitatis.com/es/barcelona/paseo-catamaran...,www.civitatis.com/f/espana/barcelona/paseo-cat...,"[30, 29, 01, 04, 28, 02, 03]","[[16:30], [16:30], [16:30], [16:30], [16:30], ...",1h 30m,41.37495867288118,2.17849589524371,7.65,EUR,Paseos en barco,Español,"Moll de les Drassanes, 3P, Ciutat Vella, 08039..."


In [483]:
availabilities.head(3)

Unnamed: 0,query_date,available_times,city,activity_name,activity_date_range_start,activity_date_range_end,available_date
0,2024-11-03 12:47:29.458558,09:00:00,barcelona,Tour privado por Barcelona ¡Tú eliges!,2024-11-28,2024-12-04,2024-11-30
0,2024-11-03 12:47:29.458558,10:00:00,barcelona,Tour privado por Barcelona ¡Tú eliges!,2024-11-28,2024-12-04,2024-11-30
0,2024-11-03 12:47:29.458558,11:00:00,barcelona,Tour privado por Barcelona ¡Tú eliges!,2024-11-28,2024-12-04,2024-11-30


The activites have persistent information in their name, description, url, image, duration, latitude, longitude, category, spanish (language) and address. Thus, it makes for one single table of the database.

### Activities table

- activity_id PK
- activity_name
- city
- description
- url
- image
- duration
- latitude
- longitude
- category
- spanish
- address

In [484]:
activities_table = activities[["activity_name","city","description","url","image","duration","latitude","longitude","category","spanish","address"]]

Then, there is more time dependant informations about them, which are:
- Available date and time
- Price

Both of these are bound to query time, as prices and availabilities are subject to change, due to possible offers or changes in demand. following the rules of normalisation, they represent 2 separate tables to avoid duplicities in information.

#### Activities_prices

The price_id will be serial.

- price_id PK
- activity_id FK
- query_date
- price
- currency

The table will need the activity name and city to check for the activity_id in the database, and then map the ids to the table to upload it all.

If streamlined, it will have to do that activity by activity.

In [485]:
activities_prices = activities[["activity_name","city","query_date","price","currency"]]

#### Activities_availabilities

- schedule_id PK
- activity_id FK
- query_date
- available_date
- available_time

In [486]:
availabilities

Unnamed: 0,query_date,available_times,city,activity_name,activity_date_range_start,activity_date_range_end,available_date
0,2024-11-03 12:47:29.458558,09:00:00,barcelona,Tour privado por Barcelona ¡Tú eliges!,2024-11-28,2024-12-04,2024-11-30
0,2024-11-03 12:47:29.458558,10:00:00,barcelona,Tour privado por Barcelona ¡Tú eliges!,2024-11-28,2024-12-04,2024-11-30
0,2024-11-03 12:47:29.458558,11:00:00,barcelona,Tour privado por Barcelona ¡Tú eliges!,2024-11-28,2024-12-04,2024-11-30
0,2024-11-03 12:47:29.458558,12:00:00,barcelona,Tour privado por Barcelona ¡Tú eliges!,2024-11-28,2024-12-04,2024-11-30
0,2024-11-03 12:47:29.458558,13:00:00,barcelona,Tour privado por Barcelona ¡Tú eliges!,2024-11-28,2024-12-04,2024-11-30
...,...,...,...,...,...,...,...
1177,2024-11-03 12:47:29.615915,,barcelona,Tour de los misterios y leyendas de Barcelona,2024-12-04,2024-12-10,NaT
1178,2024-11-03 12:47:29.616916,,barcelona,Entrada al Spotify Camp Nou,2024-12-04,2024-12-10,NaT
1179,2024-11-03 12:47:29.617916,,barcelona,Free tour de los misterios y leyendas del Barr...,2024-12-04,2024-12-10,NaT
1180,2024-11-03 12:47:29.618916,,barcelona,"Excursión a Gerona, Figueras y Museo Dalí",2024-12-04,2024-12-10,NaT


In [487]:
activities_schedules = availabilities[["activity_name","city","query_date","available_date","available_times"]]

Cities

# 3. Database creation

## 3.1 Database creation - database

In [488]:
database_credentials = {"username":USERNAME,
                        "password":PASSWORD,
                        }

In [489]:
dls.create_db("travel_planner", credentials_dict=database_credentials)

Database already existant.


## 3.2 Database creation - tables

In [513]:
drop_tables = "DROP TABLE IF EXISTS cities, airports, flights, flight_prices, booking_places, accommodations, accommodation_prices, activities, activity_prices, activity_availabilities CASCADE;"

create_cities = """
CREATE TABLE cities (
    city_entityid SERIAL PRIMARY KEY,
    city_name VARCHAR(255) NOT NULL,
    country VARCHAR(100)
);
"""

create_airports = """
CREATE TABLE airports (
    airport_entityid SERIAL PRIMARY KEY,
    airport_skyid VARCHAR(10) NOT NULL,
    airport_name VARCHAR(255) NOT NULL,
    city_entityid INT REFERENCES cities(city_entityid) ON DELETE SET NULL
);
"""

create_flights = """
CREATE TABLE flights (
    itinerary_id VARCHAR(255) PRIMARY KEY,
    origin_airport_entityid INT REFERENCES airports(airport_entityid),
    destination_airport_entityid INT REFERENCES airports(airport_entityid),
    departure_datetime TIMESTAMP NOT NULL,
    arrival_datetime TIMESTAMP NOT NULL,
    company VARCHAR(100),
    self_transfer BOOLEAN,
    fare_is_change_allowed BOOLEAN,
    fare_is_partially_changeable BOOLEAN,
	fare_is_cancellation_allowed BOOLEAN,
	fare_is_partially_refundable BOOLEAN
);
"""

create_flight_prices = """
CREATE TABLE flight_prices (
    price_id SERIAL PRIMARY KEY,
    itinerary_id VARCHAR(255) REFERENCES flights(itinerary_id),
    query_date TIMESTAMP NOT NULL,
    price NUMERIC NOT NULL,
    price_currency VARCHAR(10) NOT NULL,
    score NUMERIC
);
"""

create_booking_places = """
CREATE TABLE booking_places (
    place_id SERIAL PRIMARY KEY,
    city_entityid INT REFERENCES cities(city_entityid) ON DELETE SET NULL,
    name VARCHAR(255) NOT NULL,
    url VARCHAR(255),
    distance_city_center_km NUMERIC,
    score NUMERIC,
    n_comments INT,
    close_to_metro BOOLEAN,
    sustainability_cert BOOLEAN,
    location_score NUMERIC
);
"""

create_accommodations = """
CREATE TABLE accommodations (
    accommodation_id SERIAL PRIMARY KEY,
    place_id INT REFERENCES booking_places(place_id),
    room_type VARCHAR(100),
    standardized_room_type VARCHAR(100),
    double_bed INT DEFAULT 0,
    single_bed INT DEFAULT 0,
    shared_bathroom BOOLEAN,
    balcony BOOLEAN
);
"""

create_accommodation_prices = """
CREATE TABLE accommodation_prices (
    price_id SERIAL PRIMARY KEY,
    accommodation_id INT REFERENCES accommodations(accommodation_id),
    query_date TIMESTAMP NOT NULL,
    checkin DATE NOT NULL,
    checkout DATE NOT NULL,
    n_adults INT NOT NULL,
    n_children INT DEFAULT 0,
    n_rooms INT NOT NULL,
    price_night NUMERIC NOT NULL,
    price_currency VARCHAR(4) NOT NULL,
    free_cancellation BOOLEAN,
    pay_at_hotel BOOLEAN,
    free_taxi BOOLEAN
);
"""

create_activities = """
CREATE TABLE activities (
    activity_id SERIAL PRIMARY KEY,
    activity_name VARCHAR(500) NOT NULL,
    city_entityid INT REFERENCES cities(city_entityid) ON DELETE SET NULL,
    description TEXT,
    url VARCHAR(500),
    image VARCHAR(500),
    duration VARCHAR(30),
    latitude NUMERIC,
    longitude NUMERIC,
    category VARCHAR(100),
    spanish VARCHAR(30),
    address VARCHAR(500)
);
"""

create_activity_prices = """
CREATE TABLE activity_prices (
    price_id SERIAL PRIMARY KEY,
    activity_id INT REFERENCES activities(activity_id) ON DELETE CASCADE,
    query_date DATE NOT NULL,
    price NUMERIC NOT NULL,
    currency VARCHAR(4) NOT NULL
);
"""

create_activity_availabilities = """
CREATE TABLE activity_availabilities (
    schedule_id SERIAL PRIMARY KEY,
    activity_id INT REFERENCES activities(activity_id) ON DELETE CASCADE,
    query_date TIMESTAMP NOT NULL,
    available_date DATE NOT NULL,
    available_time TIME NOT NULL
);
"""


In [514]:
create_table_queries = [
    drop_tables,
    create_cities,
    create_airports,
    create_flights,
    create_flight_prices,
    create_booking_places,
    create_accommodations,
    create_accommodation_prices,
    create_activities,
    create_activity_prices,
    create_activity_availabilities
]

In [515]:
conn = dcs.connect_to_database("travel_planner", credentials_dict=database_credentials, autocommit=True)

# drop all tables and create:
for query in create_table_queries:
    conn.cursor().execute(query)

## 3.3 Database creation - batch table insert

### 3.3.1 Cities

In [516]:
create_cities = """
CREATE TABLE cities (
    city_entityid SERIAL PRIMARY KEY,
    city_name VARCHAR(255) NOT NULL,
    country VARCHAR(100)
);
"""

In [517]:
cities.head(3)

Unnamed: 0,country,city_name,city_entityid
0,spain,madrid,27544850
1,spain,barcelona,27548283
2,spain,port of spain,27546011


In [518]:
conn.cursor().executemany(
"INSERT INTO cities (country, city_name, city_entityid) VALUES (%s,%s,%s)",
list(cities.itertuples(index=False, name=None))
)

### 3.3.2 Airports

In [519]:
create_airports = """
CREATE TABLE airports (
    airport_entityid SERIAL PRIMARY KEY,
    airport_skyid VARCHAR(10) NOT NULL,
    airport_name VARCHAR(255) NOT NULL,
    city_entityid INT REFERENCES cities(city_entityid) ON DELETE SET NULL
);
"""

In [520]:
airports.head(3)

Unnamed: 0,airport_entityid,airport_skyid,airport_name,city_entityid
0,95565077,MAD,madrid,27544850
1,95565085,BCN,barcelona,27548283
2,104120358,POS,port of spain,27546011


In [521]:
conn.cursor().executemany(
"INSERT INTO airports (airport_entityid, airport_skyid, airport_name, city_entityid) VALUES (%s,%s,%s,%s)",
list(airports.itertuples(index=False, name=None))
)

### 3.3.3 Flights

In [522]:
create_flights = """
CREATE TABLE flights (
    itinerary_id VARCHAR(255) PRIMARY KEY,
    origin_airport_entityid INT REFERENCES airports(airport_entityid),
    destination_airport_entityid INT REFERENCES airports(airport_entityid),
    departure_datetime TIMESTAMP NOT NULL,
    arrival_datetime TIMESTAMP NOT NULL,
    company VARCHAR(100),
    self_transfer BOOLEAN,
    fare_is_change_allowed BOOLEAN,
    fare_is_partially_changeable BOOLEAN,
	fare_is_cancellation_allowed BOOLEAN,
	fare_is_partially_refundable BOOLEAN
);
"""

In [523]:
flights.head(3)

Unnamed: 0,itinerary_id,origin_airport_entityid,destination_airport_entityid,departure,arrival,company,self_transfer,fare_is_change_allowed,fare_is_partially_changeable,fare_is_cancellation_allowed,fare_is_partially_refundable
0,9772-2411031955--32222-0-13870-2411032120,95565085,95565077,2024-11-03 19:55:00,2024-11-03 21:20:00,Iberia,False,False,False,False,False
1,9772-2411031625--31685-0-13870-2411031745,95565085,95565077,2024-11-03 16:25:00,2024-11-03 17:45:00,Vueling Airlines,False,False,False,False,False
2,9772-2411031800--32222-0-13870-2411031925,95565085,95565077,2024-11-03 18:00:00,2024-11-03 19:25:00,Iberia,False,False,False,False,False


In [524]:
conn.cursor().executemany(
    """
    INSERT INTO flights (
        itinerary_id, 
        origin_airport_entityid, 
        destination_airport_entityid, 
        departure_datetime, 
        arrival_datetime, 
        company, 
        self_transfer, 
        fare_is_change_allowed, 
        fare_is_partially_changeable, 
        fare_is_cancellation_allowed, 
        fare_is_partially_refundable
    ) VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s)
    """,
    list(flights.itertuples(index=False, name=None))
)

### 3.3.4 Flight prices

In [525]:
create_flight_prices = """
CREATE TABLE flight_prices (
    price_id SERIAL PRIMARY KEY,
    itinerary_id VARCHAR(255) REFERENCES flights(itinerary_id),
    query_date TIMESTAMP NOT NULL,
    price NUMERIC NOT NULL,
    price_currency VARCHAR(10) NOT NULL,
    score NUMERIC
);
"""

In [526]:
flight_prices.head(3)

Unnamed: 0,itinerary_id,query_date,price,price_currency,score
0,9772-2411031955--32222-0-13870-2411032120,2024-11-03 16:45:48.489825,164,€,0.999
1,9772-2411031625--31685-0-13870-2411031745,2024-11-03 16:45:48.491871,175,€,0.784844
2,9772-2411031800--32222-0-13870-2411031925,2024-11-03 16:45:48.493592,234,€,0.533734


In [527]:
conn.cursor().executemany(
    """
    INSERT INTO flight_prices (
        itinerary_id, 
        query_date, 
        price, 
        price_currency, 
        score
    ) VALUES (%s, %s, %s, %s, %s)
    """,
    list(flight_prices.itertuples(index=False, name=None))
)

### 3.3.5 Booking places

In [528]:
create_booking_places = """
CREATE TABLE booking_places (
    place_id SERIAL PRIMARY KEY,
    city_entityid INT REFERENCES cities(city_entityid) ON DELETE SET NULL,
    name VARCHAR(255) NOT NULL,
    url VARCHAR(500),
    distance_city_center_km NUMERIC,
    score NUMERIC,
    n_comments INT,
    close_to_metro BOOLEAN,
    sustainability_cert BOOLEAN,
    location_score NUMERIC
);
"""

In [529]:
booking_places.head(3)

Unnamed: 0,city,name,url,distance_city_center_km,score,n_comments,close_to_metro,sustainability_cert,location_score
0,27548283,Pensión 45,https://www.booking.com/hotel/es/pension-45.es...,0.3,6.5,2798,Yes,No,
1,27548283,Nice and comfortable room for your stay in BCN,https://www.booking.com/hotel/es/nice-and-comf...,1.1,10.0,1,Yes,No,10.0
2,27548283,Travelodge Barcelona Poblenou,https://www.booking.com/hotel/es/travelodge-ba...,2.9,7.2,10150,Yes,No,


Map city entity id

In [530]:
citi_entityid_map = cities.set_index("city_name")["city_entityid"].to_dict()

In [531]:
booking_places["city"] = booking_places["city"].map(citi_entityid_map)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  booking_places["city"] = booking_places["city"].map(citi_entityid_map)


In [532]:
booking_places.head(2)

Unnamed: 0,city,name,url,distance_city_center_km,score,n_comments,close_to_metro,sustainability_cert,location_score
0,,Pensión 45,https://www.booking.com/hotel/es/pension-45.es...,0.3,6.5,2798,Yes,No,
1,,Nice and comfortable room for your stay in BCN,https://www.booking.com/hotel/es/nice-and-comf...,1.1,10.0,1,Yes,No,10.0


In [535]:
booking_places.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 233 entries, 0 to 232
Data columns (total 9 columns):
 #   Column                   Non-Null Count  Dtype  
---  ------                   --------------  -----  
 0   city                     0 non-null      float64
 1   name                     233 non-null    object 
 2   url                      233 non-null    object 
 3   distance_city_center_km  233 non-null    float64
 4   score                    223 non-null    object 
 5   n_comments               223 non-null    object 
 6   close_to_metro           233 non-null    object 
 7   sustainability_cert      233 non-null    object 
 8   location_score           55 non-null     object 
dtypes: float64(2), object(7)
memory usage: 16.5+ KB


In [537]:
booking_places[["n_comments","location_score","score","distance_city_center_km"]] = booking_places[["n_comments","location_score","score","distance_city_center_km"]].astype(float)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  booking_places[["n_comments","location_score","score","distance_city_center_km"]] = booking_places[["n_comments","location_score","score","distance_city_center_km"]].astype(float)


In [None]:
create_booking_places = """
CREATE TABLE booking_places (
    place_id SERIAL PRIMARY KEY,
    city_entityid INT REFERENCES cities(city_entityid) ON DELETE SET NULL,
    name VARCHAR(255) NOT NULL,
    url VARCHAR(500),
    distance_city_center_km NUMERIC,
    score NUMERIC,
    n_comments INT,
    close_to_metro BOOLEAN,
    sustainability_cert BOOLEAN,
    location_score NUMERIC
);
"""

In [538]:
conn.cursor().executemany(
    """
    INSERT INTO booking_places (
        city_entityid, 
        name, 
        url, 
        distance_city_center_km, 
        score,
        n_comments,
        close_to_metro,
        sustainability_cert,
        location_score
    ) VALUES (%s, %s, %s, %s, %s,%s, %s, %s, %s)
    """,
    list(booking_places.itertuples(index=False, name=None))
)

NumericValueOutOfRange: entero fuera de rango
