In [118]:
from ipywidgets import interact 
import ipywidgets as widgets
from sqlalchemy import create_engine

%load_ext sql

The sql extension is already loaded. To reload it, use:
  %reload_ext sql


In [119]:
engine = create_engine('postgresql://postgres:pgadmin@localhost/postgres')
%sql postgresql://postgres:pgadmin@localhost/postgres

In [120]:
%config SqlMagic.autocommit=False

In [121]:
%%sql 
COMMIT;
DROP DATABASE IF EXISTS airbnb_icmc_2 WITH (FORCE);
COMMIT;
CREATE DATABASE airbnb_icmc_2
    WITH OWNER = postgres
    ENCODING = 'UTF8';
COMMIT;

   postgresql://postgres:***@localhost/airbnb_icmc_2
 * postgresql://postgres:***@localhost/postgres
Done.
Done.
Done.
Done.
Done.


[]

In [122]:
%config SqlMagic.autocommit=True

In [123]:
engine = create_engine('postgresql://postgres:pgadmin@localhost/airbnb_icmc_2')
%sql postgresql://postgres:pgadmin@localhost/airbnb_icmc_2

In [124]:
%%sql
DROP TABLE IF EXISTS Listings, Reviews, Calendar CASCADE;

 * postgresql://postgres:***@localhost/airbnb_icmc_2
   postgresql://postgres:***@localhost/postgres
Done.


[]

# Inserindo dados nas tabelas do airbnb

In [125]:
%%sql
DROP TYPE IF EXISTS bool_sigla CASCADE;
CREATE TYPE bool_sigla AS ENUM {'t', 'f'}

 * postgresql://postgres:***@localhost/airbnb_icmc_2
   postgresql://postgres:***@localhost/postgres
Done.
Done.


[]

In [126]:
%%sql
DROP TABLE IF EXISTS Listings CASCADE;
CREATE TABLE Listings (
    id BIGINT PRIMARY KEY,
    listing_url TEXT,
    scrape_id BIGINT,
    last_scraped DATE,
    source TEXT,
    name TEXT,
    description TEXT,
    neighborhood_overview TEXT,
    picture_url TEXT,
    host_id BIGINT,
    host_url TEXT,
    host_name TEXT,
    host_since DATE,
    host_location TEXT,
    host_about TEXT,
    host_response_time TEXT,
    host_response_rate TEXT,
    host_acceptance_rate TEXT,
    host_is_superhost bool_sigla,
    host_thumbnail_url TEXT,
    host_picture_url TEXT,
    host_neighbourhood TEXT,
    host_listings_count INT,
    host_total_listings_count INT,
    host_verifications TEXT,
    host_has_profile_pic BOOLEAN,
    host_identity_verified BOOLEAN,
    neighbourhood TEXT,
    neighbourhood_cleansed TEXT,
    neighbourhood_group_cleansed TEXT,
    latitude DOUBLE PRECISION,
    longitude DOUBLE PRECISION,
    property_type TEXT,
    room_type TEXT,
    accommodates INT,
    bathrooms INT,
    bathrooms_text TEXT,
    bedrooms INT,
    beds INT,
    amenities TEXT,
    price TEXT,
    minimum_nights INT,
    maximum_nights INT,
    minimum_minimum_nights INT,
    maximum_minimum_nights INT,
    minimum_maximum_nights INT,
    maximum_maximum_nights INT,
    minimum_nights_avg_ntm DOUBLE PRECISION,
    maximum_nights_avg_ntm DOUBLE PRECISION,
    calendar_updated TEXT,
    has_availability bool_sigla,
    availability_30 INT,
    availability_60 INT,
    availability_90 INT,
    availability_365 INT,
    calendar_last_scraped DATE,
    number_of_reviews INT,
    number_of_reviews_ltm INT,
    number_of_reviews_l30d INT,
    first_review DATE,
    last_review DATE,
    review_scores_rating DOUBLE PRECISION,
    review_scores_accuracy DOUBLE PRECISION,
    review_scores_cleanliness DOUBLE PRECISION,
    review_scores_checkin DOUBLE PRECISION,
    review_scores_communication DOUBLE PRECISION,
    review_scores_location DOUBLE PRECISION,
    review_scores_value DOUBLE PRECISION,
    license TEXT,
    instant_bookable bool_sigla,
    calculated_host_listings_count INT,
    calculated_host_listings_count_entire_homes INT,
    calculated_host_listings_count_private_rooms INT,
    calculated_host_listings_count_shared_rooms INT,
    reviews_per_month DOUBLE PRECISION
);

 * postgresql://postgres:***@localhost/airbnb_icmc_2
   postgresql://postgres:***@localhost/postgres
Done.
Done.


[]

In [127]:
%%sql
DROP TABLE IF EXISTS Reviews CASCADE;
CREATE TABLE Reviews (
    id BIGINT,
    listing_id BIGINT,
    date DATE,
    reviewer_id BIGINT,
    reviewer_name TEXT,
    comments TEXT
);

 * postgresql://postgres:***@localhost/airbnb_icmc_2
   postgresql://postgres:***@localhost/postgres
Done.
Done.


[]

In [128]:
%%sql
DROP TABLE IF EXISTS Calendar CASCADE;
CREATE TABLE Calendar (
    listing_id BIGINT,
    date DATE,
    available bool_sigla,
    price TEXT,
    adjusted_price TEXT,
    minimum_nights INT,
    maximum_nights INT,
    PRIMARY KEY (listing_id, date),
    FOREIGN KEY (listing_id) REFERENCES Listings(id)
);

 * postgresql://postgres:***@localhost/airbnb_icmc_2
   postgresql://postgres:***@localhost/postgres
Done.
Done.


[]

In [129]:
!psql -c "\copy Listings FROM 'listings.csv' WITH (FORMAT CSV, DELIMITER ',', ENCODING 'UTF8', NULL '', QUOTE '\"', HEADER true);" -U postgres -d airbnb_icmc_2

COPY 36008


In [130]:
!psql -c "\copy Reviews FROM './reviews.csv' WITH (FORMAT CSV, DELIMITER ',', ENCODING 'UTF8', NULL '', QUOTE '\"', HEADER true);" -U postgres -d airbnb_icmc_2

COPY 703796


In [131]:
!psql -c "\copy Calendar FROM './calendar.csv' WITH (FORMAT CSV, DELIMITER ',', ENCODING 'UTF8', NULL '', QUOTE '\"', HEADER true);" -U postgres -d airbnb_icmc_2

COPY 13145595


# Normalizando as tabelas do airbnb

In [132]:
%%sql
DROP TABLE IF EXISTS Listings_norm CASCADE;
CREATE TABLE Listings_norm AS (
    SELECT
        id,
        listing_url,
        name,
        description,
        neighborhood_overview,
        picture_url,
        neighbourhood,
        neighbourhood_cleansed,
        neighbourhood_group_cleansed,
        latitude,
        longitude,
        property_type,
        room_type,
        accommodates,
        bathrooms,
        bathrooms_text,
        bedrooms,
        beds,
        price,
        minimum_nights,
        maximum_nights,
        minimum_minimum_nights,
        maximum_minimum_nights,
        minimum_maximum_nights,
        maximum_maximum_nights,
        minimum_nights_avg_ntm,
        maximum_nights_avg_ntm,
        calendar_updated,
        has_availability,
        availability_30,
        availability_60,
        availability_90,
        availability_365,
        calendar_last_scraped,
        number_of_reviews,
        number_of_reviews_ltm,
        number_of_reviews_l30d,
        first_review,
        last_review,
        review_scores_rating,
        review_scores_accuracy,
        review_scores_cleanliness,
        review_scores_checkin,
        review_scores_communication,
        review_scores_location,
        review_scores_value,
        license,
        instant_bookable,
        reviews_per_month
    FROM Listings);

 * postgresql://postgres:***@localhost/airbnb_icmc_2
   postgresql://postgres:***@localhost/postgres
Done.
36008 rows affected.


[]

In [133]:
%%sql
SELECT *
FROM Listings_norm
LIMIT 10

 * postgresql://postgres:***@localhost/airbnb_icmc_2
   postgresql://postgres:***@localhost/postgres
10 rows affected.


id,listing_url,name,description,neighborhood_overview,picture_url,neighbourhood,neighbourhood_cleansed,neighbourhood_group_cleansed,latitude,longitude,property_type,room_type,accommodates,bathrooms,bathrooms_text,bedrooms,beds,price,minimum_nights,maximum_nights,minimum_minimum_nights,maximum_minimum_nights,minimum_maximum_nights,maximum_maximum_nights,minimum_nights_avg_ntm,maximum_nights_avg_ntm,calendar_updated,has_availability,availability_30,availability_60,availability_90,availability_365,calendar_last_scraped,number_of_reviews,number_of_reviews_ltm,number_of_reviews_l30d,first_review,last_review,review_scores_rating,review_scores_accuracy,review_scores_cleanliness,review_scores_checkin,review_scores_communication,review_scores_location,review_scores_value,license,instant_bookable,reviews_per_month
17878,https://www.airbnb.com/rooms/17878,Condo in Rio de Janeiro · ★4.70 · 2 bedrooms · 2 beds · 1 bath,,"This is the one of the bests spots in Rio. Because of the large balcony and proximity to the beach, it has huge advantages in the current situation.",https://a0.muscache.com/pictures/65320518/30698f38_original.jpg,"Rio de Janeiro, Brazil",Copacabana,,-22.96599,-43.1794,Entire condo,Entire home/apt,5,,1 bath,,2.0,"$1,357.00",5,28,5,5,28,28,5.0,28.0,,t,5,7,14,269,2023-12-27,311,29,4,2010-07-15,2023-12-22,4.7,4.77,4.65,4.83,4.91,4.77,4.67,,f,1.9
25026,https://www.airbnb.com/rooms/25026,Rental unit in Rio de Janeiro · ★4.72 · 1 bedroom · 1 bed · 1 bath,,"Copacabana is a lively neighborhood and the apartment is located very close to an area in Copa full of bars, cafes and restaurants at Rua Bolivar and Domingos Ferreira. Copacabana never sleeps, there is always movement and it's a great mix of all kinds of people.",https://a0.muscache.com/pictures/a745aa21-b8dd-4959-a040-eb8e6e6f07ee.jpg,"Rio de Janeiro, Brazil",Copacabana,,-22.97735,-43.19105,Entire rental unit,Entire home/apt,3,,1 bath,,1.0,$865.00,2,60,2,4,60,60,2.2,60.0,,t,3,18,48,228,2023-12-27,275,29,2,2010-06-07,2023-12-03,4.72,4.7,4.79,4.81,4.92,4.84,4.6,,f,1.67
35764,https://www.airbnb.com/rooms/35764,Loft in Rio de Janeiro · ★4.90 · 1 bedroom · 1 bed · 1.5 baths,,"Our guests will experience living with a local peole ""Carioca"" in a very friendly building with 24 hours a day security with all kind of stores, banks, transports, restaurants.",https://a0.muscache.com/pictures/23782972/1d3e55b0_original.jpg,"Rio de Janeiro, Brazil",Copacabana,,-22.98107,-43.19136,Entire loft,Entire home/apt,2,,1.5 baths,,1.0,$373.00,3,15,1,6,7,15,3.1,14.7,,t,4,9,12,62,2023-12-27,454,36,2,2010-10-03,2023-12-17,4.9,4.93,4.93,4.97,4.95,4.94,4.89,,f,2.82
41198,https://www.airbnb.com/rooms/41198,Rental unit in Rio de Janeiro · ★4.21 · 2 bedrooms · 1 bath,,,https://a0.muscache.com/pictures/3576716/2d6a9301_original.jpg,,Copacabana,,-22.98102,-43.19172,Entire rental unit,Entire home/apt,5,,1 bath,,,"$1,701.00",3,365,3,7,365,365,3.2,365.0,,t,15,38,41,44,2023-12-27,17,0,0,2013-06-04,2016-02-09,4.21,3.88,4.25,4.69,4.56,4.44,4.38,,f,0.13
326205,https://www.airbnb.com/rooms/326205,Condo in Rio de Janeiro · ★4.57 · 1 bedroom · 1 bed · 1 bath,,,https://a0.muscache.com/pictures/c550151d-910c-40c6-96a8-d2a8bd770361.jpg,,Copacabana,,-22.96825,-43.18237,Entire condo,Entire home/apt,4,,1 bath,,1.0,$366.00,3,180,3,3,180,180,3.0,180.0,,t,6,22,27,293,2023-12-27,152,14,0,2012-04-18,2023-11-21,4.57,4.72,4.46,4.83,4.77,4.83,4.59,,f,1.07
326575,https://www.airbnb.com/rooms/326575,Rental unit in Rio de Janeiro · ★4.81 · 2 bedrooms · 3 beds · 2 baths,,"Come to stay in Baixo Copa, the more trendy and happy neighborhood of all Rio de Janeiro, in the heart of Copacabana, less than a half block from the beach. Restaurants, bars, grocery stores, theaters, banks, hotels and tourism agencies are in the neighborhood.",https://a0.muscache.com/pictures/4cffcbcf-16c2-4624-afee-29a7ffe20698.jpg,"Rio de Janeiro, Brazil",Copacabana,,-22.97696,-43.18933,Entire rental unit,Entire home/apt,5,,2 baths,,3.0,$368.00,4,60,4,4,60,60,4.0,60.0,,t,4,9,39,245,2023-12-27,227,12,0,2012-03-19,2023-11-21,4.81,4.85,4.8,4.91,4.89,4.95,4.73,,f,1.58
216461,https://www.airbnb.com/rooms/216461,Rental unit in Rio de Janeiro · 1 bedroom · 1 bed · 1 bath,,,https://a0.muscache.com/pictures/2628485/1ed768bb_original.jpg,,Flamengo,,-22.9399,-43.17676,Private room in rental unit,Private room,2,,1 bath,,1.0,$734.00,1,760,1,1,760,760,1.0,760.0,,t,30,60,90,365,2023-12-26,0,0,0,,,,,,,,,,,f,
48305,https://www.airbnb.com/rooms/48305,Rental unit in Ipanema · ★4.76 · 6 bedrooms · 7 beds · 7 baths,,"Enter Bossa Nova history by staying in the very street where real-life 'Girl From Ipanema' inspired Vinicius de Moraes to write the worldwide famous song, ""each day when she walks to the sea"".<br /><br />Located seconds from Ipanema Beach's best spot Posto 9, on the first beach block in the heart of Ipanema’s very best neighbourhood, enjoy staying in this elegant and modern hideaway in a peaceful residential street.<br /><br />Ipanema and Leblon are by far the safest locations in Rio, with popular restaurants, cafes and boutique stores walking distance to the apartment.",https://a0.muscache.com/pictures/miso/Hosting-48305/original/cce14bf9-c5b6-44c6-a89f-61323975afdb.jpeg,"Ipanema, Rio de Janeiro, Brazil",Ipanema,,-22.98591,-43.20302,Entire rental unit,Entire home/apt,13,,7 baths,,7.0,"$6,604.00",2,89,2,5,89,89,2.3,89.0,,t,18,38,65,319,2023-12-26,163,36,3,2011-03-02,2023-12-10,4.76,4.72,4.7,4.83,4.83,4.94,4.59,,t,1.04
216700,https://www.airbnb.com/rooms/216700,Rental unit in Rio de Janeiro · ★4.96 · 1 bedroom · 1 bed · 1 shared bath,,"O bairro de Laranjeiras é bem residencial e arborizado. Fica na Zona Sul do Rio de Janeiro, bem próximo das praias e do Centro da cidade. Na principal rua do bairro, Rua das Laranjeiras, tem ônibus que leva aos principais bairros da cidade e muito pontos turísticos. A minha rua fica distante 20 minutos a pé da estação do metro Largo do Machado. Vocês podem ir andando, 10 minutos, para a estação do trem do Corcovado que leva ao Cristo Redentor. A minha rua, Rua General Glicério, é a rua mais linda do bairro e fica em um recanto que parece uma bairro dentro do bairro. É uma rua bem segura e aos sábados tem feira livre com música ao vivo na praça. Em frente ao meu prédio tem padaria, pequenos restaurante e mini-mercados. Na rua principal, esquina com a minha rua tem bancos, supermercados, farmácias, pontos de taxi e todos os outros serviços.",https://a0.muscache.com/pictures/6162310/be07750f_original.jpg,"Rio de Janeiro, Brazil",Laranjeiras,,-22.94373,-43.19147,Private room in rental unit,Private room,4,,1 shared bath,,1.0,$300.00,3,30,3,3,30,30,3.0,30.0,,t,24,51,81,356,2023-12-28,24,3,0,2012-06-18,2023-11-05,4.96,4.91,4.86,5.0,5.0,4.91,4.77,,f,0.17
219250,https://www.airbnb.com/rooms/219250,Loft in Rio de Janeiro · ★4.82 · 1 bedroom · 2 beds · 1 bath,,,https://a0.muscache.com/pictures/60226390/d079690d_original.jpg,,Santa Teresa,,-22.91666,-43.17947,Entire loft,Entire home/apt,4,,1 bath,,2.0,$254.00,2,30,2,5,1125,1125,2.1,1125.0,,t,5,9,37,312,2023-12-26,431,25,2,2012-06-11,2023-12-11,4.82,4.87,4.74,4.94,4.87,4.76,4.78,,t,3.07


In [134]:
%%sql
DROP TABLE IF EXISTS Scrape CASCADE;
CREATE TABLE Scrape AS (
    SELECT
        scrape_id,
        source,
        last_scraped,
        calendar_last_scraped
    FROM Listings);

 * postgresql://postgres:***@localhost/airbnb_icmc_2
   postgresql://postgres:***@localhost/postgres
Done.
36008 rows affected.


[]

In [135]:
%%sql
SELECT *
FROM Scrape
LIMIT 10

 * postgresql://postgres:***@localhost/airbnb_icmc_2
   postgresql://postgres:***@localhost/postgres
10 rows affected.


scrape_id,source,last_scraped,calendar_last_scraped
20231226034138,city scrape,2023-12-27,2023-12-27
20231226034138,city scrape,2023-12-27,2023-12-27
20231226034138,city scrape,2023-12-27,2023-12-27
20231226034138,city scrape,2023-12-27,2023-12-27
20231226034138,city scrape,2023-12-27,2023-12-27
20231226034138,city scrape,2023-12-27,2023-12-27
20231226034138,city scrape,2023-12-26,2023-12-26
20231226034138,city scrape,2023-12-26,2023-12-26
20231226034138,city scrape,2023-12-28,2023-12-28
20231226034138,city scrape,2023-12-26,2023-12-26


In [136]:
%%sql
DROP TABLE IF EXISTS Amenities CASCADE;
CREATE TABLE Amenities (
    listing_id BIGINT,
    name_amenity TEXT,
    quantity INT,
    PRIMARY KEY (listing_id, name_amenity),
    FOREIGN KEY (listing_id) REFERENCES Listings_norm(id)
);

 * postgresql://postgres:***@localhost/airbnb_icmc_2
   postgresql://postgres:***@localhost/postgres
Done.
(psycopg2.errors.InvalidForeignKey) ERRO:  não há restrição de unicidade que corresponde com as colunas informadas na tabela referenciada "listings_norm"

[SQL: CREATE TABLE Amenities (
    listing_id BIGINT,
    name_amenity TEXT,
    quantity INT,
    PRIMARY KEY (listing_id, name_amenity),
    FOREIGN KEY (listing_id) REFERENCES Listings_norm(id)
);]
(Background on this error at: https://sqlalche.me/e/20/f405)


In [137]:
%%sql
DROP TABLE IF EXISTS Host CASCADE;
CREATE TABLE Host (
    host_id BIGINT PRIMARY KEY,
    host_url TEXT UNIQUE NOT NULL,
    host_name TEXT,
    host_since DATE,
    host_location TEXT,
    host_about TEXT,
    host_response_time TEXT,
    host_response_rate TEXT,
    host_acceptance_rate TEXT,
    host_is_superhost bool_sigla,
    host_thumbnail_url TEXT,
    host_picture_url TEXT,
    host_neighbourhood TEXT,
    host_listings_count INT,
    host_total_listings_count INT,
    host_has_profile_pic BOOLEAN,
    host_identity_verified BOOLEAN,
    calculated_host_listings_count INT,
    calculated_host_listings_count_entire_homes INT,
    calculated_host_listings_count_private_rooms INT,
    calculated_host_listings_count_shared_rooms INT
);

 * postgresql://postgres:***@localhost/airbnb_icmc_2
   postgresql://postgres:***@localhost/postgres
Done.
Done.


[]

In [143]:
%sql SELECT * FROM Host LIMIT 10;

 * postgresql://postgres:***@localhost/airbnb_icmc_2
   postgresql://postgres:***@localhost/postgres
0 rows affected.


host_id,host_url,host_name,host_since,host_location,host_about,host_response_time,host_response_rate,host_acceptance_rate,host_is_superhost,host_thumbnail_url,host_picture_url,host_neighbourhood,host_listings_count,host_total_listings_count,host_has_profile_pic,host_identity_verified,calculated_host_listings_count,calculated_host_listings_count_entire_homes,calculated_host_listings_count_private_rooms,calculated_host_listings_count_shared_rooms


In [138]:
%%sql
DROP TABLE IF EXISTS Host_verifications CASCADE;
CREATE TABLE Host_verifications (
    host_id BIGINT,
    verification_field TEXT,
    PRIMARY KEY (host_id, verification_field)
);

 * postgresql://postgres:***@localhost/airbnb_icmc_2
   postgresql://postgres:***@localhost/postgres
Done.
Done.


[]

In [139]:
%%sql
DROP TABLE IF EXISTS Reviewer CASCADE;
CREATE TABLE Reviewer (
    reviewer_id BIGINT PRIMARY KEY,
    reviewer_name TEXT
);

 * postgresql://postgres:***@localhost/airbnb_icmc_2
   postgresql://postgres:***@localhost/postgres
Done.
Done.


[]

In [140]:
%%sql
DROP TABLE IF EXISTS Reviews_norm CASCADE;
CREATE TABLE Reviews_norm (
    id BIGINT PRIMARY KEY,
    listing_id BIGINT,
    reviewer_id BIGINT,
    date DATE,
    comments TEXT,
    FOREIGN KEY (reviewer_id) REFERENCES Reviewer(reviewer_id)
);

 * postgresql://postgres:***@localhost/airbnb_icmc_2
   postgresql://postgres:***@localhost/postgres
Done.
Done.


[]

In [141]:
%%sql
DROP TABLE IF EXISTS Calendar_norm CASCADE;
CREATE TABLE Calendar_norm (
    listing_id BIGINT,
    date DATE,
    available bool_sigla,
    price TEXT,
    adjusted_price TEXT,
    PRIMARY KEY (listing_id, date),
    FOREIGN KEY (listing_id) REFERENCES Listings(id)
);

 * postgresql://postgres:***@localhost/airbnb_icmc_2
   postgresql://postgres:***@localhost/postgres
Done.
Done.


[]

In [142]:
%%sql
DROP TABLE IF EXISTS Info_listings CASCADE;
CREATE TABLE Info_listings (
    listing_id BIGINT PRIMARY KEY,
    minimum_nights INT,
    maximum_nights INT,
    FOREIGN KEY (listing_id) REFERENCES Listings(id)
);

 * postgresql://postgres:***@localhost/airbnb_icmc_2
   postgresql://postgres:***@localhost/postgres
Done.
Done.


[]