In [1]:
# Import dependencies
import os

import pandas as pd
from sqlalchemy import create_engine, inspect

### Load CSVs

In [2]:
# Read the listings CSV into a DataFrame and preview the first rows
listings_file = "../Resources/listings.csv"
raw_listings_df = pd.read_csv(filepath_or_buffer=listings_file)
raw_listings_df.head(n=5)

Unnamed: 0,id,name,host_id,host_name,neighbourhood_group,neighbourhood,latitude,longitude,room_type,price,minimum_nights,number_of_reviews,last_review,reviews_per_month,calculated_host_listings_count,availability_365
0,109,Amazing bright elegant condo park front *UPGRA...,521,Paolo,Other Cities,Culver City,33.98209,-118.38494,Entire home/apt,122,30,2,2016-05-15,0.02,1,14
1,344,Family perfect;Pool;Near Studios!,767,Melissa,Other Cities,Burbank,34.16562,-118.33458,Entire home/apt,168,2,8,2019-10-19,0.17,1,73
2,2708,Mirrored Mini-Suite with Fireplace - W. Hollywood,3008,Chas.,City of Los Angeles,Hollywood,34.09768,-118.34602,Private room,79,30,24,2020-03-17,0.34,2,281
3,2732,Zen Life at the Beach,3041,Yoga Priestess,Other Cities,Santa Monica,34.00475,-118.48127,Private room,155,1,21,2019-12-27,0.19,2,365
4,2864,*Upscale Professional Home with Beautiful Studio*,3207,Bernadine,Other Cities,Bellflower,33.87619,-118.11397,Entire home/apt,80,2,0,,,1,0


In [3]:
# Read the reviews CSV into a DataFrame and preview the first rows
reviews_file = "../Resources/reviews.csv"
raw_reviews_df = pd.read_csv(filepath_or_buffer=reviews_file)
raw_reviews_df.head(n=5)

Unnamed: 0,listing_id,id,date,reviewer_id,reviewer_name,comments
0,109,449036,2011-08-15,927861,Edwin,The host canceled my reservation the day befor...
1,109,74506539,2016-05-15,22509885,Jenn,Me and two friends stayed for four and a half ...
2,344,79805581,2016-06-14,2089550,Drew & Katie,We really enjoyed our stay here in Burbank! Th...
3,344,120725697,2016-12-11,32602867,Christopher,I had a ton of fun learning to play Go with Fu...
4,344,123800867,2016-12-30,35822259,May,The host canceled this reservation the day bef...


In [4]:
# Keep only the listings whose neighbourhood group is the City of Los Angeles,
# and narrow the frame to the columns of interest.
listing_columns = [
    "id",
    "name",
    "host_name",
    "neighbourhood",
    "room_type",
    "price",
    "number_of_reviews",
    "last_review",
]
is_city_of_la = raw_listings_df["neighbourhood_group"].eq("City of Los Angeles")
central_listing_df = raw_listings_df.loc[is_city_of_la, listing_columns]

central_listing_df.head()

Unnamed: 0,id,name,host_name,neighbourhood,room_type,price,number_of_reviews,last_review
2,2708,Mirrored Mini-Suite with Fireplace - W. Hollywood,Chas.,Hollywood,Private room,79,24,2020-03-17
5,3021,Hollywood Hills Zen Modern style Apt/Guesthouse,Nataraj,Hollywood Hills West,Entire home/apt,145,23,2018-10-31
6,5728,Tiny Home in Artistic Oasis near Venice and LAX,Sanni,Del Rey,Private room,75,309,2020-03-13
7,5729,Zen Room with Floating Bed near Venice and LAX,Sanni,Del Rey,Private room,105,228,2020-03-09
8,5843,Artist Oasis near Venice Beach w/ Beautiful Ga...,Sanni,Del Rey,Entire home/apt,303,126,2020-03-16


In [5]:
# Rename the reviews' listing key to "id" so it matches the listings frame for the join.
# NOTE(review): the reviews' own "id" takes a different value on every review row,
# so the "host id" label looks like a misnomer (probably a review id) - confirm.
review_column_names = {"listing_id": "id", "id": "host id"}
raw_reviews_df_id = raw_reviews_df.rename(columns=review_column_names)
raw_reviews_df_id.head()

Unnamed: 0,id,host id,date,reviewer_id,reviewer_name,comments
0,109,449036,2011-08-15,927861,Edwin,The host canceled my reservation the day befor...
1,109,74506539,2016-05-15,22509885,Jenn,Me and two friends stayed for four and a half ...
2,344,79805581,2016-06-14,2089550,Drew & Katie,We really enjoyed our stay here in Burbank! Th...
3,344,120725697,2016-12-11,32602867,Christopher,I had a ton of fun learning to play Go with Fu...
4,344,123800867,2016-12-30,35822259,May,The host canceled this reservation the day bef...


In [6]:
# (rows, columns) of the City of Los Angeles listings subset
central_listing_df.shape

(22058, 8)

In [12]:
# (rows, columns) of the reviews frame after its columns were renamed
raw_reviews_df_id.shape

(1339115, 6)

In [25]:
# Inner-join the listings with their reviews on the shared "id" column
complete_df = central_listing_df.merge(raw_reviews_df_id, on="id", how="inner")
complete_df.head(n=30)

Unnamed: 0,id,name,host_name,neighbourhood,room_type,price,number_of_reviews,last_review,host id,date,reviewer_id,reviewer_name,comments
0,2708,Mirrored Mini-Suite with Fireplace - W. Hollywood,Chas.,Hollywood,Private room,79,24,2020-03-17,13994902,2014-06-09,10905424,Kuberan,i had a wonderful stay. Everything from start ...
1,2708,Mirrored Mini-Suite with Fireplace - W. Hollywood,Chas.,Hollywood,Private room,79,24,2020-03-17,14606598,2014-06-23,2247288,Camilla,Charles is just amazing and he made my stay sp...
2,2708,Mirrored Mini-Suite with Fireplace - W. Hollywood,Chas.,Hollywood,Private room,79,24,2020-03-17,39597339,2015-07-25,27974696,Fallon,Staying with Chas was an absolute pleasure. He...
3,2708,Mirrored Mini-Suite with Fireplace - W. Hollywood,Chas.,Hollywood,Private room,79,24,2020-03-17,61157407,2016-02-01,33226412,Haroon,Charles is a most wonderful host. I enjoyed my...
4,2708,Mirrored Mini-Suite with Fireplace - W. Hollywood,Chas.,Hollywood,Private room,79,24,2020-03-17,66196280,2016-03-20,23408691,Massimo Litterio,Chas is a really good host. He gives me a lot ...
5,2708,Mirrored Mini-Suite with Fireplace - W. Hollywood,Chas.,Hollywood,Private room,79,24,2020-03-17,222420570,2017-12-29,155985882,Manami,ﾎｽﾄはとても親切で英語の発音の仕方など丁寧に教えてくれた｡\n\n部屋はﾘﾋﾞﾝｸﾞを板で...
6,2708,Mirrored Mini-Suite with Fireplace - W. Hollywood,Chas.,Hollywood,Private room,79,24,2020-03-17,225209928,2018-01-05,6840784,Diego,"Chas house feels like home, it is clean, it ha..."
7,2708,Mirrored Mini-Suite with Fireplace - W. Hollywood,Chas.,Hollywood,Private room,79,24,2020-03-17,227042674,2018-01-13,162469426,Hanbin,1. Clean 2. Host is very friendly 3. Reasonabl...
8,2708,Mirrored Mini-Suite with Fireplace - W. Hollywood,Chas.,Hollywood,Private room,79,24,2020-03-17,284617908,2018-07-01,93292025,Harjinder,Wow; Charles was a delight to meet a very warm...
9,2708,Mirrored Mini-Suite with Fireplace - W. Hollywood,Chas.,Hollywood,Private room,79,24,2020-03-17,291375787,2018-07-15,183639942,Taylor,Chas. has a great space ! Loved how clean ever...


In [26]:
# (rows, columns) of the merged listings + reviews frame
complete_df.shape

(811168, 13)

In [27]:
# Rename the column headers to presentation-friendly names.
# NOTE(review): "host id" is the reviews file's own id column (renamed in an
# earlier cell); it looks like a per-review id rather than a host id - confirm
# before relying on the "Host ID" label.
renamed_complete_df = complete_df.rename(columns={
    "id": "Listing ID",
    "name": "Listing Title",
    "host_name": "Host Name",
    "host id": "Host ID",
    "neighbourhood": "Region",
    "room_type": "Room Type",
    "price": "Price per Night",
    "number_of_reviews": "Number of Reviews",
    "last_review": "Last Review Time",
    "date": "Review Date",
    "reviewer_name": "Reviewer Name",
    "reviewer_id": "Reviewer ID",
    "comments": "Comments",
})

# Clean the data by dropping duplicates and nulls.
# dropna() returns a new frame rather than modifying in place, so the result
# is re-assigned; otherwise rows with nulls would silently remain.
# copy() gives an independent frame for the column assignments in later cells.
# NOTE(review): de-duplicating on "Comments" alone also removes distinct reviews
# that happen to share identical text - confirm this is intended.
renamed_complete_df = (
    renamed_complete_df
    .drop_duplicates(subset="Comments")
    .dropna()
    .copy()
)

renamed_complete_df.head(20)

Unnamed: 0,Listing ID,Listing Title,Host Name,Region,Room Type,Price per Night,Number of Reviews,Last Review Time,Host ID,Review Date,Reviewer ID,Reviewer Name,Comments
0,2708,Mirrored Mini-Suite with Fireplace - W. Hollywood,Chas.,Hollywood,Private room,79,24,2020-03-17,13994902,2014-06-09,10905424,Kuberan,i had a wonderful stay. Everything from start ...
1,2708,Mirrored Mini-Suite with Fireplace - W. Hollywood,Chas.,Hollywood,Private room,79,24,2020-03-17,14606598,2014-06-23,2247288,Camilla,Charles is just amazing and he made my stay sp...
2,2708,Mirrored Mini-Suite with Fireplace - W. Hollywood,Chas.,Hollywood,Private room,79,24,2020-03-17,39597339,2015-07-25,27974696,Fallon,Staying with Chas was an absolute pleasure. He...
3,2708,Mirrored Mini-Suite with Fireplace - W. Hollywood,Chas.,Hollywood,Private room,79,24,2020-03-17,61157407,2016-02-01,33226412,Haroon,Charles is a most wonderful host. I enjoyed my...
4,2708,Mirrored Mini-Suite with Fireplace - W. Hollywood,Chas.,Hollywood,Private room,79,24,2020-03-17,66196280,2016-03-20,23408691,Massimo Litterio,Chas is a really good host. He gives me a lot ...
5,2708,Mirrored Mini-Suite with Fireplace - W. Hollywood,Chas.,Hollywood,Private room,79,24,2020-03-17,222420570,2017-12-29,155985882,Manami,ﾎｽﾄはとても親切で英語の発音の仕方など丁寧に教えてくれた｡\n\n部屋はﾘﾋﾞﾝｸﾞを板で...
6,2708,Mirrored Mini-Suite with Fireplace - W. Hollywood,Chas.,Hollywood,Private room,79,24,2020-03-17,225209928,2018-01-05,6840784,Diego,"Chas house feels like home, it is clean, it ha..."
7,2708,Mirrored Mini-Suite with Fireplace - W. Hollywood,Chas.,Hollywood,Private room,79,24,2020-03-17,227042674,2018-01-13,162469426,Hanbin,1. Clean 2. Host is very friendly 3. Reasonabl...
8,2708,Mirrored Mini-Suite with Fireplace - W. Hollywood,Chas.,Hollywood,Private room,79,24,2020-03-17,284617908,2018-07-01,93292025,Harjinder,Wow; Charles was a delight to meet a very warm...
9,2708,Mirrored Mini-Suite with Fireplace - W. Hollywood,Chas.,Hollywood,Private room,79,24,2020-03-17,291375787,2018-07-15,183639942,Taylor,Chas. has a great space ! Loved how clean ever...


### Create database connection

In [28]:
# Build the SQLAlchemy engine for the ETL Postgres database.
# The "user:password@host:port/dbname" part can be supplied through the
# ETL_DB_CONNECTION environment variable so real credentials need not be
# committed with the notebook; the fallback is the local development default.
connection_string = os.environ.get("ETL_DB_CONNECTION", "postgres:postgres@localhost:5432/ETL")
engine = create_engine(f'postgresql://{connection_string}')

In [29]:
# Confirm which tables exist in the database.
# Engine.table_names() was deprecated in SQLAlchemy 1.4 and removed in 2.0;
# the inspector API returns the same list of names and works on both.
inspect(engine).get_table_names()

['listing_review']

### Load DataFrame into database

In [30]:
# Inspect the column dtypes before writing the frame to the database
renamed_complete_df.dtypes

Listing ID            int64
Listing Title        object
Host Name            object
Region               object
Room Type            object
Price per Night       int64
Number of Reviews     int64
Last Review Time     object
Host ID               int64
Review Date          object
Reviewer ID           int64
Reviewer Name        object
Comments             object
dtype: object

In [31]:
# Parse the YYYY-MM-DD strings in "Last Review Time" into datetimes
renamed_complete_df["Last Review Time"] = renamed_complete_df["Last Review Time"].pipe(
    pd.to_datetime, format="%Y-%m-%d"
)

In [32]:
# Parse the YYYY-MM-DD strings in "Review Date" into datetimes
renamed_complete_df["Review Date"] = renamed_complete_df["Review Date"].pipe(
    pd.to_datetime, format="%Y-%m-%d"
)

In [33]:
# Re-check the dtypes after the two date columns were converted
renamed_complete_df.dtypes

Listing ID                    int64
Listing Title                object
Host Name                    object
Region                       object
Room Type                    object
Price per Night               int64
Number of Reviews             int64
Last Review Time     datetime64[ns]
Host ID                       int64
Review Date          datetime64[ns]
Reviewer ID                   int64
Reviewer Name                object
Comments                     object
dtype: object

In [34]:
# Write the cleaned frame to the "listing_review" table, replacing any existing
# table of that name and storing the DataFrame index as a column
renamed_complete_df.to_sql(
    name="listing_review",
    con=engine,
    index=True,
    if_exists="replace",
)

In [35]:
# Read the table back, parsing both date columns, to verify the load
date_columns = ["Review Date", "Last Review Time"]
check_df = pd.read_sql(
    sql="SELECT * FROM listing_review",
    con=engine,
    parse_dates=date_columns,
)
check_df.head()

Unnamed: 0,index,Listing ID,Listing Title,Host Name,Region,Room Type,Price per Night,Number of Reviews,Last Review Time,Host ID,Review Date,Reviewer ID,Reviewer Name,Comments
0,0,2708,Mirrored Mini-Suite with Fireplace - W. Hollywood,Chas.,Hollywood,Private room,79,24,2020-03-17,13994902,2014-06-09,10905424,Kuberan,i had a wonderful stay. Everything from start ...
1,1,2708,Mirrored Mini-Suite with Fireplace - W. Hollywood,Chas.,Hollywood,Private room,79,24,2020-03-17,14606598,2014-06-23,2247288,Camilla,Charles is just amazing and he made my stay sp...
2,2,2708,Mirrored Mini-Suite with Fireplace - W. Hollywood,Chas.,Hollywood,Private room,79,24,2020-03-17,39597339,2015-07-25,27974696,Fallon,Staying with Chas was an absolute pleasure. He...
3,3,2708,Mirrored Mini-Suite with Fireplace - W. Hollywood,Chas.,Hollywood,Private room,79,24,2020-03-17,61157407,2016-02-01,33226412,Haroon,Charles is a most wonderful host. I enjoyed my...
4,4,2708,Mirrored Mini-Suite with Fireplace - W. Hollywood,Chas.,Hollywood,Private room,79,24,2020-03-17,66196280,2016-03-20,23408691,Massimo Litterio,Chas is a really good host. He gives me a lot ...


In [36]:
# Confirm the dtypes of the frame read back from the database
check_df.dtypes

index                         int64
Listing ID                    int64
Listing Title                object
Host Name                    object
Region                       object
Room Type                    object
Price per Night               int64
Number of Reviews             int64
Last Review Time     datetime64[ns]
Host ID                       int64
Review Date          datetime64[ns]
Reviewer ID                   int64
Reviewer Name                object
Comments                     object
dtype: object