In [1]:
# Import 
import os
from getpass import getpass
from urllib.parse import quote_plus

import pandas as pd
from sqlalchemy import create_engine
from sqlalchemy import inspect
%matplotlib notebook 
import matplotlib.pyplot as plt
import numpy as np



### Load CSVs

In [2]:
# Load the listings export (path is relative to this notebook's folder)
# into a DataFrame and preview its first five rows.
listings_file = "../Resources/listings.csv"
raw_listings_df = pd.read_csv(filepath_or_buffer=listings_file)
raw_listings_df.head(n=5)

Unnamed: 0,id,name,host_id,host_name,neighbourhood_group,neighbourhood,latitude,longitude,room_type,price,minimum_nights,number_of_reviews,last_review,reviews_per_month,calculated_host_listings_count,availability_365
0,109,Amazing bright elegant condo park front *UPGRA...,521,Paolo,Other Cities,Culver City,33.98209,-118.38494,Entire home/apt,122,30,2,2016-05-15,0.02,1,14
1,344,Family perfect;Pool;Near Studios!,767,Melissa,Other Cities,Burbank,34.16562,-118.33458,Entire home/apt,168,2,8,2019-10-19,0.17,1,73
2,2708,Mirrored Mini-Suite with Fireplace - W. Hollywood,3008,Chas.,City of Los Angeles,Hollywood,34.09768,-118.34602,Private room,79,30,24,2020-03-17,0.34,2,281
3,2732,Zen Life at the Beach,3041,Yoga Priestess,Other Cities,Santa Monica,34.00475,-118.48127,Private room,155,1,21,2019-12-27,0.19,2,365
4,2864,*Upscale Professional Home with Beautiful Studio*,3207,Bernadine,Other Cities,Bellflower,33.87619,-118.11397,Entire home/apt,80,2,0,,,1,0


In [3]:
#raw_listings_df.loc[raw_listings_df["host_id"]==3008]

In [4]:
# Load the reviews export (path is relative to this notebook's folder)
# into a DataFrame and preview its first five rows.
reviews_file = "../Resources/reviews.csv"
raw_reviews_df = pd.read_csv(filepath_or_buffer=reviews_file)
raw_reviews_df.head(n=5)

Unnamed: 0,listing_id,id,date,reviewer_id,reviewer_name,comments
0,109,449036,2011-08-15,927861,Edwin,The host canceled my reservation the day befor...
1,109,74506539,2016-05-15,22509885,Jenn,Me and two friends stayed for four and a half ...
2,344,79805581,2016-06-14,2089550,Drew & Katie,We really enjoyed our stay here in Burbank! Th...
3,344,120725697,2016-12-11,32602867,Christopher,I had a ton of fun learning to play Go with Fu...
4,344,123800867,2016-12-30,35822259,May,The host canceled this reservation the day bef...


In [5]:
#raw_reviews_df.loc[raw_reviews_df["id"]==3008]

Unnamed: 0,listing_id,id,date,reviewer_id,reviewer_name,comments


In [6]:
# Keep only listings whose neighbourhood group is "City of Los Angeles",
# and only the columns that the rest of the notebook works with.
listing_columns = [
    "id",
    "name",
    "host_name",
    "neighbourhood",
    "room_type",
    "price",
    "number_of_reviews",
    "last_review",
]
in_city_of_la = raw_listings_df["neighbourhood_group"] == "City of Los Angeles"

# .copy() yields an independent frame rather than a slice of raw_listings_df.
central_listing_df = raw_listings_df.loc[in_city_of_la, listing_columns].copy()

central_listing_df.head(5)

Unnamed: 0,id,name,host_name,neighbourhood,room_type,price,number_of_reviews,last_review
2,2708,Mirrored Mini-Suite with Fireplace - W. Hollywood,Chas.,Hollywood,Private room,79,24,2020-03-17
5,3021,Hollywood Hills Zen Modern style Apt/Guesthouse,Nataraj,Hollywood Hills West,Entire home/apt,145,23,2018-10-31
6,5728,Tiny Home in Artistic Oasis near Venice and LAX,Sanni,Del Rey,Private room,75,309,2020-03-13
7,5729,Zen Room with Floating Bed near Venice and LAX,Sanni,Del Rey,Private room,105,228,2020-03-09
8,5843,Artist Oasis near Venice Beach w/ Beautiful Ga...,Sanni,Del Rey,Entire home/apt,303,126,2020-03-16


In [7]:
# Inspect the column dtypes of the filtered listings frame.
# Note: last_review is read as a plain object (string) column, not a datetime.
central_listing_df.dtypes


id                    int64
name                 object
host_name            object
neighbourhood        object
room_type            object
price                 int64
number_of_reviews     int64
last_review          object
dtype: object

In [8]:
# Work on a copy so the frame loaded from the CSV (raw_reviews_df) stays untouched.
reviews_df=raw_reviews_df.copy()

In [9]:
# Give the reviews frame the same join key as the listings frame:
# "listing_id" becomes "id". The reviews' own "id" column is relabelled
# "host id" so it does not collide with the new key.
# NOTE(review): in the preview that column takes a different value for every
# review of the same listing, so it looks like a per-review identifier rather
# than a host identifier -- confirm the intended column name.
review_column_names = {"listing_id": "id", "id": "host id"}
raw_reviews_df_id = reviews_df.rename(columns=review_column_names)
raw_reviews_df_id.head(5)

Unnamed: 0,id,host id,date,reviewer_id,reviewer_name,comments
0,109,449036,2011-08-15,927861,Edwin,The host canceled my reservation the day befor...
1,109,74506539,2016-05-15,22509885,Jenn,Me and two friends stayed for four and a half ...
2,344,79805581,2016-06-14,2089550,Drew & Katie,We really enjoyed our stay here in Burbank! Th...
3,344,120725697,2016-12-11,32602867,Christopher,I had a ton of fun learning to play Go with Fu...
4,344,123800867,2016-12-30,35822259,May,The host canceled this reservation the day bef...


In [10]:
# Inner-join the filtered listings with their reviews on the shared "id"
# column; rows without a match on either side are left out of the result.
complete_df = central_listing_df.merge(raw_reviews_df_id, how="inner", on="id")
complete_df.head(30)

Unnamed: 0,id,name,host_name,neighbourhood,room_type,price,number_of_reviews,last_review,host id,date,reviewer_id,reviewer_name,comments
0,2708,Mirrored Mini-Suite with Fireplace - W. Hollywood,Chas.,Hollywood,Private room,79,24,2020-03-17,13994902,2014-06-09,10905424,Kuberan,i had a wonderful stay. Everything from start ...
1,2708,Mirrored Mini-Suite with Fireplace - W. Hollywood,Chas.,Hollywood,Private room,79,24,2020-03-17,14606598,2014-06-23,2247288,Camilla,Charles is just amazing and he made my stay sp...
2,2708,Mirrored Mini-Suite with Fireplace - W. Hollywood,Chas.,Hollywood,Private room,79,24,2020-03-17,39597339,2015-07-25,27974696,Fallon,Staying with Chas was an absolute pleasure. He...
3,2708,Mirrored Mini-Suite with Fireplace - W. Hollywood,Chas.,Hollywood,Private room,79,24,2020-03-17,61157407,2016-02-01,33226412,Haroon,Charles is a most wonderful host. I enjoyed my...
4,2708,Mirrored Mini-Suite with Fireplace - W. Hollywood,Chas.,Hollywood,Private room,79,24,2020-03-17,66196280,2016-03-20,23408691,Massimo Litterio,Chas is a really good host. He gives me a lot ...
5,2708,Mirrored Mini-Suite with Fireplace - W. Hollywood,Chas.,Hollywood,Private room,79,24,2020-03-17,222420570,2017-12-29,155985882,Manami,ﾎｽﾄはとても親切で英語の発音の仕方など丁寧に教えてくれた｡\n\n部屋はﾘﾋﾞﾝｸﾞを板で...
6,2708,Mirrored Mini-Suite with Fireplace - W. Hollywood,Chas.,Hollywood,Private room,79,24,2020-03-17,225209928,2018-01-05,6840784,Diego,"Chas house feels like home, it is clean, it ha..."
7,2708,Mirrored Mini-Suite with Fireplace - W. Hollywood,Chas.,Hollywood,Private room,79,24,2020-03-17,227042674,2018-01-13,162469426,Hanbin,1. Clean 2. Host is very friendly 3. Reasonabl...
8,2708,Mirrored Mini-Suite with Fireplace - W. Hollywood,Chas.,Hollywood,Private room,79,24,2020-03-17,284617908,2018-07-01,93292025,Harjinder,Wow; Charles was a delight to meet a very warm...
9,2708,Mirrored Mini-Suite with Fireplace - W. Hollywood,Chas.,Hollywood,Private room,79,24,2020-03-17,291375787,2018-07-15,183639942,Taylor,Chas. has a great space ! Loved how clean ever...


In [11]:
# Map the raw column names of the merged frame to human-readable headers.
display_names = {
    "id": "Listing ID",
    "name": "Listing Title",
    "host_name": "Host Name",
    "host id": "Host ID",
    "neighbourhood": "Region",
    "room_type": "Room Type",
    "price": "Price per Night",
    "number_of_reviews": "Number of Reviews",
    "last_review": "Last Review Time",
    "date": "Review Date",
    "reviewer_name": "Reviewer Name",
    "reviewer_id": "Reviewer ID",
    "comments": "Comments",
}
renamed_complete_df = complete_df.rename(columns=display_names)

# The duplicate/null cleanup below is currently disabled (commented out),
# so no rows are dropped in this cell.
#renamed_complete_df.drop_duplicates("Comments", inplace=True)
#renamed_complete_df.dropna()

renamed_complete_df.head(10)

Unnamed: 0,Listing ID,Listing Title,Host Name,Region,Room Type,Price per Night,Number of Reviews,Last Review Time,Host ID,Review Date,Reviewer ID,Reviewer Name,Comments
0,2708,Mirrored Mini-Suite with Fireplace - W. Hollywood,Chas.,Hollywood,Private room,79,24,2020-03-17,13994902,2014-06-09,10905424,Kuberan,i had a wonderful stay. Everything from start ...
1,2708,Mirrored Mini-Suite with Fireplace - W. Hollywood,Chas.,Hollywood,Private room,79,24,2020-03-17,14606598,2014-06-23,2247288,Camilla,Charles is just amazing and he made my stay sp...
2,2708,Mirrored Mini-Suite with Fireplace - W. Hollywood,Chas.,Hollywood,Private room,79,24,2020-03-17,39597339,2015-07-25,27974696,Fallon,Staying with Chas was an absolute pleasure. He...
3,2708,Mirrored Mini-Suite with Fireplace - W. Hollywood,Chas.,Hollywood,Private room,79,24,2020-03-17,61157407,2016-02-01,33226412,Haroon,Charles is a most wonderful host. I enjoyed my...
4,2708,Mirrored Mini-Suite with Fireplace - W. Hollywood,Chas.,Hollywood,Private room,79,24,2020-03-17,66196280,2016-03-20,23408691,Massimo Litterio,Chas is a really good host. He gives me a lot ...
5,2708,Mirrored Mini-Suite with Fireplace - W. Hollywood,Chas.,Hollywood,Private room,79,24,2020-03-17,222420570,2017-12-29,155985882,Manami,ﾎｽﾄはとても親切で英語の発音の仕方など丁寧に教えてくれた｡\n\n部屋はﾘﾋﾞﾝｸﾞを板で...
6,2708,Mirrored Mini-Suite with Fireplace - W. Hollywood,Chas.,Hollywood,Private room,79,24,2020-03-17,225209928,2018-01-05,6840784,Diego,"Chas house feels like home, it is clean, it ha..."
7,2708,Mirrored Mini-Suite with Fireplace - W. Hollywood,Chas.,Hollywood,Private room,79,24,2020-03-17,227042674,2018-01-13,162469426,Hanbin,1. Clean 2. Host is very friendly 3. Reasonabl...
8,2708,Mirrored Mini-Suite with Fireplace - W. Hollywood,Chas.,Hollywood,Private room,79,24,2020-03-17,284617908,2018-07-01,93292025,Harjinder,Wow; Charles was a delight to meet a very warm...
9,2708,Mirrored Mini-Suite with Fireplace - W. Hollywood,Chas.,Hollywood,Private room,79,24,2020-03-17,291375787,2018-07-15,183639942,Taylor,Chas. has a great space ! Loved how clean ever...


In [None]:
#renamed_complete_df.dtypes

In [12]:
# Load the property-details CSV (path is relative to this notebook's folder)
# and preview its first five rows.
a_path = "../Resources/test.csv"
a_df = pd.read_csv(filepath_or_buffer=a_path)
a_df.head(n=5)

Unnamed: 0,id,property_type,room_type,amenities,accommodates,bathrooms,bed_type,cancellation_policy,cleaning_fee,city,...,latitude,longitude,name,neighbourhood,number_of_reviews,review_scores_rating,thumbnail_url,zipcode,bedrooms,beds
0,3895911,Apartment,Private room,"{TV,""Cable TV"",Kitchen,""Free parking on premis...",2,1.0,Real Bed,flexible,True,LA,...,34.028372,-118.494449,Santa Monica Private Bedroom/Bathroom Suite,Santa Monica,6,97.0,https://a0.muscache.com/im/pictures/92355eae-b...,90403,1.0,1.0
1,9710289,Apartment,Entire home/apt,"{TV,""Cable TV"",""Wireless Internet"",""Air condit...",3,1.0,Real Bed,moderate,True,NYC,...,40.72038,-73.942329,"Bright, charming luxury 1 BR with amazing rooftop",Williamsburg,2,80.0,https://a0.muscache.com/im/pictures/da03e413-d...,11222,1.0,1.0
2,9051635,Apartment,Private room,"{""Wireless Internet"",Kitchen,Heating,""Family/k...",1,1.0,Real Bed,moderate,True,SF,...,37.785434,-122.470284,Private room in charming apartment,Richmond District,2,100.0,https://a0.muscache.com/im/pictures/0ba7d8aa-9...,94118,1.0,1.0
3,708374,Apartment,Entire home/apt,"{TV,""Cable TV"",Internet,""Wireless Internet"",""W...",1,1.0,Real Bed,strict,True,LA,...,33.976026,-118.463471,Marina del Rey Beach Jr 1 Bdrm 5,Marina Del Rey,7,94.0,https://a0.muscache.com/im/pictures/30279741/4...,90292,0.0,1.0
4,626296,Apartment,Entire home/apt,"{TV,Internet,""Wireless Internet"",""Air conditio...",2,1.0,Real Bed,flexible,True,NYC,...,40.735573,-74.005996,Bright Studio Loft Prime Location,West Village,0,,https://a0.muscache.com/im/pictures/9384e262-8...,10014,1.0,1.0


In [13]:
# List every column of the property-details frame (the head() preview elides the middle ones).
a_df.columns

Index(['id', 'property_type', 'room_type', 'amenities', 'accommodates',
       'bathrooms', 'bed_type', 'cancellation_policy', 'cleaning_fee', 'city',
       'description', 'first_review', 'host_has_profile_pic',
       'host_identity_verified', 'host_response_rate', 'host_since',
       'instant_bookable', 'last_review', 'latitude', 'longitude', 'name',
       'neighbourhood', 'number_of_reviews', 'review_scores_rating',
       'thumbnail_url', 'zipcode', 'bedrooms', 'beds'],
      dtype='object')

In [14]:
# Keep only the rows whose city is "LA", restricted to the property
# attributes used for the property-type analysis.
property_columns = [
    "id",
    "property_type",
    "room_type",
    "accommodates",
    "bathrooms",
    "bed_type",
    "city",
    "name",
    "neighbourhood",
    "number_of_reviews",
    "review_scores_rating",
    "bedrooms",
    "beds",
]
is_la = a_df["city"] == "LA"

# .copy() yields an independent frame rather than a slice of a_df.
property_type_df = a_df.loc[is_la, property_columns].copy()

property_type_df

Unnamed: 0,id,property_type,room_type,accommodates,bathrooms,bed_type,city,name,neighbourhood,number_of_reviews,review_scores_rating,bedrooms,beds
0,3895911,Apartment,Private room,2,1.0,Real Bed,LA,Santa Monica Private Bedroom/Bathroom Suite,Santa Monica,6,97.0,1.0,1.0
3,708374,Apartment,Entire home/apt,1,1.0,Real Bed,LA,Marina del Rey Beach Jr 1 Bdrm 5,Marina Del Rey,7,94.0,0.0,1.0
5,3309829,Townhouse,Private room,3,2.0,Real Bed,LA,Private Room +Queen size double bed,,8,98.0,1.0,1.0
13,16014460,Apartment,Private room,1,1.0,Real Bed,LA,Great Location In Sunny Cali! dbl,Palms,30,93.0,1.0,1.0
15,12067734,Apartment,Entire home/apt,6,2.0,Real Bed,LA,Elegant Designer's Penthouse! New listing. View!,Westlake,18,100.0,2.0,3.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...
25443,13590200,House,Private room,2,1.0,Real Bed,LA,Los Angeles home centraly located,Westchester/Playa Del Rey,1,100.0,1.0,2.0
25445,9297698,Apartment,Private room,1,1.0,Real Bed,LA,1BD w/ Street View in Trendy WeHo,West Hollywood,0,,1.0,1.0
25449,13311005,Apartment,Entire home/apt,4,2.0,Real Bed,LA,Long Beach Beverly Plaza Apartment,,0,,2.0,4.0
25454,13550830,Apartment,Private room,1,2.5,Real Bed,LA,Luxury Apartment Toluca Lake,Toluca Lake,0,,1.0,1.0


In [None]:
# NOTE(review): disabled experiment (merge on latitude/longitude). `c_df` is not
# defined in any cell visible here, so these lines would fail if uncommented as-is.
#e_df=c_df.rename(columns={"id": "host_id"})
#e_df
#d_df = pd.merge(raw_listings_df, e_df, how='inner', on=["latitude","longitude"])
#d_df

In [15]:
# Count the LA listings per property type and keep the 8 most frequent
# categories (value_counts() returns counts sorted in descending order).
t = property_type_df["property_type"].value_counts().head(8)

# The index of the counts Series holds the property-type names. `t` already
# has at most 8 entries, so its full index is used directly instead of a
# slice with a different, larger bound.
labels = t.index
labels

Index(['Apartment', 'House', 'Condominium', 'Townhouse', 'Guesthouse', 'Loft',
       'Bungalow', 'Other'],
      dtype='object')

In [16]:
#r=property_type_df["room_type"].value_counts().head(8)


In [17]:
# Pie chart showing the share of each of the most common property types
# (counts in `t`, category names in `labels`).
plt.figure(figsize=(7, 4))
plt.pie(t, autopct='%1.1f%%', labels=labels, startangle=45, textprops={'size': 'smaller'})
plt.axis("equal")  # equal aspect ratio so the pie is drawn as a circle
plt.legend(labels, loc="center left")
# Adjust the layout BEFORE rendering: calling tight_layout() after show()
# acts on a figure that has already been displayed (or, on non-interactive
# backends, on a fresh empty figure), so the adjustment would be lost.
plt.tight_layout()
plt.show()


<IPython.core.display.Javascript object>

### Create database connection

In [18]:
# Build the PostgreSQL connection URL without committing credentials to the
# notebook. Settings come from the standard libpq environment variables
# (PGUSER, PGPASSWORD, PGHOST, PGPORT, PGDATABASE); if no password is set in
# the environment, it is requested interactively and never stored in the file.
# NOTE(review): the password previously hard-coded here should be rotated.
db_user = os.environ.get("PGUSER", "postgres")
db_password = os.environ.get("PGPASSWORD") or getpass("PostgreSQL password: ")
db_host = os.environ.get("PGHOST", "localhost")
db_port = os.environ.get("PGPORT", "5432")
db_name = os.environ.get("PGDATABASE", "ETL")

# quote_plus() escapes characters such as "@", ":" or "/" that would
# otherwise break the URL when they appear in the user name or password.
connection_string = (
    f"{quote_plus(db_user)}:{quote_plus(db_password)}@{db_host}:{db_port}/{db_name}"
)
engine = create_engine(f'postgresql://{connection_string}')

In [19]:
# Confirm which tables currently exist in the database.
# Engine.table_names() is deprecated in SQLAlchemy 1.4 and removed in 2.0;
# the inspector API returns the same list of table names and is available
# in both older and newer releases.
inspect(engine).get_table_names()

['property_type', 'listing_review']

### Load DataFrame into database

In [20]:
# Write the merged listing/review frame to the "listing_review" table.
# An existing table of that name is replaced, and the DataFrame index is
# stored as an extra column.
renamed_complete_df.to_sql(
    name="listing_review",
    con=engine,
    if_exists="replace",
    index=True,
)

In [21]:
# Write the LA property-details frame to the "property_type" table.
# An existing table of that name is replaced, and the DataFrame index is
# stored as an extra column.
property_type_df.to_sql(
    name="property_type",
    con=engine,
    if_exists="replace",
    index=True,
)