In [15]:
# Run this notebook to load the unfiltered data into your local PostgreSQL database.
# Before running, do the following:
# 1.) Create philly_cuisine_db in pgAdmin
# 2.) Run philly_cuisine_schema.sql
# 3.) Create a config.py containing the line: pw = "your postgres password"

In [16]:
import pandas as pd

In [17]:
# Read the raw business records from the JSON export (path is relative to
# this notebook) and preview the first rows.
businesses_json_path = "../data/businesses.json"
df = pd.read_json(businesses_json_path)
df.head()

Unnamed: 0,_id,business_id,name,address,city,state,postal_code,latitude,longitude,stars,review_count,is_open,categories
0,{'$oid': '633640780f0a38ccd2241644'},MTSW4McQd7CbVtyjqoe9mw,St Honore Pastries,935 Race St,Philadelphia,PA,19107,39.955505,-75.155564,4.0,80,1,"Restaurants, Food, Bubble Tea, Coffee & Tea, B..."
1,{'$oid': '633640780f0a38ccd2241650'},MUTTqe8uqyMdBl186RmNeA,Tuna Bar,205 Race St,Philadelphia,PA,19106,39.953949,-75.143226,4.0,245,1,"Sushi Bars, Restaurants, Japanese"
2,{'$oid': '633640780f0a38ccd2241654'},ROeacJQwBeh05Rqg7F6TCg,BAP,1224 South St,Philadelphia,PA,19147,39.943223,-75.162568,4.5,205,1,"Korean, Restaurants"
3,{'$oid': '633640780f0a38ccd224165d'},QdN72BWoyFypdGJhhI5r7g,Bar One,767 S 9th St,Philadelphia,PA,19147,39.939825,-75.157447,4.0,65,0,"Cocktail Bars, Bars, Italian, Nightlife, Resta..."
4,{'$oid': '633640780f0a38ccd2241660'},Mjboz24M9NlBeiOJKLEd_Q,DeSandro on Main,4105 Main St,Philadelphia,PA,19127,40.022466,-75.218314,3.0,41,0,"Pizza, Restaurants, Salad, Soup"


In [18]:
# Number of rows in the raw frame.
df.shape[0]

14569

# Extract the columns needed for analysis

In [19]:
# Columns carried forward for analysis; `_id` and `business_id` are left behind.
analysis_columns = [
    'name', 'address', 'city', 'state', 'postal_code',
    'latitude', 'longitude', 'stars', 'review_count', 'is_open',
    'categories',
]
philly_businesses_df = df[analysis_columns]
philly_businesses_df.head()

Unnamed: 0,name,address,city,state,postal_code,latitude,longitude,stars,review_count,is_open,categories
0,St Honore Pastries,935 Race St,Philadelphia,PA,19107,39.955505,-75.155564,4.0,80,1,"Restaurants, Food, Bubble Tea, Coffee & Tea, B..."
1,Tuna Bar,205 Race St,Philadelphia,PA,19106,39.953949,-75.143226,4.0,245,1,"Sushi Bars, Restaurants, Japanese"
2,BAP,1224 South St,Philadelphia,PA,19147,39.943223,-75.162568,4.5,205,1,"Korean, Restaurants"
3,Bar One,767 S 9th St,Philadelphia,PA,19147,39.939825,-75.157447,4.0,65,0,"Cocktail Bars, Bars, Italian, Nightlife, Resta..."
4,DeSandro on Main,4105 Main St,Philadelphia,PA,19127,40.022466,-75.218314,3.0,41,0,"Pizza, Restaurants, Salad, Soup"


In [20]:
# Row count after column selection (selecting columns does not drop rows).
philly_businesses_df.shape[0]

14569

# Rename the columns

In [21]:
# Rename only the two columns whose names actually change; every column not
# listed in the mapping keeps its current name.
philly_businesses_df = philly_businesses_df.rename(
    columns={
        'postal_code': 'zip_code',
        'stars': 'rating',
    }
)

In [22]:
# Preview the first five rows to confirm the new column names.
philly_businesses_df.head(n=5)

Unnamed: 0,name,address,city,state,zip_code,latitude,longitude,rating,review_count,is_open,categories
0,St Honore Pastries,935 Race St,Philadelphia,PA,19107,39.955505,-75.155564,4.0,80,1,"Restaurants, Food, Bubble Tea, Coffee & Tea, B..."
1,Tuna Bar,205 Race St,Philadelphia,PA,19106,39.953949,-75.143226,4.0,245,1,"Sushi Bars, Restaurants, Japanese"
2,BAP,1224 South St,Philadelphia,PA,19147,39.943223,-75.162568,4.5,205,1,"Korean, Restaurants"
3,Bar One,767 S 9th St,Philadelphia,PA,19147,39.939825,-75.157447,4.0,65,0,"Cocktail Bars, Bars, Italian, Nightlife, Resta..."
4,DeSandro on Main,4105 Main St,Philadelphia,PA,19127,40.022466,-75.218314,3.0,41,0,"Pizza, Restaurants, Salad, Soup"


# Clean up the data

In [23]:
# Discard every row that has a missing value in any column, then report how
# many rows remain.
philly_businesses_df = philly_businesses_df.dropna(axis=0, how='any')
philly_businesses_df.shape[0]

14560

# Connect to db

In [24]:
from urllib.parse import quote

from sqlalchemy import create_engine, inspect, text

from config import pw

In [25]:
# Connection settings for the local PostgreSQL server.
protocol = 'postgresql'
username = 'postgres'
password = pw
host = 'localhost'
port = 5432
database_name = 'philly_cuisine_db'

# Percent-encode the password so characters such as '@', ':' or '/' cannot be
# mistaken for URL delimiters when the connection string is parsed.
# safe="" makes quote() encode '/' as well.
rds_connection_string = (
    f'{protocol}://{username}:{quote(str(password), safe="")}'
    f'@{host}:{port}/{database_name}'
)
engine = create_engine(rds_connection_string)

# Check tables

In [26]:
# List the table names through an Inspector. Engine.table_names() is
# deprecated as of SQLAlchemy 1.4 and is not available in 2.0.
inspect(engine).get_table_names()

  engine.table_names()


['philly_cuisine']

# Load the pandas dataframe into the database

In [27]:
# Write the cleaned frame to the `philly_cuisine` table.
# if_exists='replace' drops any existing table of that name before inserting;
# index=False keeps the DataFrame index out of the table.
# NOTE(review): 'replace' presumably discards the table definition created by
# philly_cuisine_schema.sql in the setup steps - confirm that is intended.
philly_businesses_df.to_sql(
    name='philly_cuisine',
    con=engine,
    if_exists='replace',
    index=False,
)

560

# Verify that the data were loaded successfully

In [28]:
# Count the rows now stored in the table so the figure can be compared with
# the length of the frame that was written.
query = 'select count(*) from philly_cuisine'
pd.read_sql_query(sql=query, con=engine)

Unnamed: 0,count
0,14560
