In [1]:
# Import modules
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import requests
import pymongo

# Google API Key
from config import gkey

# Location of the New York restaurant CSV, relative to this notebook.
# The dataset is downloaded manually from Kaggle:
# https://www.kaggle.com/datasets?search=New+york+restaurant
data_path = "assets/ny_restaurant_data.csv"

### New York

In [2]:
# Load the downloaded CSV into the New York DataFrame and, in the same step,
# swap every space inside a column label for an underscore
NY_df = pd.read_csv(data_path).rename(columns=lambda label: label.replace(' ', '_'))

# Preview the first row; note that Sub_Subindustry can be NaN
NY_df.head(1)

Unnamed: 0,Company_Name,Subindustry,Sub_Subindustry,Address,Phone,Website,Borough,Postcode,Latitude,Longitude,Community_Board,Community_Council_,Census_Tract,BIN,BBL,NTA,Location_1
0,Europa Cafe,Café / Deli,,3 Times Sq,212-239-0160,www.euorpacafe.com,1,10036.0,40.756376,-73.986713,5.0,3.0,119.0,1024686.0,1010140000.0,Midtown-Midtown South ...,"{'latitude': '40.756376', 'needs_recoding': Fa..."


In [3]:
# Discard every row whose Sub_Subindustry value is missing (NaN)
NY_cleaned_df = NY_df.dropna(subset=['Sub_Subindustry'])

# Keep only the three columns needed by the later cells
selected_columns = ["Company_Name", "Sub_Subindustry", "Address"]
NY_reduced_df = NY_cleaned_df.loc[:, selected_columns]

# Preview the first two rows
NY_reduced_df.head(2)

Unnamed: 0,Company_Name,Sub_Subindustry,Address
1,Ranch 1,Fast Food,832 Eighth Ave
2,Sosa Borella,Argentinian,832 Eighth Ave


In [4]:
# Map the source column labels to shorter, more descriptive ones
column_labels = {"Company_Name": "Name", "Sub_Subindustry": "Type"}
NY_renamed_df = NY_reduced_df.rename(columns=column_labels)

# Preview the first two rows
NY_renamed_df.head(2)

Unnamed: 0,Name,Type,Address
1,Ranch 1,Fast Food,832 Eighth Ave
2,Sosa Borella,Argentinian,832 Eighth Ave


In [5]:
# Build a boolean mask for rows whose Type is exactly "Chinese",
# then keep only those rows (all columns retained)
is_chinese = NY_renamed_df["Type"].eq("Chinese")
NY_chinese_df = NY_renamed_df[is_chinese]

# Preview the first two rows
NY_chinese_df.head(2)

Unnamed: 0,Name,Type,Address
5,China Gourmet,Chinese,877 Eighth Ave
62,John's Shanghai,Chinese,144 W 46th St


In [6]:
# Open a client against the MongoDB server on localhost, port 27017
conn = 'mongodb://localhost:27017'
client = pymongo.MongoClient(conn)

# Select the database named Restaurants_db
db = client["Restaurants_db"]

# Select the collection named items inside that database
collection = db["items"]

In [7]:
# Build one MongoDB document per row of NY_chinese_df, then write them all
# with a single bulk insert instead of one round trip per row.
# All documents are built before anything is written, so a bad row cannot
# leave the collection half-populated.
# NOTE: this cell only appends; re-running it inserts the same documents again.
ny_documents = [
    {
        'Name': name,
        'Type': cuisine,
        # A missing (NaN) address cannot be joined to a string, so fall back
        # to the city name alone instead of raising a TypeError
        'Address': f"{address}, New York City" if pd.notna(address) else "New York City"
    }
    for name, cuisine, address in zip(
        NY_chinese_df["Name"], NY_chinese_df["Type"], NY_chinese_df["Address"]
    )
]

# insert_many rejects an empty list, so only call it when there is data
if ny_documents:
    collection.insert_many(ny_documents)

### San Francisco

In [8]:
# Google Places "Nearby Search" endpoint (JSON output)
base_url = "https://maps.googleapis.com/maps/api/place/nearbysearch/json"

# Query parameters: restaurants matching the keyword "chinese", ranked by
# distance from the given San Francisco coordinates
params = dict(
    location="37.773972,-122.431297",  # San Francisco coords
    rankby="distance",
    type="restaurant",
    key=gkey,
    keyword="chinese",
)

In [9]:
# Call the Places endpoint. The timeout keeps the notebook from hanging on a
# stalled connection, and raise_for_status surfaces HTTP-level failures.
http_response = requests.get(base_url, params=params, timeout=10)
http_response.raise_for_status()

# Parsed JSON body of the reply
response = http_response.json()

# The Places API reports request-level problems (bad key, quota exceeded, ...)
# in the "status" field of the body, so check it explicitly rather than
# silently continuing with an empty result list.
status = response.get("status")
if status not in ("OK", "ZERO_RESULTS"):
    raise RuntimeError(
        f"Places API request failed: {status} - "
        f"{response.get('error_message', 'no error message')}"
    )

# extract results (an empty list when nothing matched)
results = response.get('results', [])

In [10]:
# Turn each place returned by the San Francisco search into a document with
# the same Name / Type / Address fields used for the New York data.
# All documents are built before anything is written, so a result missing a
# key raises before the collection has been partially populated.
# NOTE: this cell only appends; re-running it inserts the same documents again.
sf_documents = [
    {
        'Name': place["name"],
        'Type': "Chinese",
        'Address': place['vicinity']
    }
    for place in results
]

# insert_many rejects an empty list, so only call it when the search
# actually returned places
if sf_documents:
    collection.insert_many(sf_documents)