# Eat Safe, Love

## Notebook Set Up

In [1]:
# Import dependencies
from pymongo import MongoClient
from pprint import pprint
import pandas as pd

In [2]:
# Open a client connection to the MongoDB server on the given local port
MONGO_PORT = 27017
mongo = MongoClient(port=MONGO_PORT)

In [3]:
# Bind the `uk_food` database to the handle used by the rest of the notebook
db = mongo.uk_food

In [4]:
# List the collections currently stored in the database
collection_names = db.list_collection_names()
print(collection_names)

['establishments']


In [5]:
# Keep a handle on the `establishments` collection for the queries that follow
establishments = db.establishments

In [7]:
# Preview a single document to see which fields the collection stores
sample_document = db.establishments.find_one()
pprint(sample_document)

{'AddressLine1': 'Penang Flavours',
 'AddressLine2': '146A Plumstead Rd',
 'AddressLine3': 'London',
 'AddressLine4': '',
 'BusinessName': 'Penang Flavours',
 'BusinessType': 'Restaurant/Cafe/Canteen',
 'BusinessTypeID': '1',
 'Distance': 4623.972328074718,
 'LocalAuthorityCode': '511',
 'LocalAuthorityEmailAddress': 'health@royalgreenwich.gov.uk',
 'LocalAuthorityName': 'Greenwich',
 'LocalAuthorityWebSite': 'http://www.royalgreenwich.gov.uk',
 'NewRatingPending': True,
 'Phone': '',
 'PostCode': 'SE18 7DY',
 'RatingValue': None,
 'RightToReply': '',
 'SchemeType': 'FHRS',
 '_id': ObjectId('6671f576cab41aa9bc86b956'),
 'geocode': {'latitude': 51.490142, 'longitude': 0.08384},
 'scores': {'ConfidenceInManagement': '', 'Hygiene': '', 'Structural': ''}}


## Part 3: Exploratory Analysis
Unless otherwise stated, for each question: 
* Use `count_documents` to display the number of documents contained in the result.
* Display the first document in the results using `pprint`.
* Convert the result to a Pandas DataFrame, print the number of rows in the DataFrame, and display the first 10 rows.

### 1. Which establishments have a hygiene score equal to 20?

In [8]:
# Filter: hygiene score (nested under 'scores') equal to 20
query = {'scores.Hygiene': 20}

# Number of documents matching the filter
hygiene_20 = establishments.count_documents(query)
print(f'Within establishments there are {hygiene_20} establishments with a hygiene score of 20!\n')

# Divider followed by a blank line, then the first matching document
# (find_one returns None when nothing matches)
print('-' * 75, end='\n\n')
first_hygiene_20 = establishments.find_one(query)
pprint(first_hygiene_20)

Within establishments there are 0 establishments with a hygiene score of 20!

---------------------------------------------------------------------------

None


In [9]:
# Filter: hygiene score equal to 20
query = {'scores.Hygiene': 20}
results = establishments.find(query)

# Materialise the cursor into a list of records, then build the DataFrame
hygiene_20_records = list(results)
df = pd.DataFrame(hygiene_20_records)

# Report the row count
print("Rows in DataFrame: ", df.shape[0])

# Show at most the first 10 rows
df.head(10)


Rows in DataFrame:  0


### 2. Which establishments in London have a `RatingValue` greater than or equal to 4?

In [10]:
# Find establishments whose Local Authority name contains "London" and whose
# RatingValue is greater than or equal to 4.
# RatingValue is compared against the integer 4 rather than the string '4':
# MongoDB comparison operators only match values in the same type bracket,
# so a string operand never matches numeric ratings (and would match
# non-numeric strings by lexicographic order instead).
# NOTE(review): assumes RatingValue is stored as a number — confirm against
# the database set-up step.
query_a = {
    'LocalAuthorityName': {'$regex': 'London'},
    'RatingValue': {'$gte': 4},
}

# Use count_documents to display the number of documents in the result
a = establishments.count_documents(query_a)
# Display the first document in the results using pprint
print (f'Within establishments there are {a} establishments with a Rating greater or     equal to 4!\n')
print('-'*75 + '\n')
pprint(establishments.find_one(query_a))

Within establishments there are 0 establishments with a Rating greater or     equal to 4!

---------------------------------------------------------------------------

None


In [11]:
# Run the query once; the cursor is consumed when it is converted below
results_a = establishments.find(query_a)
# Convert the result to a Pandas DataFrame (reuses the cursor instead of
# issuing a second, identical find)
df_2 = pd.DataFrame(list(results_a))
# Display the number of rows in the DataFrame
print("Rows in DataFrame: ", len(df_2))
# Display the first 10 rows of the DataFrame
df_2.head(10)

Rows in DataFrame:  0


### 3. What are the top 5 establishments with a `RatingValue` of 5, sorted by lowest hygiene score, nearest to the new restaurant added, "Penang Flavours"?

In [12]:
# Look up the coordinates stored for "Penang Flavours"; only the geocode
# latitude/longitude are requested in the projection
penang_filter = {'BusinessName': 'Penang Flavours'}
geocode_projection = {'geocode.latitude': 1, 'geocode.longitude': 1}
penang_geocode = establishments.find_one(penang_filter, geocode_projection)
pprint(penang_geocode)

{'_id': ObjectId('6671f576cab41aa9bc86b956'),
 'geocode': {'latitude': 51.490142, 'longitude': 0.08384}}


In [13]:
# Search within 0.01 degree on either side of the latitude and longitude.
# Rating value must equal 5
# Sort by hygiene score by ascending order (lowest to highest)

degree_search = 0.01
latitude = 51.490142
longitude = 0.08384

# RatingValue is matched by equality with the integer 5. A `$regex` filter
# only matches string values, so it can never match a numeric rating, and on
# strings the pattern '5' would match any value that merely contains a "5".
# NOTE(review): assumes RatingValue is stored as a number — confirm against
# the database set-up step.
query_b = {
    'RatingValue': 5,
    'geocode.latitude': {'$gte': latitude - degree_search,
                         '$lte': latitude + degree_search},
    'geocode.longitude': {'$gte': longitude - degree_search,
                          '$lte': longitude + degree_search},
}
sort = [('scores.Hygiene', 1)]
# Print the results (at most five documents)
results = establishments.find(query_b).sort(sort).limit(5)
for x in results:
    pprint(x)

In [15]:
# Re-run the sorted, limited query and show its documents as a DataFrame
top_five_cursor = establishments.find(query_b).sort(sort).limit(5)
pd.DataFrame(top_five_cursor)

### 4. How many establishments in each Local Authority area have a hygiene score of 0?

In [16]:
# Aggregation pipeline: count hygiene-score-0 establishments per Local Authority

# Stage 1: keep only establishments whose hygiene score is 0
match_query = {'$match': {'scores.Hygiene': 0}}

# Stage 2: one group per Local Authority, counting the documents in each
group_query = {
    '$group': {
        '_id': '$LocalAuthorityName',
        'count': {'$sum': 1},
    }
}

# Stage 3: largest counts first
sort_query = {'$sort': {'count': -1}}

# Assemble the stages in execution order
pipeline = [match_query, group_query, sort_query]

# Execute the aggregation and materialise the cursor as a list
results_1 = list(establishments.aggregate(pipeline))

# Report how many groups came back, then a divider and a blank line
print("Number of documents: ", len(results_1))
print('-' * 75, end='\n\n')

# Show at most the first 10 groups
pprint(results_1[:10])


Number of documents:  0
---------------------------------------------------------------------------

[]


In [17]:
# Convert the aggregation result to a Pandas DataFrame
result_df = pd.DataFrame(results_1)
# Display the number of rows in the DataFrame (measured on the DataFrame
# itself rather than on the source list)
print("Number of rows: ", len(result_df))
print('-'*75 + '\n')
# Display the first 10 rows of the DataFrame
result_df.head(10)


Number of rows:  0
---------------------------------------------------------------------------

