# Eat Safe, Love

## Notebook Set Up

In [31]:
# Import dependencies
from pymongo import MongoClient
from pprint import pprint
import pandas as pd

In [32]:
# Create an instance of MongoClient
mongo = MongoClient(port=27017)

In [33]:
# assign the uk_food database to a variable name
db = mongo['uk_food']

In [34]:
# review the collections in our database
db.list_collection_names()

['establishments']

In [35]:
# assign the collection to a variable
establishments = db['establishments']

## Part 3: Exploratory Analysis
Unless otherwise stated, for each question: 
* Use `count_documents` to display the number of documents contained in the result.
* Display the first document in the results using `pprint`.
* Convert the result to a Pandas DataFrame, print the number of rows in the DataFrame, and display the first 10 rows.

### 1. Which establishments have a hygiene score equal to 20?

In [36]:
# Find the establishments with a hygiene score of 20
query = {'scores.Hygiene': 20}

# Use count_documents to display the number of documents in the result
print("Number of documents in result:", establishments.count_documents(query))
# Display the first document in the results using pprint
print("First result:")
results = establishments.find(query)
pprint(results[0])

Number of documents in result: 41
First result:
{'AddressLine1': '5-6 Southfields Road',
 'AddressLine2': 'Eastbourne',
 'AddressLine3': 'East Sussex',
 'AddressLine4': '',
 'BusinessName': 'The Chase Rest Home',
 'BusinessType': 'Caring Premises',
 'BusinessTypeID': 5,
 'ChangesByServerID': 0,
 'Distance': 4613.888288172291,
 'FHRSID': 110681,
 'LocalAuthorityBusinessID': '4029',
 'LocalAuthorityCode': '102',
 'LocalAuthorityEmailAddress': 'Customerfirst@eastbourne.gov.uk',
 'LocalAuthorityName': 'Eastbourne',
 'LocalAuthorityWebSite': 'http://www.eastbourne.gov.uk/foodratings',
 'NewRatingPending': False,
 'Phone': '',
 'PostCode': 'BN21 1BU',
 'RatingDate': '2021-09-23T00:00:00',
 'RatingKey': 'fhrs_0_en-gb',
 'RatingValue': 0,
 'RightToReply': '',
 'SchemeType': 'FHRS',
 '_id': ObjectId('6488de418c14bd4e55d0240b'),
 'geocode': {'latitude': 50.769705, 'longitude': 0.27694},
 'links': [{'href': 'https://api.ratings.food.gov.uk/establishments/110681',
            'rel': 'self'}],
 'me

In [13]:
# Convert the result to a Pandas DataFrame
results_df = pd.DataFrame(results)
# Display the number of rows in the DataFrame
print("Rows in DataFrame: ", len(results_df))
# Display the first 10 rows of the DataFrame
results_df.head()

Rows in DataFrame:  41


Unnamed: 0,_id,FHRSID,ChangesByServerID,LocalAuthorityBusinessID,BusinessName,BusinessType,BusinessTypeID,AddressLine1,AddressLine2,AddressLine3,...,LocalAuthorityWebSite,LocalAuthorityEmailAddress,scores,SchemeType,geocode,RightToReply,Distance,NewRatingPending,meta,links
0,6488de418c14bd4e55d0240b,110681,0,4029,The Chase Rest Home,Caring Premises,5,5-6 Southfields Road,Eastbourne,East Sussex,...,http://www.eastbourne.gov.uk/foodratings,Customerfirst@eastbourne.gov.uk,"{'Hygiene': 20, 'Structural': 20, 'ConfidenceI...",FHRS,"{'longitude': 0.27694, 'latitude': 50.769705}",,4613.888288,False,"{'dataSource': None, 'extractDate': '0001-01-0...","[{'rel': 'self', 'href': 'https://api.ratings...."
1,6488de418c14bd4e55d0278b,612039,0,1970/FOOD,Brenalwood,Caring Premises,5,Hall Lane,Walton-on-the-Naze,Essex,...,http://www.tendringdc.gov.uk/,fhsadmin@tendringdc.gov.uk,"{'Hygiene': 20, 'Structural': 15, 'ConfidenceI...",FHRS,"{'longitude': 1.278721, 'latitude': 51.857536}",,4617.965824,False,"{'dataSource': None, 'extractDate': '0001-01-0...","[{'rel': 'self', 'href': 'https://api.ratings...."
2,6488de418c14bd4e55d02a98,730933,0,1698/FOOD,Melrose Hotel,Hotel/bed & breakfast/guest house,7842,53 Marine Parade East,Clacton On Sea,Essex,...,http://www.tendringdc.gov.uk/,fhsadmin@tendringdc.gov.uk,"{'Hygiene': 20, 'Structural': 20, 'ConfidenceI...",FHRS,"{'longitude': 1.15927, 'latitude': 51.789429}",,4619.656144,False,"{'dataSource': None, 'extractDate': '0001-01-0...","[{'rel': 'self', 'href': 'https://api.ratings...."
3,6488de418c14bd4e55d02c86,172735,0,PI/000023858,Seaford Pizza,Takeaway/sandwich shop,7844,4 High Street,Seaford,East Sussex,...,http://www.lewes-eastbourne.gov.uk/,ehealth.ldc@lewes-eastbourne.gov.uk,"{'Hygiene': 20, 'Structural': 10, 'ConfidenceI...",FHRS,"{'longitude': 0.10202, 'latitude': 50.770885}",,4620.421725,False,"{'dataSource': None, 'extractDate': '0001-01-0...","[{'rel': 'self', 'href': 'https://api.ratings...."
4,6488de418c14bd4e55d02c9b,172953,0,PI/000024532,Golden Palace,Restaurant/Cafe/Canteen,1,5 South Street,Seaford,East Sussex,...,http://www.lewes-eastbourne.gov.uk/,ehealth.ldc@lewes-eastbourne.gov.uk,"{'Hygiene': 20, 'Structural': 10, 'ConfidenceI...",FHRS,"{'longitude': 0.101446, 'latitude': 50.770724}",,4620.437179,False,"{'dataSource': None, 'extractDate': '0001-01-0...","[{'rel': 'self', 'href': 'https://api.ratings...."


### 2. Which establishments in London have a `RatingValue` greater than or equal to 4?

In [9]:
# Find the establishments with London as the Local Authority and has a RatingValue greater than or equal to 4.
query = {'AddressLine4': 'London', 'RatingValue': { '$gte': 4 } }

# Use count_documents to display the number of documents in the result
print("Number of documents in result:", establishments.count_documents(query))

# Display the first document in the results using pprint
results = establishments.find(query)
pprint(results[0])


Number of documents in result: 390
{'AddressLine1': 'Arch D',
 'AddressLine2': 'The Link Thamesmead',
 'AddressLine3': 'Bazalgette Way',
 'AddressLine4': 'London',
 'BusinessName': 'The Mezzanine Cafe',
 'BusinessType': 'Restaurant/Cafe/Canteen',
 'BusinessTypeID': 1,
 'ChangesByServerID': 0,
 'Distance': 4646.357884390823,
 'FHRSID': 1049722,
 'LocalAuthorityBusinessID': '18/00096/CP',
 'LocalAuthorityCode': '503',
 'LocalAuthorityEmailAddress': 'food.safety@bexley.gov.uk',
 'LocalAuthorityName': 'Bexley',
 'LocalAuthorityWebSite': 'http://www.bexley.gov.uk',
 'NewRatingPending': False,
 'Phone': '',
 'PostCode': 'SE2 9BS',
 'RatingDate': '2022-05-24T00:00:00',
 'RatingKey': 'fhrs_4_en-gb',
 'RatingValue': 4,
 'RightToReply': '',
 'SchemeType': 'FHRS',
 '_id': ObjectId('6488de438c14bd4e55d078be'),
 'geocode': {'latitude': Decimal128('51.5002632141113'),
             'longitude': Decimal128('0.121751002967358')},
 'links': [{'href': 'http://api.ratings.food.gov.uk/establishments/104972

In [10]:
# Convert the result to a Pandas DataFrame
results_df = pd.DataFrame(results)

# Display the number of rows in the DataFrame
print("Rows in DataFrame: ", len(results_df))

# Display the first 10 rows of the DataFrame
results_df.head()


Rows in DataFrame:  390


Unnamed: 0,_id,FHRSID,ChangesByServerID,LocalAuthorityBusinessID,BusinessName,BusinessType,BusinessTypeID,AddressLine1,AddressLine2,AddressLine3,...,LocalAuthorityWebSite,LocalAuthorityEmailAddress,scores,SchemeType,geocode,RightToReply,Distance,NewRatingPending,meta,links
0,6488de438c14bd4e55d078be,1049722,0,18/00096/CP,The Mezzanine Cafe,Restaurant/Cafe/Canteen,1,Arch D,The Link Thamesmead,Bazalgette Way,...,http://www.bexley.gov.uk,food.safety@bexley.gov.uk,"{'Hygiene': 10, 'Structural': 0, 'ConfidenceIn...",FHRS,"{'longitude': 0.121751002967358, 'latitude': 5...",,4646.357884,False,"{'dataSource': None, 'extractDate': '0001-01-0...","[{'rel': 'self', 'href': 'http://api.ratings.f..."
1,6488de438c14bd4e55d078e0,1261451,0,20/00255/MIXED,"St Edward's Church, Mottingham - StEP Lunch Cl...",Other catering premises,7841,St Edwards Church Hall,St Keverne Road,Mottingham,...,http://www.bromley.gov.uk,food@bromley.gov.uk,"{'Hygiene': 0, 'Structural': 5, 'ConfidenceInM...",FHRS,"{'longitude': 0.04774, 'latitude': 51.428019}",,4646.410908,False,"{'dataSource': None, 'extractDate': '0001-01-0...","[{'rel': 'self', 'href': 'http://api.ratings.f..."
2,6488de438c14bd4e55d078e5,987733,0,16/00340/MIXED,Maryfield Pre-School 2,Caring Premises,5,Mottingham Community Centre,Kimmeridge Road,Mottingham,...,http://www.bromley.gov.uk,food@bromley.gov.uk,"{'Hygiene': 5, 'Structural': 5, 'ConfidenceInM...",FHRS,"{'longitude': 0.0465259999036789, 'latitude': ...",,4646.414831,False,"{'dataSource': None, 'extractDate': '0001-01-0...","[{'rel': 'self', 'href': 'http://api.ratings.f..."
3,6488de438c14bd4e55d078e6,357627,0,00000/0000/8/951,Co-op,Retailers - supermarkets/hypermarkets,7840,The Co-operative Food,Kimmeridge Road,Mottingham,...,http://www.bromley.gov.uk,food@bromley.gov.uk,"{'Hygiene': 0, 'Structural': 0, 'ConfidenceInM...",FHRS,"{'longitude': 0.0465259999036789, 'latitude': ...",,4646.414831,False,"{'dataSource': None, 'extractDate': '0001-01-0...","[{'rel': 'self', 'href': 'http://api.ratings.f..."
4,6488de438c14bd4e55d07902,1151706,0,00000/0000/8/204,North Hub @ Castlecombe Youth Centre,Caring Premises,5,Castlecombe Youth Centre,Castlecombe Road,Mottingham,...,http://www.bromley.gov.uk,food@bromley.gov.uk,"{'Hygiene': 5, 'Structural': 10, 'ConfidenceIn...",FHRS,"{'longitude': 0.0433179996907711, 'latitude': ...",,4646.439635,False,"{'dataSource': None, 'extractDate': '0001-01-0...","[{'rel': 'self', 'href': 'http://api.ratings.f..."


### 3. What are the top 5 establishments with a `RatingValue` rating value of 5, sorted by lowest hygiene score, nearest to the new restaurant added, "Penang Flavours"?

In [50]:
# Search within 0.01 degree on either side of the latitude and longitude.
# Rating value must equal 5
# Sort by hygiene score

degree_search = 0.01
latitude = 51.49 #taken from NoSQL_setup Penang Flavours entry and rounded to second decimal place
longitude = 0.08 #taken from NoSQL_setup Penang Flavours entry and rounded to second decimal place

query = {'RatingValue' : 5, 'geocode.latitude' : {'$lte' : 51.50, '$gte' : 51.48}, 'geocode.longitude' : {'$lte' : 0.09, '$gte' : 0.07}}
           
sort =  [("scores.Hygiene", 1)]
limit = 5

results = list(establishments.find(query).sort(sort).limit(limit))

# Print the results
pprint(results)


[{'AddressLine1': 'The Old Mill 1 Old Mill Road',
  'AddressLine2': '',
  'AddressLine3': 'Plumstead',
  'AddressLine4': 'Greenwich',
  'BusinessName': 'The Old Mill',
  'BusinessType': 'Pub/bar/nightclub',
  'BusinessTypeID': 7843,
  'ChangesByServerID': 0,
  'Distance': 4647.050320500107,
  'FHRSID': 694555,
  'LocalAuthorityBusinessID': 'PI/000111022',
  'LocalAuthorityCode': '511',
  'LocalAuthorityEmailAddress': 'health@royalgreenwich.gov.uk',
  'LocalAuthorityName': 'Greenwich',
  'LocalAuthorityWebSite': 'http://www.royalgreenwich.gov.uk',
  'NewRatingPending': False,
  'Phone': '',
  'PostCode': 'SE18 1QG',
  'RatingDate': '2022-03-16T00:00:00',
  'RatingKey': 'fhrs_5_en-gb',
  'RatingValue': 5,
  'RightToReply': '',
  'SchemeType': 'FHRS',
  '_id': ObjectId('6488de438c14bd4e55d07d3d'),
  'geocode': {'latitude': 51.4817109, 'longitude': 0.0840959},
  'links': [{'href': 'http://api.ratings.food.gov.uk/establishments/694555',
             'rel': 'self'}],
  'meta': {'dataSource':

In [51]:
# Convert result to Pandas DataFrame
results_df = pd.DataFrame(results)
results_df


Unnamed: 0,_id,FHRSID,ChangesByServerID,LocalAuthorityBusinessID,BusinessName,BusinessType,BusinessTypeID,AddressLine1,AddressLine2,AddressLine3,...,LocalAuthorityWebSite,LocalAuthorityEmailAddress,scores,SchemeType,geocode,RightToReply,Distance,NewRatingPending,meta,links
0,6488de438c14bd4e55d07d3d,694555,0,PI/000111022,The Old Mill,Pub/bar/nightclub,7843,The Old Mill 1 Old Mill Road,,Plumstead,...,http://www.royalgreenwich.gov.uk,health@royalgreenwich.gov.uk,"{'Hygiene': 0, 'Structural': 5, 'ConfidenceInM...",FHRS,"{'longitude': 0.0840959, 'latitude': 51.4817109}",,4647.050321,False,"{'dataSource': None, 'extractDate': '0001-01-0...","[{'rel': 'self', 'href': 'http://api.ratings.f..."
1,6488de438c14bd4e55d07d80,1100050,0,12389,Best Halal Meat Limited,Retailers - other,4613,94 Plumstead High Street,,Plumstead,...,http://www.royalgreenwich.gov.uk,health@royalgreenwich.gov.uk,"{'Hygiene': 0, 'Structural': 0, 'ConfidenceInM...",FHRS,"{'longitude': 0.089611, 'latitude': 51.4881611}",,4647.085639,False,"{'dataSource': None, 'extractDate': '0001-01-0...","[{'rel': 'self', 'href': 'http://api.ratings.f..."
2,6488de438c14bd4e55d07d71,694338,0,PI/000061059,Conway Primary School,School/college/university,7845,Conway Primary School Gallosson Road,,Plumstead,...,http://www.royalgreenwich.gov.uk,health@royalgreenwich.gov.uk,"{'Hygiene': 0, 'Structural': 5, 'ConfidenceInM...",FHRS,"{'longitude': 0.0894860029220581, 'latitude': ...",,4647.079358,False,"{'dataSource': None, 'extractDate': '0001-01-0...","[{'rel': 'self', 'href': 'http://api.ratings.f..."
3,6488de438c14bd4e55d07d6a,694629,0,PI/000117347,Chick Chicken,Restaurant/Cafe/Canteen,1,98B Plumstead High Street,,Plumstead,...,http://www.royalgreenwich.gov.uk,health@royalgreenwich.gov.uk,"{'Hygiene': 0, 'Structural': 5, 'ConfidenceInM...",FHRS,"{'longitude': 0.0897508, 'latitude': 51.4880687}",,4647.077153,False,"{'dataSource': None, 'extractDate': '0001-01-0...","[{'rel': 'self', 'href': 'http://api.ratings.f..."
4,6488de438c14bd4e55d07c82,695241,0,PI/000179088,Plumstead Manor Nursery,Caring Premises,5,Plumstead Manor School Old Mill Road,,Plumstead,...,http://www.royalgreenwich.gov.uk,health@royalgreenwich.gov.uk,"{'Hygiene': 0, 'Structural': 0, 'ConfidenceInM...",FHRS,"{'longitude': 0.0859939977526665, 'latitude': ...",,4646.97401,False,"{'dataSource': None, 'extractDate': '0001-01-0...","[{'rel': 'self', 'href': 'http://api.ratings.f..."


### 4. How many establishments in each Local Authority area have a hygiene score of 0?

In [20]:
# Create a pipeline that: 
# 1. Matches establishments with a hygiene score of 0
match_query = {'$match': {'scores.Hygiene': 0}}

# 2. Groups the matches by Local Authority
group_query = {'$group': {
                    '_id': "$LocalAuthorityName", 
                    'count': { 
                        '$sum': 1 
                    }}
            }

# 3. Sorts the matches from highest to lowest
sort_values = {'$sort': { 'count': -1 }}

# Put the pipeline together
pipeline = [match_query, group_query, sort_values]

# Run the pipeline through the aggregate method and save the results to a variable
results = list(establishments.aggregate(pipeline))

# Print the number of documents in the result
print("Number of documents in result: ", len(results))

# Print the first 10 results
pprint(results[0:10])


Number of documents in result:  55
[{'_id': 'Thanet', 'count': 1130},
 {'_id': 'Greenwich', 'count': 882},
 {'_id': 'Maidstone', 'count': 713},
 {'_id': 'Newham', 'count': 711},
 {'_id': 'Swale', 'count': 686},
 {'_id': 'Chelmsford', 'count': 680},
 {'_id': 'Medway', 'count': 672},
 {'_id': 'Bexley', 'count': 607},
 {'_id': 'Southend-On-Sea', 'count': 586},
 {'_id': 'Tendring', 'count': 542}]


In [21]:
# Convert the result to a Pandas DataFrame
result_df = pd.DataFrame(results)

# Display the number of rows in the DataFrame
print("Rows in DataFrame: ", len(result_df))

# Display the first 10 rows of the DataFrame
result_df.head(10)

Rows in DataFrame:  55


Unnamed: 0,_id,count
0,Thanet,1130
1,Greenwich,882
2,Maidstone,713
3,Newham,711
4,Swale,686
5,Chelmsford,680
6,Medway,672
7,Bexley,607
8,Southend-On-Sea,586
9,Tendring,542
