# Eat Safe, Love

## Notebook Set Up

In [2]:
from pymongo import MongoClient
import pandas as pd
from pprint import pprint

In [3]:
# Create an instance of MongoClient
mongo = MongoClient(port=27017)

In [4]:
# assign the uk_food database to a variable name
db = mongo['uk_food']

In [5]:
# review the collections in our database
collections = db.list_collection_names()
print(f"Collections in 'uk_food':", collections)

Collections in 'uk_food': ['establishments']


In [6]:
# assign the collection to a variable
establishments = db['establishments']

## Part 3: Exploratory Analysis
Unless otherwise stated, for each question: 
* Use `count_documents` to display the number of documents contained in the result.
* Display the first document in the results using `pprint`.
* Convert the result to a Pandas DataFrame, print the number of rows in the DataFrame, and display the first 10 rows.

### 1. Which establishments have a hygiene score equal to 20?

In [7]:
# Find the establishments with a hygiene score of 20
query = {"HygieneScore": 20}
# Use count_documents to display the number of documents in the result
count = db['establishments'].count_documents(query)
print(f"Number of documents with HygieneScore of 20: {count}")

# Display the first document in the results using pprint
first_document = db['establishments'].find_one(query)
print("\nFirst document in the results:")
pprint(first_document)

Number of documents with HygieneScore of 20: 0

First document in the results:
None


In [8]:
# Convert the result to a Pandas DataFrame
# Display the number of rows in the DataFrame
cursor = db['establishments'].find(query)
df = pd.DataFrame(list(cursor))

# Display the first 10 rows of the DataFrame
print(f"\nNumber of rows in the DataFrame: {df.shape[0]}")
print("\nFirst 10 rows of the DataFrame:")
print(df.head(10))


Number of rows in the DataFrame: 0

First 10 rows of the DataFrame:
Empty DataFrame
Columns: []
Index: []


### 2. Which establishments in London have a `RatingValue` greater than or equal to 4?

In [9]:
# Find the establishments with London as the Local Authority and has a RatingValue greater than or equal to 4.
query = {
    "RatingValue": {"$gte": 4},
    "Address": {"$regex": "London", "$options": "i"}
}


# Use count_documents to display the number of documents in the result
count = db['establishments'].count_documents(query)
# Display the first document in the results using pprint
first_document = db['establishments'].find_one(query)
print("\nFirst document in the results:")
pprint(first_document)


First document in the results:
None


In [10]:
# Convert the result to a Pandas DataFrame
cursor = db['establishments'].find(query)
df = pd.DataFrame(list(cursor))
# Display the number of rows in the DataFrame
# Display the first 10 rows of the DataFrame
print(f"\nNumber of rows in the DataFrame: {df.shape[0]}")
print("\nFirst 10 rows of the DataFrame:")
print(df.head(10))


Number of rows in the DataFrame: 0

First 10 rows of the DataFrame:
Empty DataFrame
Columns: []
Index: []


### 3. What are the top 5 establishments with a `RatingValue` rating value of 5, sorted by lowest hygiene score, nearest to the new restaurant added, "Penang Flavours"?

In [None]:
# Search within 0.01 degree on either side of the latitude and longitude.
penang_flavours_location = {
    "latitude": 51.5074,  #
    "longitude": -0.1278  
}

degree_search = 0.01

# Rating value must equal 5
query = query = {
    "RatingValue": 5,
    "latitude": {
        "$gte": penang_flavours_location["latitude"] - degree_search,
        "$lte": penang_flavours_location["latitude"] + degree_search
    },
    "longitude": {
        "$gte": penang_flavours_location["longitude"] - degree_search,
        "$lte": penang_flavours_location["longitude"] + degree_search
    }
}

# Sort by hygiene score
sort = [("HygieneScore", 1)]
limit = 10

# Print the results
print("\nResults:")



Results:


In [15]:
# Convert result to Pandas DataFrame
results = list(cursor)
df = pd.DataFrame(results)


### 4. How many establishments in each Local Authority area have a hygiene score of 0?

In [20]:
# Create a pipeline that:
# 1. Matches establishments with a hygiene score of 0
pipeline = [
    {
        "$match": {
            "HygieneScore": 0
        }
    },
    # 2. Groups the matches by Local Authority
    {
        "$group": {
            "_id": "$LocalAuthorityName",
            "count": { "$sum": 1 }
        }
    },
    # 3. Sorts the matches from highest to lowest
    {
        "$sort": {
            "count": -1
        }
    }
]

cursor = db['establishments'].aggregate(pipeline)
results = list(cursor)
# Print the number of documents in the result
print(f"Number of Local Authorities with establishments having HygieneScore 0: {len(results)}")

# Print the first 10 results
print("\nFirst 10 results:")
for result in results[:10]:
    pprint(result)

Number of Local Authorities with establishments having HygieneScore 0: 0

First 10 results:


In [21]:
# Convert the result to a Pandas DataFrame
df = pd.DataFrame(results)
# Display the number of rows in the DataFrame
print(f"\nNumber of rows in the DataFrame: {len(df)}")

# Display the first 10 rows of the DataFrame
print("\nFirst 10 rows of the DataFrame:")
print(df.head(10))


Number of rows in the DataFrame: 0

First 10 rows of the DataFrame:
Empty DataFrame
Columns: []
Index: []
