# Eat Safe, Love

## Notebook Set Up

In [1]:
# Import dependencies
%pip install pymongo
from pprint import pprint

import pandas as pd
from pymongo import MongoClient

Note: you may need to restart the kernel to use updated packages.



[notice] A new release of pip is available: 23.1.2 -> 23.2.1
[notice] To update, run: python.exe -m pip install --upgrade pip


In [2]:
# Open a client for the MongoDB server on this machine (localhost, port 27017)
mongo = MongoClient("localhost", 27017)

In [3]:
# Bind the `uk_food` database to a short handle used by the queries below
db = mongo.uk_food

In [4]:
# Review the collections in our database: ask the server for the names of all
# collections in `uk_food` and pretty-print them.
# A connection failure (such as the ServerSelectionTimeoutError recorded in
# this cell's output) is raised here if MongoDB is not reachable on port 27017.
list_of_collections = db.list_collection_names()
pprint(list_of_collections)

ServerSelectionTimeoutError: localhost:27017: [WinError 10061] No connection could be made because the target machine actively refused it, Timeout: 30s, Topology Description: <TopologyDescription id: 6521afe85c49d4660ee7fa2e, topology_type: Unknown, servers: [<ServerDescription ('localhost', 27017) server_type: Unknown, rtt: None, error=AutoReconnect('localhost:27017: [WinError 10061] No connection could be made because the target machine actively refused it')>]>

In [None]:
# Keep a handle to the `establishments` collection
establishments = db.establishments

## Part 3: Exploratory Analysis
Unless otherwise stated, for each question: 
* Use `count_documents` to display the number of documents contained in the result.
* Display the first document in the results using `pprint`.
* Convert the result to a Pandas DataFrame, print the number of rows in the DataFrame, and display the first 10 rows.

### 1. Which establishments have a hygiene score equal to 20?

In [None]:

# Filter: documents whose nested `scores.Hygiene` field equals 20
query = {"scores.Hygiene": 20}

# How many establishments match the filter?
count = db.establishments.count_documents(query)
print(f"There are {count} establishments with a hygiene score of 20.")

# Show one matching document as an example
first_document = db.establishments.find_one(query)
print("\nFirst establishment:")
pprint(first_document)


In [None]:
# Filter: documents whose nested `scores.Hygiene` field equals 20
query = {"scores.Hygiene": 20}

# Fetch every matching document; list() drains the cursor so pandas receives
# one record per document.
documents = db.establishments.find(query)
df = pd.DataFrame(list(documents))

# Number of rows in the DataFrame
print(f"There are {len(df)} rows in the DataFrame.")

# First 10 rows, left as the cell's last expression so the notebook renders
# the DataFrame as a table instead of plain text.
print("\nFirst 10 rows of the DataFrame:")
df.head(10)


### 2. Which establishments in London have a `RatingValue` greater than or equal to 4?

In [None]:
# Set up the query for establishments in London with a RatingValue >= 4.
# LocalAuthorityName is matched with $regex (substring match) rather than by
# equality: an exact match on "London" only finds an authority named exactly
# "London" and misses names that merely contain it (e.g. "City of London
# Corporation" - NOTE(review): confirm the authority names in this dataset).
# The $gte comparison assumes RatingValue is stored as a number - TODO confirm.
query = {"LocalAuthorityName": {"$regex": "London"}, "RatingValue": {"$gte": 4}}

# Number of matching establishments
count = db.establishments.count_documents(query)

# One matching document as an example
first_document = db.establishments.find_one(query)

print(f"There are {count} establishments in London with a RatingValue of 4 or higher.")
print("\nFirst establishment:")
pprint(first_document)


In [None]:
# Set up the query for establishments in London with a RatingValue >= 4.
# $regex matches any LocalAuthorityName containing "London"; an equality match
# on "London" would only find an authority named exactly "London"
# (NOTE(review): confirm the authority names used in this dataset).
# The $gte comparison assumes RatingValue is stored as a number - TODO confirm.
query = {"LocalAuthorityName": {"$regex": "London"}, "RatingValue": {"$gte": 4}}

# Fetch all documents that match the query
documents = db.establishments.find(query)

# Convert the documents to a Pandas DataFrame (list() drains the cursor)
df = pd.DataFrame(list(documents))

# Number of rows in the DataFrame
print(f"There are {len(df)} rows in the DataFrame.")

# First 10 rows, left as the cell's last expression so the notebook renders
# the DataFrame as a table instead of plain text.
print("\nFirst 10 rows of the DataFrame:")
df.head(10)

### 3. What are the top 5 establishments with a `RatingValue` of 5, sorted by lowest hygiene score, nearest to the new restaurant added, "Penang Flavours"?

In [None]:
# Half-width, in degrees, of the search box around the reference restaurant
degree_search = 0.01

# Look up the reference restaurant's coordinates instead of relying on
# placeholder names that are never defined.
# NOTE(review): assumes the restaurant is stored under `BusinessName` and that
# geocode.latitude / geocode.longitude can be converted with float() - confirm.
reference = db.establishments.find_one({"BusinessName": "Penang Flavours"})
if reference is None:
    raise LookupError('No establishment named "Penang Flavours" was found.')
latitude = float(reference["geocode"]["latitude"])
longitude = float(reference["geocode"]["longitude"])

# Query for establishments within the search box, with RatingValue of 5
query = {
    "RatingValue": 5,
    "geocode.latitude": {"$gte": latitude - degree_search, "$lte": latitude + degree_search},
    "geocode.longitude": {"$gte": longitude - degree_search, "$lte": longitude + degree_search}
}

# Sort by hygiene score, ascending
sort = [("scores.Hygiene", 1)]

# The question asks for the top 5 only
limit = 5

# Materialise the results as a list: a cursor can be iterated only once, and
# the results are needed again after the printing loop below.
results = list(db.establishments.find(query).sort(sort).limit(limit))

for doc in results:
    pprint(doc)

In [None]:
# Convert result to Pandas DataFrame.
# Query again rather than reusing `results`: if `results` is a cursor that the
# printing loop has already iterated, it is exhausted and would give an empty
# DataFrame. limit(5) keeps only the top 5 the question asks for.
df = pd.DataFrame(list(db.establishments.find(query).sort(sort).limit(5)))

# Number of rows in the DataFrame
print(f"There are {len(df)} rows in the DataFrame.")

# Display the DataFrame (at most 10 rows), rendered as a table by the notebook
df.head(10)

### 4. How many establishments in each Local Authority area have a hygiene score of 0?

In [None]:
# Stage 1: keep only establishments whose hygiene score is 0
match_stage = {"$match": {"scores.Hygiene": 0}}

# Stage 2: count the remaining documents per Local Authority
group_stage = {"$group": {"_id": "$LocalAuthorityName", "count": {"$sum": 1}}}

# Stage 3: largest counts first
sort_stage = {"$sort": {"count": -1}}

# Assemble the aggregation pipeline
pipeline = [match_stage, group_stage, sort_stage]

# Run the pipeline and collect every output document into a list
results = list(db.establishments.aggregate(pipeline))

# Report how many documents the pipeline produced
print(f"Number of documents in the result: {len(results)}")

# Pretty-print the first 10 results
for doc in results[:10]:
    pprint(doc)


In [None]:
# Convert the results to a Pandas DataFrame and rename the '_id' column to
# 'LocalAuthorityName'. rename() returns a new frame, so the conversion and
# rename are chained instead of mutating the frame with inplace=True.
df = pd.DataFrame(results).rename(columns={'_id': 'LocalAuthorityName'})

# Display the number of rows in the DataFrame
print(f"There are {len(df)} rows in the DataFrame.")

# Display the first 10 rows of the DataFrame, left as the cell's last
# expression so the notebook renders a table instead of plain text.
print("\nFirst 10 rows of the DataFrame:")
df.head(10)
