# Eat Safe, Love

## Notebook Set Up

In [56]:
# Import dependencies
from pymongo import MongoClient
from pprint import pprint
import pandas as pd

In [57]:
# Create an instance of the MongoClient
client = MongoClient('mongodb://localhost:27017/')

# Connect to the 'uk_food' database
db = client.uk_food

# Check to confirm that the database exists by listing all databases
print(client.list_database_names())


['admin', 'config', 'fruit_db', 'local', 'uk_food']


In [58]:
# Check to confirm that the 'establishments' collection exists
print(db.list_collection_names())

['establishments']


In [59]:
# Create an instance of MongoClient
mongo = MongoClient(port=27017)

In [60]:
# assign the uk_food database to a variable name
db = mongo['uk_food']

In [61]:
# Review the collections in our database

# Assuming you have already connected to your MongoDB database
collections = db.list_collection_names()

# Print the list of collections
def print_collections(collections):
    print("Collections in 'uk_food' database:", collections)

# Call the function to print collections
print_collections(collections)


Collections in 'uk_food' database: ['establishments']


In [62]:
# assign the collection to a variable
establishments = db['establishments']

## Part 3: Exploratory Analysis
Unless otherwise stated, for each question: 
* Use `count_documents` to display the number of documents contained in the result.
* Display the first document in the results using `pprint`.
* Convert the result to a Pandas DataFrame, print the number of rows in the DataFrame, and display the first 10 rows.

### 1. Which establishments have a hygiene score equal to 20?

In [63]:
# Create an instance of MongoClient and access the database
mongo = MongoClient(port=27017)
db = mongo['uk_food']
collection = db['establishments']

# Define the query to find establishments with a hygiene score of 20
query = {"HygieneScore": 20}

# Use count_documents to display the number of documents in the result
count = collection.count_documents(query)
print(f"Number of establishments with a hygiene score of 20: {count}")

# Display the first document in the results using pprint
first_document = collection.find_one(query)
print("First establishment with a hygiene score of 20:")
pprint(first_document)


Number of establishments with a hygiene score of 20: 0
First establishment with a hygiene score of 20:
None


In [64]:
# Create an instance of MongoClient and access the database
mongo = MongoClient(port=27017)
db = mongo['uk_food']
collections = db['establishments']

In [65]:
# Verify the field names and check available hygiene scores
# Assuming 'collections' is a reference to the specific collection you want to query

# Query to find distinct hygiene scores
hygiene_scores = collections.distinct('HygieneScore')
print(f"Distinct hygiene scores available: {hygiene_scores}")

# If you want to create a DataFrame from the hygiene scores
import pandas as pd

# Creating a DataFrame from the distinct hygiene scores
df_hygiene_score = pd.DataFrame(hygiene_scores, columns=['HygieneScore'])


Distinct hygiene scores available: []


### 2. Which establishments in London have a `RatingValue` greater than or equal to 4?

In [66]:
# Query to count establishments in London with a RatingValue > 4
establishments = db['establishments']  # Replace with your actual collection name
count_high_rated_establishments = establishments.count_documents({
    'RatingValue': {'$gt': 4},  # Greater than 4
    'Address.LocalAuthorityArea': 'London'  # Filter for London establishments
})

# Print the count
print(f"Number of establishments in London with a RatingValue > 4: {count_high_rated_establishments}")


Number of establishments in London with a RatingValue > 4: 0


In [67]:
# Convert the cursor to a list and then to a DataFrame
high_rated_establishments_list = list(high_rated_establishments_cursor)
df_high_rated_establishments = pd.DataFrame(high_rated_establishments_list)

# Print the DataFrame
print(df_high_rated_establishments)


Empty DataFrame
Columns: []
Index: []


In [68]:
# 2. Display the number of rows in the DataFrame
print(f"Number of rows: {df_hygiene_scores.shape[0]}")

Number of rows: 0


In [69]:
# Display the first 10 rows of the DataFrame
print(df_hygiene_scores.head(10))

Empty DataFrame
Columns: []
Index: []


### 3. What are the top 5 establishments with a `RatingValue` rating value of 5, sorted by lowest hygiene score, nearest to the new restaurant added, "Penang Flavours"?

In [70]:
# Parameters for the search
degree_search = 0.01
latitude = 51.509865   # Example: Latitude of London
longitude = -0.118092  # Example: Longitude of London

# Construct the query: Latitude/Longitude range and Rating = 5
query = {
    "Latitude": {"$gt": latitude - degree_search, "$lt": latitude + degree_search},
    "Longitude": {"$gt": longitude - degree_search, "$lt": longitude + degree_search},
    "RatingValue": 5
}

In [71]:
# Sort by hygiene score in ascending order (use -1 for descending)
sort = [("HygieneScore", 1)]

# Limit the results (example: limit to 10 results)
limit = 10

# Execute the query with sorting and limit
results = collections.find(query).sort(sort).limit(limit)

# Convert the results to a Pandas DataFrame
df_results = pd.DataFrame(list(results))

# 1. Print the number of rows in the DataFrame
pprint(f"Number of rows: {df_results.shape[0]}")

'Number of rows: 0'


In [72]:
# 2. Display the first 10 rows of the DataFrame using pprint
pprint(df_results.head(10).to_dict('records'))


[]


### 4. How many establishments in each Local Authority area have a hygiene score of 0?

In [73]:
# Define the aggregation pipeline
pipeline = [
    # 1. Match establishments with a hygiene score of 0
    {"$match": {"HygieneScore": 0}},
    
    # 2. Group by Local Authority and count the number of establishments
    {"$group": {
        "_id": "$LocalAuthority",        # Group by Local Authority
        "count": {"$sum": 1}             # Count number of establishments
    }},
    
    # 3. Sort the results by count in descending order
    {"$sort": {"count": -1}}
]

# Execute the aggregation pipeline
results = collection.aggregate(pipeline)

# Convert the aggregation result to a list
results_list = list(results)

# 4. Print the number of documents in the result
pprint(f"Number of Local Authorities: {len(results_list)}")

# 5. Print the first 10 results
pprint(results_list[:10])


'Number of Local Authorities: 0'
[]


In [74]:
# Execute the aggregation pipeline
results = collection.aggregate(pipeline)

# Convert the aggregation result to a list
results_list = list(results)

# Convert the result to a Pandas DataFrame
df_results = pd.DataFrame(results_list)

# 1. Display the number of rows in the DataFrame
pprint(f"Number of rows in DataFrame: {df_results.shape[0]}")

# 2. Display the first 10 rows of the DataFrame
pprint(df_results.head(10).to_dict('records'))


'Number of rows in DataFrame: 0'
[]
