In [1]:
# Import dependencies
from pymongo import MongoClient
from pprint import pprint
import pandas as pd

In [2]:
# Open a client connection to the MongoDB server.
# "localhost" is pymongo's documented default host; it is spelled out for clarity.
mongo = MongoClient(host="localhost", port=27017)

In [3]:
# Keep a handle to the uk_food database for the cells below
db = mongo.get_database('uk_food')

In [4]:
# Inspect which collections exist in the database

# Fetch the collection names once so they can be reused
collections = db.list_collection_names()

# Print one name per line; nothing is printed when the list is empty
if collections:
    print(*collections, sep="\n")


establishments


In [5]:
# Keep a handle to the establishments collection for the queries below
establishments = db.get_collection('establishments')

# Part 3: Exploratory Analysis
Unless otherwise stated, for each question: 
* Use `count_documents` to display the number of documents contained in the result.
* Display the first document in the results using `pprint`.
* Convert the result to a Pandas DataFrame, print the number of rows in the DataFrame, and display the first 10 rows.

In [6]:
### 1. Which establishments have a hygiene score equal to 20?

In [7]:
# Filter: documents whose nested scores.Hygiene field equals the integer 20
query = {"scores.Hygiene": 20}

# Fetch a single matching document (find_one gives None when nothing matches)
result = establishments.find_one(query)

# Report how many documents satisfy the filter
count = establishments.count_documents(query)
print("Number of establishments with hygiene score equal to 20:", count)

# Pretty-print the sample document
pprint(result)


Number of establishments with hygiene score equal to 20: 0
None


In [8]:
# Filter: documents whose nested scores.Hygiene field equals the integer 20
query = {"scores.Hygiene": 20}

# Pull every matching document out of the cursor and load them into a DataFrame
df = pd.DataFrame([doc for doc in establishments.find(query)])

# Report how many rows the DataFrame holds
num_rows = df.shape[0]
print("Number of rows in the DataFrame:", num_rows)

# Show at most the first 10 rows
print("First 10 rows of the DataFrame:")
print(df.head(10))

Number of rows in the DataFrame: 0
First 10 rows of the DataFrame:
Empty DataFrame
Columns: []
Index: []


In [9]:
# Find establishments with a hygiene score equal to 20.
#
# MongoDB equality matching is type-sensitive: the string "20" does not match a
# numerically stored 20, and vice versa. Other cells in this notebook filter on
# the integer, so accept either representation here and let the query work
# regardless of how the score was stored at import time.
hygiene_query = {
    "scores.Hygiene": {"$in": [20, "20"]}
}

hygiene_count = establishments.count_documents(hygiene_query)
hygiene_result = list(establishments.find(hygiene_query))

print("Establishments with hygiene score equal to 20:")
print("Number of documents:", hygiene_count)

# Branch on the fetched list itself rather than on the separately queried
# count, so indexing [0] can never fail if the two reads disagree.
if hygiene_result:
    pprint(hygiene_result[0])
else:
    print("No documents found.")

print()

Establishments with hygiene score equal to 20:
Number of documents: 0
No documents found.



### 2. Which establishments in London have a `RatingValue` greater than or equal to 4?

In [10]:
# Filter: local authority name contains "London" (case-insensitive) and the
# rating value is at least 4
london_query = {
    "LocalAuthorityName": {"$regex": "London", "$options": "i"},
    "RatingValue": {"$gte": 4}
}

# Count the matches, then pull the matching documents into a list
london_count = establishments.count_documents(london_query)
london_result = list(establishments.find(london_query))

print("Establishments in London with RatingValue >= 4:")
print("Number of documents:", london_count)

# Show the first match, or say so when there is none
if london_count <= 0:
    print("No documents found.")
else:
    pprint(london_result[0])

Establishments in London with RatingValue >= 4:
Number of documents: 0
No documents found.


In [11]:
# NOTE(review): SON is not referenced anywhere in this cell; the import is kept
# in case other cells depend on it.
from bson.son import SON

# Up to 5 establishments with a RatingValue of 5 inside a small window around
# "Penang Flavours", ordered by ascending hygiene score.

# Set the latitude and longitude of "Penang Flavours"
latitude = 51.490142
longitude = 0.08384

# Half-width, in degrees, of the square search window around that point
degree_search = 0.01

# Construct the query: inside the window on both axes and rated exactly 5
query = {
    "geocode.latitude": {"$gte": latitude - degree_search, "$lte": latitude + degree_search},
    "geocode.longitude": {"$gte": longitude - degree_search, "$lte": longitude + degree_search},
    "RatingValue": 5
}

# Define the sort order (1 = ascending)
sort = [("scores.Hygiene", 1)]

# Run the query and materialise the cursor immediately. A pymongo cursor can be
# iterated only once, so storing the raw cursor in `result` and then consuming
# it would leave `result` empty for any later use.
result = list(establishments.find(query).sort(sort).limit(5))

# Convert the result to a DataFrame
df = pd.DataFrame(result)

# Print the number of rows in the DataFrame
print("Number of establishments:", len(df))

# Display the rows (at most 5, because of the limit above)
print(df.head(10))



Number of establishments: 0
Empty DataFrame
Columns: []
Index: []


In [12]:
# Query again instead of relying on `result` from an earlier cell: if that name
# still holds a pymongo cursor that has already been iterated, it yields no
# documents and the DataFrame would be empty whatever the data contains.
result = list(establishments.find(query).sort(sort).limit(5))

# Convert the result to a DataFrame
df = pd.DataFrame(result)

# Print the number of rows in the DataFrame
print("Number of establishments:", len(df))

# Display the first 10 rows of the DataFrame
print(df.head(10))

Number of establishments: 0
Empty DataFrame
Columns: []
Index: []


In [13]:
### 4. How many establishments in each Local Authority area have a hygiene score of 0?

In [15]:
# Assign the collection to a variable
establishments = db['establishments']

# Filter shared by the pipeline's $match stage and the count below, so the two
# can never drift apart. MongoDB equality matching is type-sensitive (the
# string '0' does not match a numerically stored 0), and other cells in this
# notebook filter this field on an integer, so accept either representation.
hygiene_zero_filter = {'scores.Hygiene': {'$in': [0, '0']}}

# Pipeline: keep establishments with a hygiene score of 0, count them per
# Local Authority, and sort the groups from highest to lowest count
pipeline = [
    {
        '$match': hygiene_zero_filter
    },
    {
        '$group': {
            '_id': '$LocalAuthorityName',
            'count': {'$sum': 1}
        }
    },
    {
        '$sort': {'count': -1}
    }
]

# Print the number of establishments that pass the filter (this is the number
# of matching documents, not the number of Local Authority groups)
count = establishments.count_documents(hygiene_zero_filter)
print('Number of establishments:', count)

# Execute the pipeline and retrieve the results
results = list(establishments.aggregate(pipeline))

# Print the first 10 results
for result in results[:10]:
    pprint(result)

Number of establishments: 0


In [16]:
# Run the aggregation again and collect every group document it yields
results = [group for group in establishments.aggregate(pipeline)]

# Load the group documents into a DataFrame
df = pd.DataFrame(results)

# Report how many rows (groups) the DataFrame holds
row_count = df.shape[0]
print('Number of rows:', row_count)

# Show at most the first 10 rows
print(df.head(10))

Number of rows: 0
Empty DataFrame
Columns: []
Index: []
