# Eat Safe, Love

## Notebook Set Up

In [None]:
# Import dependencies
from pymongo import MongoClient
from pprint import pprint
import pandas as pd

In [None]:
# Build the MongoClient used by every later cell, connecting on port 27017
MONGO_PORT = 27017
mongo = MongoClient(port=MONGO_PORT)

In [None]:
# Bind the 'uk_food' database to the name `db`
database_name = 'uk_food'
db = mongo[database_name]

In [None]:
# Review the collections held in the database

# Ask the database for the names of its collections...
collection_names = db.list_collection_names()

# ...and show the resulting list
print(collection_names)



In [None]:
# Bind the 'establishments' collection to the name `collection`
collection_name = 'establishments'
collection = db[collection_name]

## Part 3: Exploratory Analysis
Unless otherwise stated, for each question: 
* Use `count_documents` to display the number of documents contained in the result.
* Display the first document in the results using `pprint`.
* Convert the result to a Pandas DataFrame, print the number of rows in the DataFrame, and display the first 10 rows.

### 1. Which establishments have a hygiene score equal to 20?

In [None]:

# Query for the establishments whose hygiene score equals 20
hygiene_query = {"scores.Hygiene": 20}

# Use count_documents to display the number of documents in the result
hygiene_count = collection.count_documents(hygiene_query)
print("Number of documents with hygiene score 20:", hygiene_count)

# Display the first document with find_one: it returns None when nothing
# matches, whereas indexing a cursor with [0] raises IndexError
pprint(collection.find_one(hygiene_query))

# Cursor over the matching documents, kept for the DataFrame conversion
hygiene_results = collection.find(hygiene_query)


In [None]:
# Convert the result to a Pandas DataFrame. The query is run again rather
# than reusing an earlier cursor, because a cursor can only be iterated once
# and re-running this cell would otherwise produce an empty DataFrame.
hygiene_df = pd.DataFrame(collection.find(hygiene_query))

# Display the number of rows in the DataFrame
print(f"There are {len(hygiene_df)} rows in this DataFrame.")

# Display the first 10 rows of the DataFrame
hygiene_df.head(10)

### 2. Which establishments in London have a `RatingValue` greater than or equal to 4?

In [None]:
# Match establishments whose Local Authority name contains "London" and
# whose RatingValue is greater than or equal to 4
# NOTE(review): the $gte comparison assumes RatingValue is stored as a
# number rather than a string - confirm against the data-preparation notebook
query_london = {
    'LocalAuthorityName': {'$regex': 'London'},
    'RatingValue': {'$gte': 4},
}

# Use count_documents to display the number of documents in the result
count_london = collection.count_documents(query_london)
print(f"There are {count_london} establishments in London that have a RatingValue greater than or equal to 4.")

# Display the first document in the results using pprint
# (pprint is imported once with the other dependencies, so it is not
# re-imported in this cell)
first_document = collection.find_one(query_london)
pprint(first_document)


In [None]:
# Load every document matching the London query into a DataFrame
london_cursor = collection.find(query_london)
london_df = pd.DataFrame(london_cursor)

# Report how many rows were loaded
print(f"There are {len(london_df)} rows in this DataFrame.")

# Show the first 10 rows as the cell output
london_df.head(10)

### 3. What are the top 5 establishments with a `RatingValue` of 5, sorted by lowest hygiene score, nearest to the new restaurant added, "Penang Flavours"?

In [None]:
# Find the latitude and longitude of "Penang Flavours"
# The projection is written as a {field: 1} dict, which states explicitly
# which fields to include, instead of a set literal of field names
penang_flavours = collection.find_one(
    {'BusinessName': 'Penang Flavours'},
    {'geocode.latitude': 1, 'geocode.longitude': 1},
)

# find_one returns None when no document matches; report that instead of
# failing with a TypeError on the subscript below
if penang_flavours is None:
    print("No establishment named 'Penang Flavours' was found.")
else:
    pprint(penang_flavours['geocode'])


In [None]:
import math

# Latitude and Longitude of "Penang Flavours"
pf_latitude = 51.490142
pf_longitude = 0.083840

# Search within this many degrees on either side of the latitude and longitude
degree_search = 0.01

# Rating value must equal 5, the hygiene score must not be an empty string,
# and both coordinates must fall strictly inside the search window
query = {
    'RatingValue': 5,
    'scores.Hygiene': {'$ne': ''},
    'geocode.latitude': {'$gt': pf_latitude - degree_search, '$lt': pf_latitude + degree_search},
    'geocode.longitude': {'$gt': pf_longitude - degree_search, '$lt': pf_longitude + degree_search},
}

# Use count_documents to display the number of documents in the result
print("Number of establishments matching the query:", collection.count_documents(query))

# Sort the establishments by hygiene score in ascending order and keep the
# top 5. The cursor is stored as a list so the results can be read more than
# once (a cursor can only be iterated a single time).
sort = [('scores.Hygiene', 1)]
results = list(collection.find(query).sort(sort).limit(5))

# Print the top 5 establishments
print("Top 5 establishments with RatingValue 5, sorted by lowest hygiene score and nearest to 'Penang Flavours':")
for i, establishment in enumerate(results, 1):
    geocode = establishment['geocode']
    # Straight-line distance measured in degrees of latitude/longitude,
    # not in metres or kilometres
    distance = math.dist(
        (pf_latitude, pf_longitude),
        (float(geocode['latitude']), float(geocode['longitude'])),
    )
    print(f"#{i}")
    print("Business Name:", establishment['BusinessName'])
    print("Hygiene Score:", establishment['scores']['Hygiene'])
    print("Distance from 'Penang Flavours':", distance)
    print("-----------------------------------------")


In [None]:
# # Search within 0.01 degree on either side of the latitude and longitude.
# # Rating value must equal 5
# # Sort by hygiene score

# degree_search = 0.01
# latitude = 
# longitude = 

# query = 
# sort =  

# # Print the results


In [None]:
# Convert result to Pandas DataFrame
# The query is run again here because a cursor can only be iterated once
top_five_df = pd.DataFrame(
    collection.find(query).sort([('scores.Hygiene', 1)]).limit(5)
)

# Display the number of rows in the DataFrame
print(f"There are {len(top_five_df)} rows in this DataFrame.")

# Display the first 10 rows of the DataFrame
top_five_df.head(10)


### 4. How many establishments in each Local Authority area have a hygiene score of 0?

In [None]:
# Filter used by the $match stage: hygiene score of exactly 0
query4 = {'scores.Hygiene': 0}

# $group stage: one output document per Local Authority with a running count
group_by_local_authority = {'$group': {'_id': '$LocalAuthorityName', 'count': {'$sum': 1}}}

# $sort stage: largest counts first
sort_by_count = {'$sort': {'count': -1}}

# $limit stage: keep only the first ten documents after sorting
limit_results = {'$limit': 10}

# Assemble the stages in execution order: match, group, sort, limit
pipeline = [{'$match': query4}]
pipeline.extend([group_by_local_authority, sort_by_count, limit_results])

# Run the aggregation
results = collection.aggregate(pipeline)

# Print one "<authority> - <count>" line per returned document
print("Top ten Local Authority areas with establishments having a hygiene score of 0:")
for row in results:
    authority, total = row['_id'], row['count']
    print(authority, "-", total)


In [None]:

# Aggregation pipeline: count the establishments with a hygiene score of 0
# in each Local Authority area, largest counts first. No $limit stage is
# used, so the result keeps every Local Authority area; only the display
# further down is restricted to the first ten.
pipeline = [
    {'$match': {'scores.Hygiene': 0}},
    {'$group': {'_id': '$LocalAuthorityName', 'count': {'$sum': 1}}},
    {'$sort': {'count': -1}},
]

# Execute the aggregation query; list() stores the documents so they can be
# both printed and converted to a DataFrame
results_hygiene_0 = list(collection.aggregate(pipeline))

# Print the number of documents in the result
print("Number of Local Authority areas in the result:", len(results_hygiene_0))

# Print the top ten Local Authority areas with establishments having a hygiene score of 0
print("Top ten Local Authority areas with establishments having a hygiene score of 0:")
for result in results_hygiene_0[:10]:
    pprint(result)

# Convert results to a Pandas DataFrame, print the number of rows and
# display the first 10 rows
results_hygiene_0_df = pd.DataFrame(results_hygiene_0)
print("Rows in DataFrame:", len(results_hygiene_0_df))
print(results_hygiene_0_df.head(10))


In [None]:
# Convert results to a Pandas DataFrame
results_hygiene_0_df = pd.DataFrame(results_hygiene_0)

# Print the number of rows in the DataFrame
print("Rows in DataFrame:", len(results_hygiene_0_df))

# Display the first 10 rows
print(results_hygiene_0_df.head(10))