In [48]:
import os
import configparser
import pymongo
import pandas as pd
import warnings

# Silence every warning for the rest of the session (keeps the cell output clean).
warnings.filterwarnings("ignore")

# Paths and directory: the config file is resolved relative to the current
# working directory, so that directory must contain '03-configfiles'.
current_directory = os.getcwd()
config = configparser.ConfigParser()

config_file_path = os.path.join(current_directory, '03-configfiles', 'pw-config.ini')
# ConfigParser.read() silently skips files it cannot open and returns the list
# of files it actually parsed. Fail loudly here instead of surfacing a
# confusing NoSectionError on the first config.get() call below.
if not config.read(config_file_path):
    raise FileNotFoundError(f"Config file not found or unreadable: {config_file_path}")

# Get the MongoDB connection settings from the [mongo] section of the config file
mongo_client = config.get('mongo', 'connectiondb')  # connection string handed to MongoClient, not a client object
mongo_collection = config.get('mongo', 'collectionmongo')
# NOTE(review): 'dbmonog' looks like a typo of 'dbmongo', but it has to match
# the key actually used in the .ini file - confirm before renaming.
mongo_db = config.get('mongo', 'dbmonog')

# Establish connection to MongoDB
client = pymongo.MongoClient(mongo_client)

# Select the database and collection
db = client[mongo_db]
collection = db[mongo_collection]

# Define the aggregation pipeline.
# Produces per-(cuisine, borough) score statistics for restaurants whose
# cuisine contains "Japan" and that are located in Queens or Brooklyn.
pipeline = [
    # Filter first: the match only looks at 'cuisine' and 'borough', so it gives
    # the same result before or after the unwind, but running it first means
    # only the matching restaurants have their grades unwound.
    {
        "$match": {
            # Multiple fields in one $match are implicitly AND-ed.
            "cuisine": {"$regex": "Japan"},
            "borough": {"$in": ["Queens", "Brooklyn"]},
        }
    },
    # One output document per entry of the 'grades' array.
    {"$unwind": "$grades"},
    {"$group": {
        "_id": {"cuisine": "$cuisine", "borough": "$borough"},
        "avg": {"$avg": "$grades.score"},
        # Counts unwound grade entries, not distinct restaurants.
        "count": {"$sum": 1},
        "min": {"$min": "$grades.score"},
        "max": {"$max": "$grades.score"},
    }},
    # Flatten the compound group key into top-level fields and drop '_id'.
    {"$project": {
        "_id": 0,
        "borough": "$_id.borough",
        "cuisine": "$_id.cuisine",
        "avg": 1,
        "count": 1,
        "min": 1,
        "max": 1,
    }},
    {"$sort": {"avg": -1}},  # Sorting by 'avg' field in descending order
]

# Query data from MongoDB using the defined pipeline
data = list(collection.aggregate(pipeline))  # Materialize the cursor into a list of dicts

# Create a DataFrame with a fixed column order. Passing `columns=` to the
# constructor (instead of indexing afterwards) keeps this working when the
# aggregation returns no rows: the result is an empty frame with the expected
# columns rather than a KeyError.
df = pd.DataFrame(data, columns=['borough', 'cuisine', 'count', 'avg', 'min', 'max'])

print(df)

# Save the filtered data to a CSV file
csv_file_path = os.path.join(current_directory, '01-data', 'mongo', 'data_from_mongodb_filtered_BK.csv')
try:
    # to_csv does not create missing parent directories, so make sure the
    # target folder exists before writing.
    os.makedirs(os.path.dirname(csv_file_path), exist_ok=True)
    df.to_csv(csv_file_path, index=False)
except OSError as e:
    # Best-effort export: report filesystem failures without aborting the cell.
    print(f"Error saving CSV file: {e}")
else:
    print('CSV output completed')


    borough           cuisine  count        avg  min  max
0  Brooklyn          Japanese      9  15.888889    7   27
1    Queens          Japanese     44  12.068182    2   36
2  Brooklyn  Chinese/Japanese      5  10.800000    5   23
CSV output completed
