# Connection to DB + Data export

In [1]:
# Load environment variables
from dotenv import load_dotenv
import os

# Pull the variables declared in the .env file into the process environment,
# then read the MongoDB connection string from there.
load_dotenv()
MONGODB_URI = os.environ.get("MONGODB_URI")

# Fail fast: a missing or empty URI cannot be used to open a connection.
if MONGODB_URI is None or MONGODB_URI == "":
    raise ValueError("MONGODB_URI is not set in the environment variables.")

In [2]:
from pymongo import MongoClient
from pymongo.errors import ConnectionFailure

# Building the client object is not a connectivity check: the constructor
# always returns a MongoClient instance (never None), so testing the object
# itself would report success even when the server cannot be reached.
client = MongoClient(MONGODB_URI)

try:
    # "ping" forces an actual round trip to the server, which is what
    # tells us whether the deployment is reachable.
    client.admin.command("ping")
except ConnectionFailure:
    print("Failed to connect to DB")
    # Re-raise so the following cells do not run against an unusable client.
    raise
else:
    print("Connected to DB")

Connected to DB


In [3]:
# Database handle used by the rest of the notebook
db = client.get_database("ai-wedder")

print("Successfully connected to :", db.name)

Successfully connected to : ai-wedder


### Chargement des données

In [4]:
# Target collection
Review = db["reviews"]

# Stage 1: collapse the reviews into one document per distinct `venue` value,
# keeping the text and the AI-generated fields of every review.
group_by_venue = {
    "$group": {
        "_id": "$venue",
        "reviews": {
            "$push": {
                "text": "$text",
                "sentiment": "$aiSentiment",
                "sentimentConfidenceScore": "$aiConfidenceScore",
                "tags": "$aiClusters"
            }
        }
    }
}

# Stage 2: join each group with the `venues` collection on its `_id`;
# matches are stored as an array under `catering_company`.
join_venue = {
    "$lookup": {
        "from": "venues",
        "localField": "_id",
        "foreignField": "_id",
        "as": "catering_company"
    }
}

# Stage 3: take the first element of the joined id/name arrays and
# carry the grouped reviews along.
keep_company_fields = {
    "$project": {
        "cateringCompanyId": { "$first": "$catering_company._id" },
        "cateringCompanyName": { "$first": "$catering_company.name" },
        "reviews": "$reviews"
    }
}

# Stage 4: drop the group key from the output documents.
drop_group_id = {
    "$project": {
        "_id": 0
    }
}

# Run the pipeline and materialise the cursor into a plain list
reviews = Review.aggregate(
    [group_by_venue, join_venue, keep_company_fields, drop_group_id]
)

reviews_list = list(reviews)

In [8]:
import json
import os
from bson import ObjectId

# Recursive transformation helper
def convert_objectids(obj):
    """Return a copy of ``obj`` in which every ``ObjectId`` is replaced by its string form.

    Lists and dicts are walked recursively (dict keys are kept as they are,
    only values are converted). Any other value is returned unchanged.
    """
    if isinstance(obj, list):
        return list(map(convert_objectids, obj))
    if isinstance(obj, dict):
        return {key: convert_objectids(value) for key, value in obj.items()}
    if isinstance(obj, ObjectId):
        return str(obj)
    return obj

# Replace ObjectIds by strings so the documents can be serialised
cleaned_reviews = convert_objectids(reviews_list)

# Create the export directory if it does not exist yet
output_dir = "../data"
os.makedirs(output_dir, exist_ok=True)

# Serialise BEFORE opening the file: opening in "w" mode truncates it, so a
# serialisation error raised while the file is already open would leave an
# empty or partial JSON file and destroy any previous export.
output_path = f"{output_dir}/catering_reviews_clustered.json"
serialized_reviews = json.dumps(cleaned_reviews, ensure_ascii=False, indent=2)

# Save as UTF-8 JSON
with open(output_path, "w", encoding="utf-8") as f:
    f.write(serialized_reviews)

print(f"✅ Export done : {output_path}")

✅ Export done : ../data/catering_reviews_clustered.json
