# Connection to DB + Data export

In [1]:
# Read the MongoDB connection string from the environment
import os

from dotenv import load_dotenv

# Load variables from the local .env file before reading the URI
load_dotenv()

MONGODB_URI = os.environ.get("MONGODB_URI")

# Stop early when the variable is missing or empty
if MONGODB_URI is None or MONGODB_URI == "":
    raise ValueError("MONGODB_URI is not set in the environment variables.")

In [2]:
from pymongo import MongoClient
from pymongo.errors import PyMongoError

# MongoClient connects lazily: the constructor returns a client object without
# contacting the server, so testing `client is not None` can never fail.
client = MongoClient(MONGODB_URI)

# Force a round trip to the server so that an unreachable host or rejected
# credentials are reported here instead of in a later cell.
try:
    client.admin.command("ping")
except PyMongoError:
    print("Failed to connect to DB")
    raise
else:
    print("Connected to DB")

Connected to DB


In [3]:
# Database handle used by the rest of the notebook
db = client.get_database("ai-wedder")

# Echo the name of the selected database
print("Successfully connected to :", db.name)

Successfully connected to : ai-wedder


### Chargement des données

In [4]:
# Target collection
Review = db["reviews"]

# Stage 1: join each review with the `venues` documents whose `_id` matches
# the review's `venue` field; matches land in a `venues` array.
join_venues_stage = {
    "$lookup": {
        "from": "venues",
        "localField": "venue",
        "foreignField": "_id",
        "as": "venues",
    }
}

# Stage 2: expose the id and name of the first joined venue as company_* fields.
company_fields_stage = {
    "$addFields": {
        "company_id": {"$first": "$venues._id"},
        "company_name": {"$first": "$venues.name"},
    }
}

# Stage 3: exclude the listed fields from the output documents.
excluded_fields = ("_id", "venue", "venues", "__v", "createdAt", "updatedAt")
drop_fields_stage = {"$project": dict.fromkeys(excluded_fields, 0)}

# Stage 4: convert company_id to a string
# (presumably so the id exports as plain text in the CSV; confirm).
stringify_company_id_stage = {
    "$addFields": {
        "company_id": {"$toString": "$company_id"},
    }
}

# Run the pipeline, then materialise the cursor into a list
reviews = Review.aggregate(
    [
        join_venues_stage,
        company_fields_stage,
        drop_fields_stage,
        stringify_company_id_stage,
    ]
)

reviews_list = list(reviews)

In [5]:
import pandas as pd

# Remove any `_id` key from the records (in place) before building the frame
for record in reviews_list:
    if "_id" in record:
        del record["_id"]

# Build the DataFrame from the list of review records
df = pd.DataFrame(reviews_list)

# Export location; the directory is created when it does not exist yet
output_dir = "../data"
csv_path = os.path.join(output_dir, "catering_reviews.csv")
os.makedirs(output_dir, exist_ok=True)

# Write the CSV without the DataFrame index column
df.to_csv(csv_path, index=False)

print(f"✅ Export done : {csv_path}")

✅ Export done : ../data/catering_reviews.csv
