In [20]:
# Name of the MongoDB collection that this notebook exports.
COLLECTION_NAME = "Verkehr"
# Destination path of the CSV file produced by the export.
OUT_CSV_PATH = "./export/ExportVerkehr.csv"

In [21]:
import os
from dotenv import load_dotenv
from pymongo import MongoClient
import pandas as pd
from tqdm import tqdm

In [22]:
# Pull connection settings into the process environment (python-dotenv).
load_dotenv()

mongo_user = os.getenv("MONGO_USER")
mongo_pass = os.getenv("MONGO_PASS")
mongo_host = os.getenv("MONGO_HOST", "localhost")
mongo_port = os.getenv("MONGO_PORT", "27017")
mongo_db   = os.getenv("MONGO_DB")

# Fail early with a readable message if any setting is missing or empty.
if not all([mongo_user, mongo_pass, mongo_host, mongo_port, mongo_db]):
    raise ValueError("Mongo ENV Variablen unvollständig. Bitte .env prüfen: MONGO_USER, MONGO_PASS, MONGO_HOST, MONGO_PORT, MONGO_DB")

# Credentials are handed to MongoClient as keyword arguments instead of being
# embedded in the URI. Inside a URI they would have to be percent-escaped, so
# a user name or password containing '@', ':', '/' or '%' would corrupt the
# connection string. As a side benefit `uri` no longer contains the secret.
uri = f"mongodb://{mongo_host}:{mongo_port}/?authSource=admin"
client = MongoClient(uri, username=mongo_user, password=mongo_pass)
db = client[mongo_db]

# Round trip to the server so connection or authentication problems surface
# in this cell rather than in a later one.
db.command("ping")
print("MongoDB Verbindung OK:", mongo_host, mongo_port, "DB:", mongo_db)

MongoDB Verbindung OK: localhost 27017 DB: LuftqualitaetDB


In [23]:
from typing import Optional, Dict, Any, List

def collection_to_df(
    collection_name: str,
    query: Optional[Dict[str, Any]] = None,
    projection: Optional[Dict[str, int]] = None,
    batch_size: int = 5000,
    limit: Optional[int] = None
) -> pd.DataFrame:
    """Read the documents of a MongoDB collection into a pandas DataFrame.

    The collection is looked up on the notebook-level ``db`` handle.

    Args:
        collection_name: Name of the collection to read.
        query: Filter passed to ``find``; ``None`` (or an empty dict) is
            replaced by ``{}``.
        projection: Optional projection passed to ``find``.
        batch_size: Batch size passed to ``find``.
        limit: If not ``None``, converted with ``int()`` and applied to the
            cursor via ``limit()``.

    Returns:
        A DataFrame built from the list of fetched documents, with every
        ``_id`` value converted to ``str``.
    """
    col = db[collection_name]
    query = query or {}

    cursor = col.find(query, projection=projection, batch_size=batch_size)
    if limit is not None:
        cursor = cursor.limit(int(limit))

    rows: List[dict] = []
    # The cursor is used as a context manager so that it is closed even when
    # iteration stops early (an exception or a kernel interrupt mid-export).
    with cursor:
        for doc in tqdm(cursor, desc=f"Read -> {collection_name}"):
            # Mongo's _id is an ObjectId; store it as a plain string for CSV.
            if "_id" in doc:
                doc["_id"] = str(doc["_id"])
            rows.append(doc)

    return pd.DataFrame(rows)

In [24]:
def save_df_as_csv(
    df: pd.DataFrame,
    out_path: str,
    sep: str = ";",
    decimal: str = ",",
    encoding: str = "utf-8"
) -> None:
    """Write ``df`` to ``out_path`` as a CSV file, omitting the index column.

    Args:
        df: Frame to export.
        out_path: Target file path handed to ``DataFrame.to_csv``.
        sep: Field separator (default ``";"``).
        decimal: Decimal mark used for floating-point values (default ``","``).
        encoding: Text encoding of the written file (default ``"utf-8"``).
    """
    # Numeric values are written without thousands separators; the
    # semicolon / decimal-comma defaults were chosen by the original author
    # with opening the file in Excel in mind.
    csv_options = {
        "index": False,
        "sep": sep,
        "decimal": decimal,
        "encoding": encoding,
    }
    df.to_csv(out_path, **csv_options)

In [25]:
# Batch size handed to collection_to_df, which forwards it to the cursor.
BATCH_SIZE = 5000

# Optional export settings:
QUERY = {}           # e.g. {"station_id": 123}, or leave empty
PROJECTION = None    # e.g. {"_id": 1, "timestamp": 1, "pm10": 1}, or None
LIMIT = None         # e.g. 10000 for a test export

# Read the configured collection into a DataFrame using the settings above.
df = collection_to_df(
    COLLECTION_NAME,
    query=QUERY,
    projection=PROJECTION,
    batch_size=BATCH_SIZE,
    limit=LIMIT
)

# Report the size of the result and preview the first rows.
print("Daten geladen. Shape:", df.shape)
display(df.head())

Read -> Verkehr: 33it [00:00, 17194.04it/s]

Daten geladen. Shape: (33, 12)





Unnamed: 0,_id,NUTS1,NUTS2,NUTS3,DISTRICT_CODE,SUB_DISTRICT_CODE,YEAR,UNIT,REF_YEAR,ROAD_TRAFFIC,SCWR_CALC,_imported_at
0,695f9339440359c0b02efb73,AT1,AT13,AT130,90001,0,1990,1.000 t,2022,2162.23,1344.17,2026-01-08 11:21:29.340
1,695f9339440359c0b02efb74,AT1,AT13,AT130,90001,0,1991,1.000 t,2022,2396.25,1386.75,2026-01-08 11:21:29.340
2,695f9339440359c0b02efb75,AT1,AT13,AT130,90001,0,1992,1.000 t,2022,2384.47,1437.86,2026-01-08 11:21:29.340
3,695f9339440359c0b02efb76,AT1,AT13,AT130,90001,0,1993,1.000 t,2022,2396.73,1458.04,2026-01-08 11:21:29.340
4,695f9339440359c0b02efb77,AT1,AT13,AT130,90001,0,1994,1.000 t,2022,2395.62,1508.45,2026-01-08 11:21:29.340


In [26]:
import os
# Make sure the target directory exists before writing. dirname() returns ""
# for a bare file name (e.g. "Export.csv"), and os.makedirs("") raises
# FileNotFoundError, so fall back to the current directory in that case.
os.makedirs(os.path.dirname(OUT_CSV_PATH) or ".", exist_ok=True)

# Write the loaded frame using semicolon-separated, decimal-comma formatting.
save_df_as_csv(df, OUT_CSV_PATH, sep=";", decimal=",")
print("Export gespeichert unter:", OUT_CSV_PATH)

Export gespeichert unter: ./export/ExportVerkehr.csv
