In [None]:
from pathlib import Path

ROOT = Path.cwd().resolve().parents[2]
DATA_DIR = ROOT / 'data'
DOCS_DIR = ROOT / 'docs'
LOGS_DIR = ROOT / 'logs'
print('ROOT=', ROOT)
print('DATA_DIR=', DATA_DIR)
print('DOCS_DIR=', DOCS_DIR)
print('LOGS_DIR=', LOGS_DIR)


In [None]:
# Override to root-level directories
try:
    ROOT = ROOT
except NameError:
    from pathlib import Path
    ROOT = Path.cwd().resolve().parents[2]
LOGS = ROOT / 'logs'
DATA = ROOT / 'data' / 'raw'
print('ROOT=', ROOT)
print('LOGS=', LOGS)
print('DATA(raw)=', DATA)


In [None]:
# Load .env from the project root; when nothing was loaded, make sure a
# .env.example template exists next to it (an existing one is never overwritten).
from pathlib import Path

from dotenv import load_dotenv

env_path = ROOT / '.env'
loaded = load_dotenv(env_path)
# NOTE(review): per the python-dotenv docs, load_dotenv presumably also returns
# False when the file exists but defines no variable, in which case the
# "non trouvé" message below would be inaccurate — confirm.
if not loaded:
    print('⚠️ .env non trouvé:', env_path)
    example = ROOT / '.env.example'
    if example.exists():
        print('📄 Exemple déjà présent:', example)
    else:
        # Template content, one entry per output line; written with a single
        # trailing newline.
        template_lines = [
            "# PostgreSQL",
            "POSTGRES_HOST=localhost",
            "POSTGRES_PORT=5432",
            "POSTGRES_DB=datasens",
            "POSTGRES_USER=ds_user",
            "POSTGRES_PASS=ds_pass",
            "",
            "# API Keys (optionnelles pour démo)",
            "OWM_API_KEY=",
            "KAGGLE_USERNAME=",
            "KAGGLE_KEY=",
            "",
            "# Git (optionnel)",
            "GIT_USER_NAME=",
            "GIT_USER_EMAIL=",
        ]
        example.write_text("\n".join(template_lines) + "\n", encoding='utf-8')
        print('📄 Exemple créé:', example)
else:
    print('✅ .env chargé:', env_path)



# DataSens E1_v2 — 01_setup_env

- Objectifs: arborescence raw, logging, .env
- Prérequis: Python, venv activé, `pip install -r requirements.txt`
- Ordre global E1_v2: 01 → 02 → 03 → 04 → 05
- Guide: docs/GUIDE_TECHNIQUE_E1.md


In [None]:
# DataSens E1_v2 - 01_setup_env
# Config .env, arborescence raw, logging
import logging
from datetime import UTC, datetime
from pathlib import Path

ROOT = Path.cwd()
DATA = ROOT / "data" / "raw"
for sub in ["kaggle","api/owm","api/newsapi","rss","scraping/multi","scraping/viepublique","scraping/datagouv","gdelt","manifests"]:
    (DATA / sub).mkdir(parents=True, exist_ok=True)
print("✅ Arborescence raw créée:", DATA)

# Logging
LOGS = ROOT.parent / "logs"
LOGS.mkdir(parents=True, exist_ok=True)
stamp = datetime.now(UTC).strftime("%Y%m%d_%H%M%S")
log_file = LOGS / f"collecte_{stamp}.log"
logging.basicConfig(level=logging.INFO, format='[%(asctime)s] %(levelname)s - %(message)s', datefmt='%H:%M:%S', handlers=[logging.FileHandler(log_file, encoding='utf-8'), logging.StreamHandler()])
logging.info("Système de logging initialisé")
print("📄 Log:", log_file)

