In [5]:
#!/usr/bin/env python3
# -*- coding: utf-8 -*-

"""
analysis_pipeline_custom_output.py

Version :
----------
Pipeline simplifié où on spécifie :
    - le dossier de travail (`TARGET_DIR`)
    - le nom du fichier de sortie (`OUTPUT_CSV_NAME`)

Le dossier doit contenir un CSV "analysis*.csv" avec les colonnes :
    company, ticker, avg_pos, avg_neg, avg_neu, avg_mix

Le script calcule un sentiment_score ∈ [0,1] pour chaque entreprise,
et écrit les colonnes :
    name, ticker, sentiment_score
dans le CSV spécifié.
"""

import os
import glob
import os.path as osp
import pandas as pd

# ---------------------------------------------------------------------
# CONFIG
# ---------------------------------------------------------------------
TARGET_DIR = "/home/sagemaker-user/shared/streamlit-app/data/analysis_result/5"   # Dossier à traiter
OUTPUT_CSV_NAME = "5.中华人民共和国能源法__中国政府网.csv"

# ---------------------------------------------------------------------
# PIPELINE PRINCIPAL
# ---------------------------------------------------------------------
def main():
    """Run the pipeline: load the analysis CSV, score each row, write the result.

    The input directory and the output file name come from the module-level
    constants ``TARGET_DIR`` and ``OUTPUT_CSV_NAME``; the output file is
    placed inside ``TARGET_DIR``.
    """
    destination = osp.join(TARGET_DIR, OUTPUT_CSV_NAME)
    for banner in (
        f"[INFO] Working directory: {TARGET_DIR}",
        f"[INFO] Output CSV: {destination}",
    ):
        print(banner)

    # Load -> score -> write, in that order.
    raw_table = load_analysis_csv(TARGET_DIR)
    scored_table = compute_company_sentiment_scores(raw_table)
    write_company_scores_csv(destination, scored_table)

    print(f"[OK] Fichier généré : {destination}")
    print("This document has a huge impact ✅")


# ---------------------------------------------------------------------
# SOUS-FONCTIONS
# ---------------------------------------------------------------------
def load_analysis_csv(target_dir: str) -> pd.DataFrame:
    """Load the CSV whose file name starts with 'analysis' from ``target_dir``.

    When several files match ``analysis*.csv``, the one whose path sorts
    first is used.

    Args:
        target_dir: Directory to search (not searched recursively).

    Returns:
        The contents of the selected CSV as read by ``pandas.read_csv``.

    Raises:
        FileNotFoundError: If no ``analysis*.csv`` file exists in ``target_dir``.
    """
    # Escape the directory part so that glob metacharacters in its name
    # ("[", "]", "*", "?") are matched literally; only the file-name part
    # is meant to be a pattern.
    pattern = osp.join(glob.escape(target_dir), "analysis*.csv")
    matches = glob.glob(pattern)
    if not matches:
        raise FileNotFoundError(f"Aucun fichier 'analysis*.csv' trouvé dans {target_dir}")
    # min() selects the same path as sorted(matches)[0] without a full sort.
    path = min(matches)
    print(f"[LOAD] {path}")
    return pd.read_csv(path)


def compute_company_sentiment_scores(df: pd.DataFrame) -> pd.DataFrame:
    """Compute a sentiment score in [0, 1] for every row of ``df``.

    The raw score is ``pos + 0.5 * mix + 0.25 * neu - neg``. It is mapped
    through ``(raw + 1) / 2`` and then clipped to the closed interval [0, 1].

    Args:
        df: Table with the columns ``company``, ``ticker``, ``avg_pos``,
            ``avg_neg``, ``avg_neu`` and ``avg_mix``.

    Returns:
        A new DataFrame on the same index as ``df`` with the columns ``name``
        (taken from ``company``), ``ticker`` and ``sentiment_score``. A row
        with a missing sentiment value gets NaN as its score instead of a
        fabricated number.

    Raises:
        ValueError: If one of the required columns is missing. Values that
            cannot be converted to float propagate the conversion error
            raised by pandas.
    """
    required = ["company", "ticker", "avg_pos", "avg_neg", "avg_neu", "avg_mix"]
    for col in required:
        if col not in df.columns:
            raise ValueError(f"Colonne manquante : {col}")

    pos, neg, neu, mix = (
        df[col].astype("float64")
        for col in ("avg_pos", "avg_neg", "avg_neu", "avg_mix")
    )
    raw = 1.0 * pos + 0.5 * mix + 0.25 * neu - 1.0 * neg
    # Series.clip leaves NaN untouched. The builtin clamp
    # max(0.0, min(1.0, nan)) evaluates to 1.0, which would give rows with
    # missing data the best possible score.
    score = ((raw + 1.0) / 2.0).clip(lower=0.0, upper=1.0)

    return pd.DataFrame({
        "name": df["company"],
        "ticker": df["ticker"],
        "sentiment_score": score,
    })


def write_company_scores_csv(output_path: str, df: pd.DataFrame) -> None:
    """Write the final scores CSV at exactly the requested location.

    Missing parent directories of ``output_path`` are created first. The
    DataFrame index is not written to the file.

    Args:
        output_path: Destination file path; a bare file name is written
            relative to the current working directory.
        df: Table to serialize.
    """
    parent_dir = osp.dirname(output_path)
    # A bare file name has an empty dirname, and os.makedirs("") raises
    # FileNotFoundError, so only create directories when there is one.
    if parent_dir:
        os.makedirs(parent_dir, exist_ok=True)
    df.to_csv(output_path, index=False)
    print(f"[WRITE] Résultats écrits dans {output_path}")


# ---------------------------------------------------------------------
# MAIN
# ---------------------------------------------------------------------
# Run the pipeline only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()

[INFO] Working directory: /home/sagemaker-user/shared/streamlit-app/data/analysis_result/4
[INFO] Output CSV: /home/sagemaker-user/shared/streamlit-app/data/analysis_result/4/4.REGULATION (EU) 20241689 OF THE EUROPEAN PARLIAMENT AND OF THE COUNCIL.csv
[LOAD] /home/sagemaker-user/shared/streamlit-app/data/analysis_result/4/analysis_regulation-eu-20241689-of-the-european-parliament-and-of-the-council.csv
[WRITE] Résultats écrits dans /home/sagemaker-user/shared/streamlit-app/data/analysis_result/4/4.REGULATION (EU) 20241689 OF THE EUROPEAN PARLIAMENT AND OF THE COUNCIL.csv
[OK] Fichier généré : /home/sagemaker-user/shared/streamlit-app/data/analysis_result/4/4.REGULATION (EU) 20241689 OF THE EUROPEAN PARLIAMENT AND OF THE COUNCIL.csv
This document has a huge impact ✅
