In [1]:
import pandas as pd
import numpy as np
from datetime import datetime
from functions import *

class ClimateRiskPipeline:
    """End-to-end pipeline for sector-level climate risk analysis.

    The steps delegate to helper functions that are not defined in this
    file (``load_sector_base_data``, ``fetch_financial_data``,
    ``calculate_climate_risk_score``, ``merge_climate_financial_data``,
    ``classify_risk_levels``, ``analyze_correlations``,
    ``generate_risk_report``, ``export_analysis_results``,
    ``calculate_portfolio_risk``) -- presumably supplied by
    ``from functions import *``; confirm against that module.

    Every result attribute is ``None`` until the matching step has run.
    """

    # Name of the CSV file written inside ``output_dir`` by ``_save_results``.
    RESULTS_FILENAME = 'climate_risk_analysis.csv'

    def __init__(self, output_dir='./'):
        """Create an empty pipeline.

        Args:
            output_dir: Directory in which the results file is written.
                A trailing path separator is optional.
        """
        self.output_dir = output_dir
        self.df_sectors = None
        self.financial_data = None
        self.df_complete = None
        self.analysis_results = None
        self.correlations = None
        self.risk_stats = None

    def run_complete_analysis(self, fetch_live_data=True, save_results=True):
        """Run every pipeline step in order and return the merged dataframe.

        Args:
            fetch_live_data: When true, financial data comes from
                ``fetch_financial_data``; otherwise random placeholder
                values are generated.
            save_results: When true, the final dataframe is exported.

        Returns:
            ``self.df_complete`` as left by the classification step.
        """
        print("PIPELINE ANALYSE RISQUES CLIMATIQUES - D√âBUT")
        print("=" * 60)

        start_time = datetime.now()

        # Step 1: base sector data
        print("\n√âTAPE 1: Chargement des donn√©es sectorielles de base")
        self._load_base_data()

        # Step 2: financial data
        print("\n√âTAPE 2: R√©cup√©ration des donn√©es financi√®res")
        self._fetch_financial_data(fetch_live_data)

        # Step 3: climate risk scores
        print("\n√âTAPE 3: Calcul des scores de risque climatique")
        self._calculate_climate_scores()

        # Step 4: merge (must happen BEFORE classification)
        print("\n√âTAPE 4: Fusion des donn√©es climatiques et financi√®res")
        self._merge_datasets()

        # Step 5: classification and enrichment (AFTER the merge)
        print("\n√âTAPE 5: Classification des niveaux de risque")
        self._classify_and_enrich()

        # Step 6: correlations
        print("\n√âTAPE 6: Analyse des corr√©lations et statistiques")
        self._analyze_correlations()

        # Step 7: report
        print("\n√âTAPE 7: G√©n√©ration du rapport d'analyse")
        self._generate_analysis_report()

        # Step 8: optional export
        if save_results:
            print("\n√âTAPE 8: Sauvegarde des r√©sultats")
            self._save_results()

        execution_time = (datetime.now() - start_time).total_seconds()

        print(f"\nPIPELINE TERMIN√â en {execution_time:.1f}s")
        print("=" * 60)

        return self.df_complete

    def _load_base_data(self):
        """Load the base sector data into ``self.df_sectors``."""
        self.df_sectors = load_sector_base_data()
        print(f"‚úì {len(self.df_sectors)} secteurs charg√©s")
        print(f"  Colonnes: {list(self.df_sectors.columns)}")

    def _fetch_financial_data(self, fetch_live=True):
        """Populate ``self.financial_data`` (live or simulated).

        Args:
            fetch_live: When true, call ``fetch_financial_data(365)``;
                otherwise draw random values for every entry of
                ``self.df_sectors['Sector']`` (so the base data must
                already be loaded).
        """
        if fetch_live:
            print("  R√©cup√©ration des donn√©es financi√®res en temps r√©el...")
            self.financial_data = fetch_financial_data(365)
            # A sector counts as fetched only when its return is a real,
            # non-zero number. NaN is excluded explicitly because
            # ``nan != 0`` is True and would be reported as a success.
            # NOTE(review): assumes a zero return marks a failed download
            # -- confirm against fetch_financial_data.
            success_count = sum(
                1
                for values in self.financial_data.values()
                if pd.notna(values['Annual_Return']) and values['Annual_Return'] != 0
            )
            print(f"‚úì Donn√©es r√©cup√©r√©es pour {success_count}/{len(self.financial_data)} secteurs")
        else:
            print("  Mode simulation: donn√©es financi√®res par d√©faut")
            sectors = self.df_sectors['Sector'].tolist()
            self.financial_data = {
                sector: {
                    'Annual_Return': np.random.normal(8, 15),
                    # abs(): a volatility cannot be negative, which a raw
                    # normal draw does not guarantee.
                    'Volatility': abs(np.random.normal(20, 5)),
                }
                for sector in sectors
            }
            print(f"‚úì Donn√©es simul√©es pour {len(self.financial_data)} secteurs")

    def _calculate_climate_scores(self):
        """Add climate risk scores to ``self.df_sectors`` and print a summary."""
        self.df_sectors = calculate_climate_risk_score(self.df_sectors)
        scores = self.df_sectors['Climate_Risk_Score']
        avg_score = scores.mean()
        # Scores of 70 and above are reported as high risk.
        high_risk_count = int((scores >= 70).sum())
        print(f"‚úì Scores calcul√©s - Moyenne: {avg_score:.1f}, Risque √©lev√©: {high_risk_count} secteurs")

    def _merge_datasets(self):
        """Merge climate and financial data into ``self.df_complete``."""
        self.df_complete = merge_climate_financial_data(self.df_sectors, self.financial_data)
        print(f"‚úì Datasets fusionn√©s - Shape finale: {self.df_complete.shape}")
        print(f"  Colonnes finales: {len(self.df_complete.columns)}")

    def _classify_and_enrich(self):
        """Classify risk levels on the merged data and print the distribution."""
        # Runs on df_complete (not df_sectors) so the financial columns
        # added by the merge are available to the classifier.
        self.df_complete = classify_risk_levels(self.df_complete)
        risk_distribution = self.df_complete['Risk_Level'].value_counts()
        print("‚úì Classification termin√©e:")
        for level, count in risk_distribution.items():
            print(f"  {level}: {count} secteurs")

    def _analyze_correlations(self):
        """Store correlations and risk statistics computed on the merged data."""
        self.correlations, self.risk_stats = analyze_correlations(self.df_complete)
        print("‚úì Corr√©lations calcul√©es:")
        print(f"  Risque vs Rendement: {self.correlations['climate_return']:.3f}")
        print(f"  Risque vs Volatilit√©: {self.correlations['climate_volatility']:.3f}")

    def _generate_analysis_report(self):
        """Build the analysis report and store it in ``self.analysis_results``."""
        self.analysis_results = generate_risk_report(self.df_complete)
        print("‚úì Rapport g√©n√©r√©:")
        print(f"  Secteurs √† haut risque: {self.analysis_results['high_risk_count']}")
        print(f"  Score moyen: {self.analysis_results['avg_risk_score']:.1f}")

    def _output_path(self):
        """Return the results file path inside ``self.output_dir``."""
        import os

        # os.path.join inserts the separator only when it is missing, so
        # both 'results' and 'results/' (and the default './') work.
        return os.path.join(self.output_dir, self.RESULTS_FILENAME)

    def _save_results(self):
        """Export ``self.df_complete`` to the results file and print the outcome."""
        result = export_analysis_results(self.df_complete, self._output_path())
        print(f"‚úì {result}")

    def get_top_risk_sectors(self, n=3):
        """Return the ``n`` sectors with the highest climate risk score.

        Returns:
            A dataframe with the Sector, Climate_Risk_Score and
            Annual_Return columns, or ``None`` if the pipeline has not run.
        """
        if self.df_complete is None:
            return None
        return self.df_complete.nlargest(n, 'Climate_Risk_Score')[['Sector', 'Climate_Risk_Score', 'Annual_Return']]

    def get_low_risk_sectors(self, n=3):
        """Return the ``n`` sectors with the lowest climate risk score.

        Returns:
            A dataframe with the Sector, Climate_Risk_Score and
            Annual_Return columns, or ``None`` if the pipeline has not run.
        """
        if self.df_complete is None:
            return None
        return self.df_complete.nsmallest(n, 'Climate_Risk_Score')[['Sector', 'Climate_Risk_Score', 'Annual_Return']]

    def calculate_portfolio_risk_score(self, portfolio_weights):
        """Compute the climate risk analysis of a portfolio.

        Args:
            portfolio_weights: Passed unchanged to
                ``calculate_portfolio_risk`` (presumably a sector -> weight
                mapping; confirm against that helper).

        Returns:
            Whatever ``calculate_portfolio_risk`` returns.

        Raises:
            ValueError: If the pipeline has not been run yet.
        """
        if self.df_complete is None:
            raise ValueError("Pipeline non ex√©cut√© - Lancer run_complete_analysis() d'abord")

        return calculate_portfolio_risk(portfolio_weights, self.df_complete)

    def print_summary_report(self):
        """Print a human-readable summary of ``self.analysis_results``.

        Prints a short notice and returns when no analysis is available.
        """
        if self.analysis_results is None:
            print("Aucune analyse disponible")
            return

        results = self.analysis_results

        print("\n" + "=" * 60)
        print("RAPPORT DE SYNTH√àSE - RISQUES CLIMATIQUES SECTORIELS")
        print("=" * 60)

        print("\nVUE D'ENSEMBLE:")
        print(f"‚Ä¢ Secteurs analys√©s: {results['total_sectors']}")
        print(f"‚Ä¢ Score moyen de risque: {results['avg_risk_score']:.1f}/100")
        print(f"‚Ä¢ Secteurs √† haut risque: {results['high_risk_count']}")
        print(f"‚Ä¢ Corr√©lation risque-rendement: {results['correlation_climate_return']:.3f}")

        print("\nSECTEURS √Ä HAUT RISQUE CLIMATIQUE:")
        for i, sector in enumerate(results['top_risk_sectors'], 1):
            print(f"{i}. {sector['Sector']}: Score {sector['Climate_Risk_Score']:.1f}")

        print("\nSECTEURS √Ä FAIBLE RISQUE CLIMATIQUE:")
        for i, sector in enumerate(results['low_risk_sectors'], 1):
            print(f"{i}. {sector['Sector']}: Score {sector['Climate_Risk_Score']:.1f}")

        print("\nRECOMMANDATIONS:")
        correlation = results['correlation_climate_return']
        if pd.isna(correlation):
            # NaN compares False against both thresholds below, which
            # would otherwise be reported as "no clear correlation" even
            # though nothing could be measured (e.g. constant returns).
            # The message is deliberately accent-free so it does not depend
            # on the file's encoding.
            print("Lien risque-rendement non calculable (rendements constants ou manquants)")
        elif correlation < -0.3:
            print("‚ö†Ô∏è  Corr√©lation n√©gative forte: Les secteurs √† haut risque climatique sous-performent")
        elif correlation > 0.3:
            print("üìà Paradoxe: Les secteurs √† haut risque climatique surperforment (temporaire?)")
        else:
            print("‚ûñ Pas de corr√©lation claire risque-performance")

        total_sectors = results['total_sectors']
        if not total_sectors:
            # Guard against ZeroDivisionError on an empty analysis.
            print("Aucun secteur dans l'analyse")
        else:
            high_risk_pct = (results['high_risk_count'] / total_sectors) * 100
            if high_risk_pct > 40:
                print("üî¥ Plus de 40% des secteurs pr√©sentent un risque climatique √©lev√©")
            elif high_risk_pct > 20:
                print("üü° Risque climatique mod√©r√© dans le portefeuille sectoriel")
            else:
                print("üü¢ Profil de risque climatique globalement acceptable")

        print("=" * 60)

    def get_dashboard_data(self):
        """Return the pipeline outputs bundled for the Streamlit dashboard.

        Returns:
            A dict with the keys ``dataframe``, ``correlations``,
            ``risk_stats`` and ``analysis_results``, or ``None`` if the
            pipeline has not run.
        """
        if self.df_complete is None:
            return None

        return {
            'dataframe': self.df_complete,
            'correlations': self.correlations,
            'risk_stats': self.risk_stats,
            'analysis_results': self.analysis_results
        }

In [2]:
if __name__ == "__main__":
    # Entry point: run the full pipeline once, then print the summary report.
    print("LANCEMENT DU PIPELINE RISQUES CLIMATIQUES")

    try:
        # `pipeline` and `df_results` stay module-level names so that later
        # notebook cells can keep using them.
        pipeline = ClimateRiskPipeline(output_dir='./')
        df_results = pipeline.run_complete_analysis(fetch_live_data=True, save_results=True)

        pipeline.print_summary_report()

        # Both closing lines are emitted by a single call; the output is
        # the same as two consecutive prints.
        print(
            "\nPIPELINE TERMIN√â AVEC SUCC√àS!",
            "Donn√©es pr√™tes pour le dashboard Streamlit",
            sep="\n",
        )
    except Exception as err:
        # Top-level boundary: report the failure, then re-raise so the
        # traceback is not lost.
        print(f"ERREUR PIPELINE: {err}")
        raise

LANCEMENT DU PIPELINE RISQUES CLIMATIQUES
PIPELINE ANALYSE RISQUES CLIMATIQUES - D√âBUT

√âTAPE 1: Chargement des donn√©es sectorielles de base
‚úì 11 secteurs charg√©s
  Colonnes: ['Sector', 'CO2_Intensity', 'Water_Risk_Score', 'Regulatory_Risk', 'Physical_Risk_Exposure']

√âTAPE 2: R√©cup√©ration des donn√©es financi√®res
  R√©cup√©ration des donn√©es financi√®res en temps r√©el...


  data = yf.download(ticker, start=start_date, end=end_date, progress=False)
  data = yf.download(ticker, start=start_date, end=end_date, progress=False)
  data = yf.download(ticker, start=start_date, end=end_date, progress=False)
  data = yf.download(ticker, start=start_date, end=end_date, progress=False)
  data = yf.download(ticker, start=start_date, end=end_date, progress=False)
  data = yf.download(ticker, start=start_date, end=end_date, progress=False)
  data = yf.download(ticker, start=start_date, end=end_date, progress=False)
  data = yf.download(ticker, start=start_date, end=end_date, progress=False)
  data = yf.download(ticker, start=start_date, end=end_date, progress=False)
  data = yf.download(ticker, start=start_date, end=end_date, progress=False)
  data = yf.download(ticker, start=start_date, end=end_date, progress=False)


‚úì Donn√©es r√©cup√©r√©es pour 0/11 secteurs

√âTAPE 3: Calcul des scores de risque climatique
‚úì Scores calcul√©s - Moyenne: 42.9, Risque √©lev√©: 2 secteurs

√âTAPE 4: Fusion des donn√©es climatiques et financi√®res
‚úì Datasets fusionn√©s - Shape finale: (11, 8)
  Colonnes finales: 8

√âTAPE 5: Classification des niveaux de risque
‚úì Classification termin√©e:
  Faible: 6 secteurs
  Mod√©r√©: 3 secteurs
  √âlev√©: 2 secteurs

√âTAPE 6: Analyse des corr√©lations et statistiques
‚úì Corr√©lations calcul√©es:
  Risque vs Rendement: nan
  Risque vs Volatilit√©: nan

√âTAPE 7: G√©n√©ration du rapport d'analyse
‚úì Rapport g√©n√©r√©:
  Secteurs √† haut risque: 2
  Score moyen: 42.9

√âTAPE 8: Sauvegarde des r√©sultats
‚úì Analyse sauvegard√©e: ./climate_risk_analysis.csv

PIPELINE TERMIN√â en 2.5s

RAPPORT DE SYNTH√àSE - RISQUES CLIMATIQUES SECTORIELS

VUE D'ENSEMBLE:
‚Ä¢ Secteurs analys√©s: 11
‚Ä¢ Score moyen de risque: 42.9/100
‚Ä¢ Secteurs √† haut risque: 2
‚Ä¢ Corr√©lation risque-re

  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]
