# Projet : Analyse des Avis et Alertes ANSSI avec Enrichissement des CVE

**Objectifs :**
1. Extraire les bulletins ANSSI (avis et alertes)
2. Identifier les CVE mentionnées
3. Enrichir avec API MITRE et EPSS
4. Consolider dans un DataFrame
5. Analyser et visualiser
6. Générer des alertes

## IMPORTS ET CONFIGURATION

In [43]:
# Imports
import feedparser
import requests
import re
import json
import time
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import plotly.express as px
import plotly.graph_objects as go
from pathlib import Path
from datetime import datetime
import warnings
warnings.filterwarnings('ignore')

# Configuration graphiques
plt.style.use('seaborn-v0_8-darkgrid')
sns.set_palette("husl")
plt.rcParams['figure.figsize'] = (12, 6)

print("Imports OK")

Imports OK


In [44]:
# ============================================================================
# CONFIGURATION
# ============================================================================

USE_LOCAL_DATA = True  # True = données locales, False = API

# Chemins
DATA_DIR = Path("data_pour_TD_final_2026")
ALERTES_DIR = DATA_DIR / "alertes"
AVIS_DIR = DATA_DIR / "avis"
FIRST_DIR = DATA_DIR / "first"
MITRE_DIR = DATA_DIR / "mitre"
OUTPUT_DIR = Path("output")
OUTPUT_DIR.mkdir(exist_ok=True)

# URLs
URL_AVIS = "https://www.cert.ssi.gouv.fr/avis/feed"
URL_ALERTE = "https://www.cert.ssi.gouv.fr/alerte/feed"

# Rate limiting
RATE_LIMIT_DELAY = 2

# Pattern CVE
CVE_PATTERN = r"CVE-\d{4}-\d{4,7}"

print(f" Mode: {'LOCAL' if USE_LOCAL_DATA else 'EN LIGNE'}")

 Mode: LOCAL


In [45]:
# Fonction utilitaire
def get_severity(score):
    """Retourne la sévérité selon le score CVSS"""
    if score == "Non renseigné" or score is None:
        return "Non renseigné"
    try:
        score = float(score)
        if score >= 9.0:
            return "Critique"
        elif score >= 7.0:
            return "Elevée"
        elif score >= 4.0:
            return "Moyenne"
        else:
            return "Faible"
    except:
        return "Non renseigné"

## 1️. EXTRACTION DES BULLETINS

In [46]:
def charger_bulletins_local():
    """Charge bulletins depuis fichiers JSON locaux"""
    bulletins = []
    
    print("Chargement local...")
    
    # Alertes
    if ALERTES_DIR.exists():
        alerte_count=0
        for file in ALERTES_DIR.iterdir():
            if file.is_file():
                try:
                    with open(file, 'r', encoding='utf-8') as f:
                        data = json.load(f)
                        
                        if "cves" in data and isinstance(data["cves"],list):
                            cves = [cve["name"] for cve in data["cves"] if "name" in cve]
                        else:
                            # Fallback regex
                            cves = list(set(re.findall(CVE_PATTERN, str(data))))
                        
                        date = (data.get("published_at") or 
                               data.get("published") or 
                               data.get("date") or 
                               "Date inconnue")
                        
                        bulletins.append({
                            "titre": data.get("title", "Sans titre"),
                            "type": "Alerte",
                            "date": date,
                            "lien": data.get("link", ""),
                            "cves": cves
                        })

                        alerte_count+=1

                except Exception as e:
                    print(f"{file.name}: {e}")
        print(f"   - {alerte_count} alertes chargées")
    
    # Avis
    if AVIS_DIR.exists():
        avis_count=0
        for file in AVIS_DIR.iterdir():
            if file.is_file():
                try:
                    with open(file, 'r', encoding='utf-8') as f:
                        data = json.load(f)

                        if "cves" in data and isinstance(data["cves"],list):
                            cves = [cve["name"] for cve in data["cves"] if "name" in cve]
                        else:
                            cves= list(set(re.findall(CVE_PATTERN, str(data))))
                        
                        date = (data.get("published_at") or 
                               data.get("published") or 
                               data.get("date") or 
                               "Date inconnue")
                        bulletins.append({
                            "titre": data.get("title", "Sans titre"),
                            "type": "Avis",
                            "date": date,
                            "lien": data.get("link", ""),
                            "cves": cves
                        })
                        avis_count+=1
                except Exception as e:
                    print(f"{file.name}: {e}")
        print(f"   - {avis_count} avis chargés")
    
    print(f"{len(bulletins)} bulletins au total")
    return bulletins

def charger_bulletins_online():
    """Charge bulletins depuis flux RSS"""
    bulletins = []
    
    print("Chargement RSS...")
    
    # Avis
    feed = feedparser.parse(URL_AVIS)
    for entry in feed.entries:
        bulletins.append({
            "titre": entry.title,
            "type": "Avis",
            "date": entry.published,
            "lien": entry.link,
            "cves": []
        })
    time.sleep(RATE_LIMIT_DELAY)
    
    # Alertes
    feed = feedparser.parse(URL_ALERTE)
    for entry in feed.entries:
        bulletins.append({
            "titre": entry.title,
            "type": "Alerte",
            "date": entry.published,
            "lien": entry.link,
            "cves": []
        })
    
    print(f"{len(bulletins)} bulletins")
    return bulletins

In [47]:
# Chargement
if USE_LOCAL_DATA:
    bulletins = charger_bulletins_local()
else:
    bulletins = charger_bulletins_online()

# Aperçu
print(f"\n Aperçu (5 premiers):")
for i, b in enumerate(bulletins[:5], 1):
    print(f"{i}. [{b['type']}] {b['titre'][:60]}... ({len(b['cves'])} CVE)")

Chargement local...
   - 78 alertes chargées
   - 3314 avis chargés
3392 bulletins au total

 Aperçu (5 premiers):
1. [Alerte] |MàJ] Vulnérabilité dans SonicWall SMA100... (2 CVE)
2. [Alerte] [MàJ] Vulnérabilité dans Google Chrome et Microsoft Edge... (1 CVE)
3. [Alerte] [MàJ] Vulnérabilité dans VMware vCenter Server... (1 CVE)
4. [Alerte] [MàJ] Multiples vulnérabilités dans Microsoft Exchange Serve... (7 CVE)
5. [Alerte] Multiples vulnérabilités dans Microsoft DNS server... (5 CVE)


## 2️. EXTRACTION CVE (si mode online)

In [48]:
def extraire_cves_online(lien):
    """Extrait CVE d'un bulletin en ligne"""
    try:
        url = lien.rstrip('/') + '/json/'
        time.sleep(RATE_LIMIT_DELAY)
        response = requests.get(url, timeout=10)
        response.raise_for_status()
        data = response.json()
        return list(set(re.findall(CVE_PATTERN, str(data))))
    except Exception as e:
        print(f"{e}")
        return []

In [49]:
# Si online, extraire CVE
if not USE_LOCAL_DATA:
    print("Extraction CVE...")
    for i, b in enumerate(bulletins, 1):
        print(f"[{i}/{len(bulletins)}]", end=" ")
        b['cves'] = extraire_cves_online(b['lien'])
        print(f"✓ ({len(b['cves'])})")

In [50]:
# Stats CVE
total_cves = sum(len(b['cves']) for b in bulletins)
uniques = len(set(cve for b in bulletins for cve in b['cves']))

print(f"\n STATS CVE")
print(f"Total (doublons)  : {total_cves}")
print(f"CVE uniques       : {uniques}")
print(f"Moyenne/bulletin  : {total_cves / len(bulletins):.1f}")


 STATS CVE
Total (doublons)  : 91685
CVE uniques       : 29940
Moyenne/bulletin  : 27.0


## 3️. ENRICHISSEMENT DES CVE

In [51]:
# Fonctions MITRE
def charger_mitre_local(cve_id):
    """Charge CVE MITRE local"""
    path = MITRE_DIR / cve_id
    try:
        with open(path, 'r', encoding='utf-8') as f:
            return json.load(f)
    except:
        return None

def charger_mitre_online(cve_id):
    """Charge CVE MITRE API"""
    try:
        time.sleep(RATE_LIMIT_DELAY)
        url = f"https://cveawg.mitre.org/api/cve/{cve_id}"
        r = requests.get(url, timeout=10)
        r.raise_for_status()
        return r.json()
    except:
        return None

def extraire_mitre(cve_id, data):
    """Extrait données MITRE"""
    if not data:
        return {
            "description": "Non renseigné",
            "cvss_score": "Non renseigné",
            "base_severity": "Non renseigné",
            "cwe_id": "Non disponible",
            "cwe_desc": "Non disponible",
            "vendor": "Non renseigné",
            "product": "Non renseigné",
            "versions": "Non renseigné"
        }
    
    try:
        cna = data.get("containers", {}).get("cna", {})
        
        # Description
        desc_list = cna.get("descriptions", [])
        description = desc_list[0].get("value", "Non renseigné") if desc_list else "Non renseigné"
        
        # CVSS
        cvss = "Non renseigné"
        metrics = cna.get("metrics", [])
        if metrics:
            m = metrics[0]
            for v in ["cvssV4_0", "cvssV3_1", "cvssV3_0", "cvssV2_0"]:
                if v in m:
                    cvss = m[v].get("baseScore", "Non renseigné")
                    break
        
        # CWE
        cwe_id = "Non disponible"
        cwe_desc = "Non disponible"
        prob = cna.get("problemTypes", [])
        if prob and "descriptions" in prob[0]:
            d = prob[0]["descriptions"][0]
            cwe_id = d.get("cweId", "Non disponible")
            cwe_desc = d.get("description", "Non disponible")
        
        # Produits
        vendor = "Non renseigné"
        product = "Non renseigné"
        versions = "Non renseigné"
        aff = cna.get("affected", [])
        if aff:
            vendor = aff[0].get("vendor", "Non renseigné")
            product = aff[0].get("product", "Non renseigné")
            v_list = [v.get("version") for v in aff[0].get("versions", []) 
                     if v.get("status") == "affected"]
            versions = ", ".join(v_list) if v_list else "Non renseigné"
        
        return {
            "description": description,
            "cvss_score": cvss,
            "base_severity": get_severity(cvss),
            "cwe_id": cwe_id,
            "cwe_desc": cwe_desc,
            "vendor": vendor,
            "product": product,
            "versions": versions
        }
    except:
        return {
            "description": "Non renseigné",
            "cvss_score": "Non renseigné",
            "base_severity": "Non renseigné",
            "cwe_id": "Non disponible",
            "cwe_desc": "Non disponible",
            "vendor": "Non renseigné",
            "product": "Non renseigné",
            "versions": "Non renseigné"
        }

In [52]:
# Fonctions EPSS
def charger_epss_local(cve_id):
    """Charge EPSS local"""
    path = FIRST_DIR / cve_id
    try:
        with open(path, 'r', encoding='utf-8') as f:
            data = json.load(f)
            epss_data = data.get("data", [])
            if epss_data:
                return epss_data[0].get("epss", "Non renseigné")
    except:
        pass
    return "Non renseigné"

def charger_epss_online(cve_id):
    """Charge EPSS API"""
    try:
        time.sleep(RATE_LIMIT_DELAY)
        url = f"https://api.first.org/data/v1/epss?cve={cve_id}"
        r = requests.get(url, timeout=10)
        r.raise_for_status()
        data = r.json()
        epss = data.get("data", [])
        if epss:
            return epss[0].get("epss", "Non renseigné")
    except:
        pass
    return "Non renseigné"

In [53]:
# Enrichissement
print("ENRICHISSEMENT")
print("=" * 80)

tous_cves = list(set(cve for b in bulletins for cve in b['cves']))
print(f"CVE uniques à enrichir : {len(tous_cves)}")

cve_dict = {}

for i, cve_id in enumerate(tous_cves, 1):
    print(f"[{i}/{len(tous_cves)}] {cve_id}...", end=" ")
    
    # MITRE
    if USE_LOCAL_DATA:
        mitre_raw = charger_mitre_local(cve_id)
    else:
        mitre_raw = charger_mitre_online(cve_id)
    
    mitre = extraire_mitre(cve_id, mitre_raw)
    
    # EPSS
    if USE_LOCAL_DATA:
        epss = charger_epss_local(cve_id)
    else:
        epss = charger_epss_online(cve_id)
    
    cve_dict[cve_id] = {
        "cve_id": cve_id,
        "epss_score": epss,
        **mitre
    }
    
    print("✓")

print(f"\n {len(cve_dict)} CVE enrichis")

ENRICHISSEMENT
CVE uniques à enrichir : 29940
[1/29940] CVE-2023-45290... ✓
[2/29940] CVE-2024-37036... ✓
[3/29940] CVE-2024-50144... ✓
[4/29940] CVE-2024-50159... ✓
[5/29940] CVE-2024-24973... ✓
[6/29940] CVE-2023-35394... ✓
[7/29940] CVE-2024-51565... ✓
[8/29940] CVE-2024-40829... ✓
[9/29940] CVE-2025-43385... ✓
[10/29940] CVE-2024-53132... ✓
[11/29940] CVE-2025-38529... ✓
[12/29940] CVE-2023-53519... ✓
[13/29940] CVE-2023-4387... ✓
[14/29940] CVE-2024-35868... ✓
[15/29940] CVE-2024-45135... ✓
[16/29940] CVE-2024-43628... ✓
[17/29940] CVE-2024-43542... ✓
[18/29940] CVE-2020-35491... ✓
[19/29940] CVE-2022-49901... ✓
[20/29940] CVE-2023-49124... ✓
[21/29940] CVE-2024-25614... ✓
[22/29940] CVE-2024-26147... ✓
[23/29940] CVE-2022-49185... ✓
[24/29940] CVE-2020-27829... ✓
[25/29940] CVE-2023-44351... ✓
[26/29940] CVE-2023-1531... ✓
[27/29940] CVE-2024-34100... ✓
[28/29940] CVE-2025-49679... ✓
[29/29940] CVE-2021-47055... ✓
[30/29940] CVE-2025-38120... ✓
[31/29940] CVE-2019-14865... ✓
[32/

In [None]:
# Aperçu enrichissement
print("\n APERÇU (3 premiers):")
for cve in list(cve_dict.values())[:3]:
    print(f"\n🔹 {cve['cve_id']}")
    print(f"   CVSS  : {cve['cvss_score']} ({cve['base_severity']})")
    print(f"   EPSS  : {cve['epss_score']}")
    print(f"   CWE   : {cve['cwe_id']}")
    print(f"   Produit: {cve['product']}")


 APERÇU (3 premiers):

🔹 CVE-2023-45290
   CVSS  : Non renseigné (Non renseigné)
   EPSS  : 0.003260000
   CWE   : Non disponible
   Produit: net/textproto

🔹 CVE-2024-37036
   CVSS  : 9.8 (Critique)
   EPSS  : 0.000910000
   CWE   : CWE-787
   Produit: Sage 1410

🔹 CVE-2024-50144
   CVSS  : Non renseigné (Non renseigné)
   EPSS  : 0.000280000
   CWE   : Non disponible
   Produit: Linux


## 4️. CONSOLIDATION DATAFRAME

In [None]:
print("CONSTRUCTION DATAFRAME")

lignes = []

for bulletin in bulletins:
    for cve_id in bulletin["cves"]:
        cve_data = cve_dict.get(cve_id, {})
        
        ligne = {
            "Titre du bulletin (ANSSI)": bulletin["titre"],
            "Type de bulletin": bulletin["type"],
            "Date de publication": bulletin["date"],
            "Identifiant CVE": cve_id,
            "Score CVSS": cve_data.get("cvss_score", "Non renseigné"),
            "Base Severity": cve_data.get("base_severity", "Non renseigné"),
            "Type CWE": cve_data.get("cwe_id", "Non disponible"),
            "CWE Description": cve_data.get("cwe_desc", "Non disponible"),
            "Score EPSS": cve_data.get("epss_score", "Non renseigné"),
            "Lien du bulletin (ANSSI)": bulletin["lien"],
            "Description": cve_data.get("description", "Non renseigné"),
            "Editeur/Vendor": cve_data.get("vendor", "Non renseigné"),
            "Produit": cve_data.get("product", "Non renseigné"),
            "Versions affectées": cve_data.get("versions", "Non renseigné")
        }
        
        lignes.append(ligne)

df_final = pd.DataFrame(lignes)

print(f"{len(df_final)} lignes × {len(df_final.columns)} colonnes")

 CONSTRUCTION DATAFRAME
91685 lignes × 14 colonnes


In [56]:
# Nettoyage
df_final['Score CVSS'] = pd.to_numeric(df_final['Score CVSS'], errors='coerce')
df_final['Score EPSS'] = pd.to_numeric(df_final['Score EPSS'], errors='coerce')
df_final['Date de publication'] = pd.to_datetime(df_final['Date de publication'], errors='coerce')
df_final['Année'] = df_final['Date de publication'].dt.year
df_final['Mois'] = df_final['Date de publication'].dt.month
df_final['Année-Mois'] = df_final['Date de publication'].dt.to_period('M')

print("Nettoyage OK")

Nettoyage OK


In [57]:
# Aperçu
df_final.head(10)

Unnamed: 0,Titre du bulletin (ANSSI),Type de bulletin,Date de publication,Identifiant CVE,Score CVSS,Base Severity,Type CWE,CWE Description,Score EPSS,Lien du bulletin (ANSSI),Description,Editeur/Vendor,Produit,Versions affectées,Année,Mois,Année-Mois
0,|MàJ] Vulnérabilité dans SonicWall SMA100,Alerte,NaT,CVE-2021-20016,,Non renseigné,CWE-89,CWE-89: Improper Neutralization of Special Ele...,0.02681,,A SQL-Injection vulnerability in the SonicWall...,SonicWall,SonicWall SMA100,SMA100 build version 10.x,,,NaT
1,|MàJ] Vulnérabilité dans SonicWall SMA100,Alerte,NaT,CVE-2014-6271,,Non renseigné,Non disponible,,0.97214,,GNU Bash through 4.3 processes trailing string...,,,,,,NaT
2,[MàJ] Vulnérabilité dans Google Chrome et Micr...,Alerte,NaT,CVE-2021-21148,,Non renseigné,Non disponible,Heap buffer overflow,0.01431,,Heap buffer overflow in V8 in Google Chrome pr...,Google,Chrome,unspecified,,,NaT
3,[MàJ] Vulnérabilité dans VMware vCenter Server,Alerte,NaT,CVE-2021-21972,,Non renseigné,Non disponible,Remote code execution vulnerability,0.97419,,The vSphere Client (HTML5) contains a remote c...,,VMware vCenter Server,"7.x before 7.0 U1c, 6.7 before 6.7 U3l, 6.5 be...",,,NaT
4,[MàJ] Multiples vulnérabilités dans Microsoft ...,Alerte,NaT,CVE-2021-26858,7.8,Elevée,Non disponible,Remote Code Execution,0.15565,,Microsoft Exchange Server Remote Code Executio...,Microsoft,Microsoft Exchange Server 2019,15.02.0,,,NaT
5,[MàJ] Multiples vulnérabilités dans Microsoft ...,Alerte,NaT,CVE-2021-27078,9.1,Critique,Non disponible,Remote Code Execution,0.02212,,Microsoft Exchange Server Remote Code Executio...,Microsoft,Microsoft Exchange Server 2016 Cumulative Upda...,15.01.0,,,NaT
6,[MàJ] Multiples vulnérabilités dans Microsoft ...,Alerte,NaT,CVE-2021-26857,7.8,Elevée,Non disponible,Remote Code Execution,0.61868,,Microsoft Exchange Server Remote Code Executio...,Microsoft,Microsoft Exchange Server 2016 Cumulative Upda...,15.01.0,,,NaT
7,[MàJ] Multiples vulnérabilités dans Microsoft ...,Alerte,NaT,CVE-2021-26855,9.1,Critique,Non disponible,Remote Code Execution,0.97508,,Microsoft Exchange Server Remote Code Executio...,Microsoft,Microsoft Exchange Server 2016 Cumulative Upda...,15.01.0,,,NaT
8,[MàJ] Multiples vulnérabilités dans Microsoft ...,Alerte,NaT,CVE-2021-27065,7.8,Elevée,Non disponible,Remote Code Execution,0.95775,,Microsoft Exchange Server Remote Code Executio...,Microsoft,Microsoft Exchange Server 2019,15.02.0,,,NaT
9,[MàJ] Multiples vulnérabilités dans Microsoft ...,Alerte,NaT,CVE-2021-26412,9.1,Critique,Non disponible,Remote Code Execution,0.02212,,Microsoft Exchange Server Remote Code Executio...,Microsoft,Microsoft Exchange Server 2013 Cumulative Upda...,15.00.0,,,NaT


In [58]:
# Export CSV
output_file = OUTPUT_DIR / "tableau_final.csv"
df_final.to_csv(output_file, index=False, encoding="utf-8")
print(f" Sauvegardé dans : {output_file}")

 Sauvegardé dans : output\tableau_final.csv


In [59]:
# Stats descriptives
df_final.describe(include='all')

Unnamed: 0,Titre du bulletin (ANSSI),Type de bulletin,Date de publication,Identifiant CVE,Score CVSS,Base Severity,Type CWE,CWE Description,Score EPSS,Lien du bulletin (ANSSI),Description,Editeur/Vendor,Produit,Versions affectées,Année,Mois,Année-Mois
count,91685,91685,0,91685,12194.0,91685,91685,91685,81283.0,91685.0,91685,91685,91685,91685,0.0,0.0,0.0
unique,810,2,,29940,,5,379,1911,,1.0,14592,363,1832,6829,,,0.0
top,Multiples vulnérabilités dans le noyau Linux d...,Avis,,CVE-2023-44487,,Non renseigné,Non disponible,Non disponible,,,Non renseigné,Linux,Linux,Non renseigné,,,
freq,23950,91524,,65,,79491,77967,67639,,91685.0,28063,39598,38597,29336,,,
mean,,,NaT,,6.533156,,,,0.011064,,,,,,,,
min,,,NaT,,0.0,,,,2e-05,,,,,,,,
25%,,,NaT,,5.3,,,,0.0003,,,,,,,,
50%,,,NaT,,6.7,,,,0.00044,,,,,,,,
75%,,,NaT,,7.8,,,,0.00081,,,,,,,,
max,,,NaT,,10.0,,,,0.97515,,,,,,,,
