Importation des libraries

In [73]:
import pandas as pd
import feedparser as fp
import requests as req


Etape 1 - Extraction des Flux RSS

In [74]:
url = "https://www.cert.ssi.gouv.fr/avis/feed"
rss_feed = fp.parse(url)

# Check if the feed was parsed correctly
if rss_feed.bozo:
    raise ValueError("Failed to parse RSS feed. Please check the URL.\n") 
else:
    print(f"Successfully parsed RSS feed from {url}\n")   

rows = []

# Loop through RSS feed entries and collect data
for entry in rss_feed.entries:
    rows.append({
        "title": entry.title,
        "link": entry.link,
        "published": entry.published,
        "summary": entry.summary
    })

# Convert list of rows into a DataFrame
df_flux_rss = pd.DataFrame(rows, columns=["title", "link", "published", "summary"])

# Convert 'published' to datetime, handling errors
df_flux_rss["published"] = pd.to_datetime(df_flux_rss["published"], format="%a, %d %b %Y %H:%M:%S %z", errors='coerce')

# Display the first few rows of the DataFrame & print the DataFrame structure
print(df_flux_rss.info())
print(df_flux_rss.head())

Successfully parsed RSS feed from https://www.cert.ssi.gouv.fr/avis/feed

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 40 entries, 0 to 39
Data columns (total 4 columns):
 #   Column     Non-Null Count  Dtype              
---  ------     --------------  -----              
 0   title      40 non-null     object             
 1   link       40 non-null     object             
 2   published  40 non-null     datetime64[ns, UTC]
 3   summary    40 non-null     object             
dtypes: datetime64[ns, UTC](1), object(3)
memory usage: 1.4+ KB
None
                                               title  \
0  Multiples vulnérabilités dans Mattermost Serve...   
1  Multiples vulnérabilités dans les produits SAP...   
2        Vulnérabilité dans Roundcube (02 juin 2025)   
3            Vulnérabilité dans cURL  (04 juin 2025)   
4  Multiples vulnérabilités dans les produits VMw...   

                                                link  \
0  https://www.cert.ssi.gouv.fr/avis/CERTFR-2025-.

Etape 2 - Extraction des CVE

In [75]:
def extract_cve_from_link(link):
    json_link = link.rstrip("/") + "/json/" 
    response = req.get(json_link)
    if response.status_code != 200:
        print(f"Erreur: impossible d'accéder à {json_link}")
        return []

    try:
        data = response.json()
    except ValueError:
        print(f"Le contenu de {json_link} n'est pas un JSON valide.")
        return []
    
        
    cve_list = list(data["cves"])
    print(f"Extracted CVEs from {link}: {cve_list}")
    return cve_list

In [76]:
#extract CVEs from each link in the DataFrame & store them in a new column
df_flux_rss["cve"] = df_flux_rss["link"].apply(extract_cve_from_link)

Extracted CVEs from https://www.cert.ssi.gouv.fr/avis/CERTFR-2025-AVI-0392/: [{'name': 'CVE-2025-4128', 'url': 'https://www.cve.org/CVERecord?id=CVE-2025-4128'}, {'name': 'CVE-2025-4573', 'url': 'https://www.cve.org/CVERecord?id=CVE-2025-4573'}]
Extracted CVEs from https://www.cert.ssi.gouv.fr/avis/CERTFR-2025-AVI-0396/: [{'name': 'CVE-2025-43003', 'url': 'https://www.cve.org/CVERecord?id=CVE-2025-43003'}, {'name': 'CVE-2025-43007', 'url': 'https://www.cve.org/CVERecord?id=CVE-2025-43007'}, {'name': 'CVE-2025-23191', 'url': 'https://www.cve.org/CVERecord?id=CVE-2025-23191'}, {'name': 'CVE-2025-42999', 'url': 'https://www.cve.org/CVERecord?id=CVE-2025-42999'}, {'name': 'CVE-2025-43009', 'url': 'https://www.cve.org/CVERecord?id=CVE-2025-43009'}, {'name': 'CVE-2025-43011', 'url': 'https://www.cve.org/CVERecord?id=CVE-2025-43011'}, {'name': 'CVE-2025-43006', 'url': 'https://www.cve.org/CVERecord?id=CVE-2025-43006'}, {'name': 'CVE-2025-0060', 'url': 'https://www.cve.org/CVERecord?id=CVE-202

Etape 3 - Enrichissement des CVE

In [81]:
def connexion_to_api_cve(cve_id):
    url = f"https://cveawg.mitre.org/api/cve/{cve_id}"
    response = req.get(url)
    if response.status_code != 200:
        print(f"Erreur: impossible d'accéder à {url}")
        return None
    data = response.json()
    
    cve_id = data.get("cveMetadata", {}).get("cveId", None)
    
    # Description anglaise
    description = None
    desc_list = data.get("containers", {}).get("cna", {}).get("descriptions", [])
    for d in desc_list:
        if d.get("lang") == "en":
            description = d.get("value")
            break

    # CVSS score, gestion de plusieurs formats possibles
    cvss_score = None
    metrics = data.get("containers", {}).get("cna", {}).get("metrics", [])
    if metrics:
        for metric in metrics:
            for key in ["cvssV3_1", "cvssV3_0", "cvssV2"]:
                if key in metric:
                    cvss_score = metric[key].get("baseScore")
                    if cvss_score is not None:
                        break
            if cvss_score is not None:
                break

    # CWE(s)
    cwe_list = []
    problem_types = data.get("containers", {}).get("cna", {}).get("problemTypes", [])
    for pt in problem_types:
        for desc in pt.get("descriptions", []):
            if desc.get("lang") == "en":
                cwe_list.append(desc.get("description"))

    cwe = ", ".join(cwe_list) if cwe_list else None

    # Produits affectés
    affected_products = []
    affected = data.get("containers", {}).get("cna", {}).get("affected", [])
    for prod in affected:
        vendor = prod.get("vendor")
        product = prod.get("product")
        if vendor and product:
            affected_products.append(f"{vendor} {product}")

    affected_products_str = ", ".join(affected_products) if affected_products else None

    return {
        "cve_id": cve_id,
        "description": description,
        "cvss_score": cvss_score,
        "cwe": cwe,
        "affected_products": affected_products_str
    }



In [82]:
set_cve_uniques = set()

for cve_list in df_flux_rss["cve"]:
    for cve_obj in cve_list:  # cve_list est une liste de dicts
        set_cve_uniques.add(cve_obj["name"])


liste_cve_info = []

for cve_id in set_cve_uniques:
    data = connexion_to_api_cve(cve_id)  # ta fonction qui retourne un dictionnaire
    if data:
        liste_cve_info.append(data)


df_cves = pd.DataFrame(liste_cve_info)




Erreur: impossible d'accéder à https://cveawg.mitre.org/api/cve/CVE-2025-26434
Erreur: impossible d'accéder à https://cveawg.mitre.org/api/cve/CVE-2025-32317
Erreur: impossible d'accéder à https://cveawg.mitre.org/api/cve/CVE-2025-26457
Erreur: impossible d'accéder à https://cveawg.mitre.org/api/cve/CVE-2024-0028
Erreur: impossible d'accéder à https://cveawg.mitre.org/api/cve/CVE-2025-32316
Erreur: impossible d'accéder à https://cveawg.mitre.org/api/cve/CVE-2025-22862
Erreur: impossible d'accéder à https://cveawg.mitre.org/api/cve/CVE-2025-5121
Erreur: impossible d'accéder à https://cveawg.mitre.org/api/cve/CVE-2025-22485
Erreur: impossible d'accéder à https://cveawg.mitre.org/api/cve/CVE-2025-32314
Erreur: impossible d'accéder à https://cveawg.mitre.org/api/cve/CVE-2025-32320
Erreur: impossible d'accéder à https://cveawg.mitre.org/api/cve/CVE-2025-32334
Erreur: impossible d'accéder à https://cveawg.mitre.org/api/cve/CVE-2025-36887
Erreur: impossible d'accéder à https://cveawg.mitre.or

In [83]:
# Explore the DataFrame structure
print(df_cves.info())  
# Display the first few rows of the CVE DataFrame
print(df_cves.head())


<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1045 entries, 0 to 1044
Data columns (total 5 columns):
 #   Column             Non-Null Count  Dtype  
---  ------             --------------  -----  
 0   cve_id             1045 non-null   object 
 1   description        972 non-null    object 
 2   cvss_score         277 non-null    float64
 3   cwe                323 non-null    object 
 4   affected_products  1040 non-null   object 
dtypes: float64(1), object(4)
memory usage: 40.9+ KB
None
           cve_id                                        description  \
0  CVE-2024-50290  In the Linux kernel, the following vulnerabili...   
1  CVE-2025-20163  A vulnerability in the SSH implementation of C...   
2   CVE-2024-9512  An issue has been discovered in GitLab EE affe...   
3  CVE-2024-58069  In the Linux kernel, the following vulnerabili...   
4  CVE-2025-21950  In the Linux kernel, the following vulnerabili...   

   cvss_score                                                cwe  \