In [1]:
from google.colab import drive
import os

# --- Unmount any existing Drive mount (if needed) ---
drive.flush_and_unmount()

# --- (Optional) Clear local /content/drive mountpoint (disabled for safety) ---
drive_path = '/content/drive'
if os.path.exists(drive_path) and os.listdir(drive_path):
    print("Notice: /content/drive is not empty. Skipping automatic deletion for safety.")
    # Uncomment below to forcibly clear the mountpoint (NOT your actual Drive!)
    # !rm -rf /content/drive/*

# --- Mount Google Drive ---
drive.mount('/content/drive')

Drive not mounted, so nothing to flush and unmount.
Mounted at /content/drive


In [2]:
import os
import json

# --- Define input and output directories (adjust as needed) ---
CSV_DIR = "/content/drive/MyDrive/BINOME_WORK/STAGE_CERIST/DATA_FORMATING/STEP3_CONVERTED_CSV"
COMBINED_CSV = "/content/drive/MyDrive/BINOME_WORK/STAGE_CERIST/DATA_FORMATING/STEP4_COMBINED_CSV"

# --- Create output directory ---
os.makedirs(COMBINED_CSV, exist_ok=True)

In [3]:
import os
import pandas as pd

def combine_all_csvs(src_dir, dst_dir, output_filename="combined_all.csv"):
    """
    Combine all CSV files in src_dir into one CSV in dst_dir/output_filename.
    """
    combined = []

    # Parcourt tous les fichiers CSV du dossier source
    for file in os.listdir(src_dir):
        if file.endswith(".csv"):
            file_path = os.path.join(src_dir, file)
            print(f"[+] Lecture de : {file_path}")
            df = pd.read_csv(file_path)
            combined.append(df)

    if not combined:
        print("Aucun fichier CSV trouvé !")
        return

    # Combine tous les DataFrames
    df_all = pd.concat(combined, ignore_index=True)

    # Sauvegarde le fichier combiné
    output_path = os.path.join(dst_dir, output_filename)
    df_all.to_csv(output_path, index=False)
    print(f"[✔] Fichier combiné créé : {output_path} ({len(df_all)} lignes)")


combine_all_csvs(CSV_DIR, COMBINED_CSV)

[+] Lecture de : /content/drive/MyDrive/BINOME_WORK/STAGE_CERIST/DATA_FORMATING/STEP3_CONVERTED_CSV/xml_pyshark.csv
[+] Lecture de : /content/drive/MyDrive/BINOME_WORK/STAGE_CERIST/DATA_FORMATING/STEP3_CONVERTED_CSV/lfi_pyshark.csv
[+] Lecture de : /content/drive/MyDrive/BINOME_WORK/STAGE_CERIST/DATA_FORMATING/STEP3_CONVERTED_CSV/ssti_pyshark.csv
[+] Lecture de : /content/drive/MyDrive/BINOME_WORK/STAGE_CERIST/DATA_FORMATING/STEP3_CONVERTED_CSV/cmdinj_pyshark.csv
[+] Lecture de : /content/drive/MyDrive/BINOME_WORK/STAGE_CERIST/DATA_FORMATING/STEP3_CONVERTED_CSV/xss_pyshark.csv
[+] Lecture de : /content/drive/MyDrive/BINOME_WORK/STAGE_CERIST/DATA_FORMATING/STEP3_CONVERTED_CSV/sql_pyshark.csv
[✔] Fichier combiné créé : /content/drive/MyDrive/BINOME_WORK/STAGE_CERIST/DATA_FORMATING/STEP4_COMBINED_CSV/combined_all.csv (1216 lignes)


In [4]:
import pandas as pd
import os

# Chemin du fichier combiné (doit correspondre à ta fonction combine_all_csvs)
combined_csv_path = "/content/drive/MyDrive/BINOME_WORK/STAGE_CERIST/DATA_FORMATING/STEP4_COMBINED_CSV/combined_all.csv"
shuffled_csv_path = "/content/drive/MyDrive/BINOME_WORK/STAGE_CERIST/DATA_FORMATING/STEP4_COMBINED_CSV/combined_all_shuffled.csv"

# Charge, mélange, et sauvegarde le CSV randomisé
df = pd.read_csv(combined_csv_path)
df = df.sample(frac=1, random_state=42).reset_index(drop=True)  # random_state pour la reproductibilité
df.to_csv(shuffled_csv_path, index=False)
print(f"[✔] Fichier mélangé créé : {shuffled_csv_path} ({len(df)} lignes)")


[✔] Fichier mélangé créé : /content/drive/MyDrive/BINOME_WORK/STAGE_CERIST/DATA_FORMATING/STEP4_COMBINED_CSV/combined_all_shuffled.csv (1216 lignes)
