In [1]:
from google.colab import drive
import os

# --- Release any Drive mount left over from an earlier run ---
drive.flush_and_unmount()

# --- Report (but never delete) leftovers in the local mountpoint ---
drive_path = '/content/drive'
leftover_entries = os.listdir(drive_path) if os.path.exists(drive_path) else []
if leftover_entries:
    print("Notice: /content/drive is not empty. Skipping automatic deletion for safety.")
    # To forcibly clear the local mountpoint (NOT your actual Drive!), run:
    # !rm -rf /content/drive/*

# --- Mount Google Drive at that same mountpoint ---
drive.mount(drive_path)

Drive not mounted, so nothing to flush and unmount.
Mounted at /content/drive


In [7]:
import pandas as pd
from datetime import datetime
import os

# Input CSV to convert and the log file that will be written from it.
input_csv = "/content/drive/MyDrive/BINOME_WORK/STAGE_CERIST/DATA_FORMATING/STEP4_COMBINED_CSV/combined_all_shuffled.csv"
output_log = "/content/drive/MyDrive/BINOME_WORK/STAGE_CERIST/DATA_FORMATING/STEP5_CSV_To_Apache_Logs/Log_file.log"

# Create the destination folder up front (no error if it already exists).
output_dir = os.path.dirname(output_log)
os.makedirs(output_dir, exist_ok=True)

# Load the whole CSV into memory as a DataFrame.
df = pd.read_csv(input_csv)

# Replacement table for text placed inside a double-quoted log field.
# Modelled on the escaping described for Apache mod_log_config: backslash and
# double quote get a leading backslash, common whitespace controls use their
# C notation, and every other ASCII control character becomes \xhh.
# Printable non-ASCII characters are left untouched.
_LOG_FIELD_ESCAPES = {codepoint: f"\\x{codepoint:02x}" for codepoint in (*range(0x20), 0x7F)}
_LOG_FIELD_ESCAPES.update({
    ord("\\"): "\\\\",
    ord('"'): '\\"',
    ord("\b"): "\\b",
    ord("\n"): "\\n",
    ord("\r"): "\\r",
    ord("\t"): "\\t",
    ord("\v"): "\\v",
})


def _is_missing(value):
    """Return True for None and for pandas/NumPy missing scalars such as NaN."""
    if value is None:
        return True
    try:
        return bool(pd.isnull(value))
    except (TypeError, ValueError):
        # Non-scalar cell content (e.g. a list): treat it as present.
        return False


def _field(row, key, default):
    """Read row[key], falling back to default when the key is absent or the cell is missing."""
    value = row.get(key, default)
    return default if _is_missing(value) else value


def _whole_number(value):
    """Turn an integral float (e.g. 200.0) into an int so it prints as 200; pass anything else through."""
    if isinstance(value, float) and value.is_integer():
        return int(value)
    return value


def _escape_log_field(value):
    """Stringify value and escape quotes, backslashes and control characters."""
    return str(value).translate(_LOG_FIELD_ESCAPES)


def format_apache_log(row):
    """Format one record as an Apache-style access log line.

    Args:
        row: Mapping-like object exposing ``.get(key, default)`` (a pandas
            Series or a plain dict). Keys read: ``src_ip``,
            ``res_header_Date``, ``req_method``, ``req_url``,
            ``res_header_Response_Code``, ``res_header_Content_Length`` and
            ``req_header_User_Agent``.

    Returns:
        str: ``ip - - [date] "METHOD url HTTP/1.1" status size "user-agent"``
        without a trailing newline.

    A key that is absent or whose cell is missing (None/NaN) falls back to
    its default: ``-`` for ip, size and user agent, ``GET`` for the method,
    ``/`` for the URL and ``200`` for the status. A date that is missing or
    does not parse is written as ``-``.
    """
    ip = _field(row, "src_ip", "-")
    identd = "-"
    user = "-"

    # Apache date format, e.g. 10/Oct/2000:13:55:36 +0000.
    date_str = "-"
    date_src = _field(row, "res_header_Date", None)
    if date_src is not None:
        try:
            # Expected input, e.g. "Mon, 04 Dec 2023 11:42:21 GMT".
            parsed = datetime.strptime(date_src, "%a, %d %b %Y %H:%M:%S %Z")
            date_str = parsed.strftime("%d/%b/%Y:%H:%M:%S +0000")
        except (TypeError, ValueError):
            # Non-string or unparseable date: keep the "-" placeholder.
            date_str = "-"

    method = _field(row, "req_method", "GET")
    url = _field(row, "req_url", "/")
    http_version = "HTTP/1.1"
    # Escape the request line so an embedded quote or newline cannot break
    # the quoted field or split the record over several lines.
    request = _escape_log_field(f"{method} {url} {http_version}")

    # Columns containing any missing value are read as floats; print 200, not 200.0.
    code = _whole_number(_field(row, "res_header_Response_Code", 200))
    size = _whole_number(_field(row, "res_header_Content_Length", "-"))
    ua = _escape_log_field(_field(row, "req_header_User_Agent", "-"))

    # NOTE: no Referer field is emitted, so the User-Agent directly follows
    # the size. Parsers expecting the Combined Log Format need a "-" Referer
    # inserted before it.
    # (Optional) to enrich the line, append the attack_tag column:
    # attack = _field(row, "attack_tag", "-")
    log_line = f'{ip} {identd} {user} [{date_str}] "{request}" {code} {size} "{ua}"'
    # log_line += f' {attack}'  # uncomment to append attack_tag at the end
    return log_line

# Write one formatted line per DataFrame row; mode "w" replaces any previous
# file at this path.
with open(output_log, "w", encoding="utf-8") as log_file:
    log_file.writelines(
        f"{format_apache_log(record)}\n" for _row_label, record in df.iterrows()
    )

print(f"[✔] Log Apache généré : {output_log}")


[✔] Log Apache généré : /content/drive/MyDrive/BINOME_WORK/STAGE_CERIST/DATA_FORMATING/STEP5_CSV_To_Apache_Logs/Log_file.log
