# LogGuardian: Colab Runner

This notebook lets you upload a structured CSV log and generate a Security Intelligence report using the regex-only engine.

- Upload a CSV like `Linux_2k.log_structured.csv` or `HDFS_2k.log_structured.csv`.
- The notebook will clone this repo, install requirements, run the analyzer with the "intel" template, render the Markdown inline, and give you a download link.

Tip: If your file is large, consider mounting Google Drive by setting `USE_DRIVE = True` in the first code cell.


In [None]:
# Choose the working directory for inputs/outputs, optionally backed by Google Drive
import os

USE_DRIVE = False  # flip to True to keep files on Google Drive
if not USE_DRIVE:
    # Default: keep everything on the Colab VM's local disk.
    BASE_DIR = '/content/LogGuardian'
else:
    # Imported lazily so the cell only touches google.colab when Drive is requested.
    from google.colab import drive  # type: ignore
    drive.mount('/content/drive')
    BASE_DIR = '/content/drive/MyDrive/LogGuardian'

# Create the directory up front; later cells write into it.
os.makedirs(BASE_DIR, exist_ok=True)
print('Base directory:', BASE_DIR)


In [None]:
# Clone the repository (or refresh an existing checkout) and install requirements
import os
import pathlib
import subprocess
import sys
import textwrap

REPO_URL = 'https://github.com/Shreyansh1812/Anomaly_Detection'
REPO_BRANCH = 'feature/regex-analysis-only'  # single source of truth for the branch name
COLAB_REPO_DIR = '/content/Anomaly_Detection'

if not os.path.exists(COLAB_REPO_DIR):
    print('Cloning repo...')
    # Shallow, single-branch clone: only the tip of REPO_BRANCH is needed.
    subprocess.run(
        ['git', 'clone', '--depth', '1', '--branch', REPO_BRANCH, REPO_URL, COLAB_REPO_DIR],
        check=True,
    )
else:
    print('Repo already present; pulling latest...')
    # Run each git step as an argument list with `git -C <dir>` instead of
    # interpolating the path into a `bash -lc` string. check=True stops the
    # sequence at the first failure, like the `&&` chain it replaces.
    for git_args in (
        ['fetch', '--depth=1', 'origin', REPO_BRANCH],
        ['checkout', REPO_BRANCH],
        ['pull', '--ff-only'],
    ):
        subprocess.run(['git', '-C', COLAB_REPO_DIR, *git_args], check=True)

print('Installing requirements...')
# Use the running interpreter's pip so packages land in the kernel's environment.
subprocess.run(
    [sys.executable, '-m', 'pip', 'install', '-q', '-r', f'{COLAB_REPO_DIR}/requirements.txt'],
    check=True,
)

print('Ready.')


In [None]:
# Upload your structured CSV log file and move it into BASE_DIR
from google.colab import files  # type: ignore
import os
import shutil

print('Upload a structured CSV (e.g., Linux_2k.log_structured.csv)')
uploaded = files.upload()

# Fail fast when the upload dialog was dismissed without selecting a file.
if not uploaded:
    raise RuntimeError('No file uploaded')

# Only the first uploaded file is used; any additional uploads are ignored.
fname = next(iter(uploaded))
src = f'/content/{fname}'
dst = os.path.join(BASE_DIR, fname)
# shutil.move falls back to copy-and-delete when src and dst are on different
# filesystems; os.replace may fail in that case, which matters when BASE_DIR
# points at the Google Drive mount.
shutil.move(src, dst)
INPUT_FILE = dst
print('Saved to:', INPUT_FILE)


In [None]:
# Run the analyzer to generate the intel report
import os
import shlex
import subprocess
import sys  # imported here so the cell does not rely on an earlier cell's import
import uuid

output_dir = os.path.join(BASE_DIR, 'Reports')
os.makedirs(output_dir, exist_ok=True)
# A short random suffix keeps repeated runs from overwriting earlier reports.
report_path = os.path.join(output_dir, f'colab_report_{uuid.uuid4().hex[:8]}.md')

cmd = [
    sys.executable, f'{COLAB_REPO_DIR}/Scripts/analyze_log_pipeline.py',
    '--input', INPUT_FILE,
    '--output', report_path,
    '--template', 'intel',
]
# Quote each argument so the echoed command stays unambiguous (and
# copy-pasteable) when a path contains spaces or shell metacharacters.
print('Running:', ' '.join(shlex.quote(part) for part in cmd))
subprocess.run(cmd, check=True)
print('Report written to', report_path)


In [None]:
# Show the generated report in the notebook, then trigger a browser download
from google.colab import files  # type: ignore
from IPython.display import Markdown, display

# Load the whole report as UTF-8 text.
with open(report_path, encoding='utf-8') as report_file:
    md = report_file.read()

# Render the Markdown inline in the cell output.
rendered_report = Markdown(md)
display(rendered_report)

print('\nDownload the report:')
files.download(report_path)


## Notes

- If the uploaded CSV has neither a `Content` nor a `message` column, the engine will pick a fallback column; results may vary.
- For exact parity with our Linux sample report, use a structured Linux auth CSV with a `Content` column.
- Optional enrichment is disabled by default. To enable it, edit `config.yaml` in the repo and rerun the analyzer cell.
