# 📁 Project: RiskGuardian AI — Real-Time Risk Detection and Compliance Monitor

In [1]:
# === 00_setup.py ===
# Environment & data pipeline setup for RiskGuardian
import os
import requests
import pandas as pd
from pathlib import Path

# Setup folders
# Create every working directory up front; exist_ok=True keeps re-runs harmless.
Path("data/raw").mkdir(parents=True, exist_ok=True)
Path("data/processed").mkdir(parents=True, exist_ok=True)
Path("models").mkdir(parents=True, exist_ok=True)
Path("artifacts").mkdir(parents=True, exist_ok=True)

print("✅ Folder structure created.")

# Example: SEC EDGAR submissions feed for a single public company.
# The URL is keyed by the zero-padded CIK (here Apple Inc.), used as a sample
# to exercise the fetch-and-save path.
BASE_URL = "https://data.sec.gov/submissions/CIK0000320193.json"  # Apple Inc.
# NOTE(review): SEC's fair-access guidance reportedly asks for a User-Agent that
# identifies the requester with contact details — confirm against the SEC docs.
# SEC_USER_AGENT overrides the default without editing this file.
HEADERS = {"User-Agent": os.environ.get("SEC_USER_AGENT", "RiskGuardianBot/0.1")}

try:
    # A timeout stops a stalled connection from hanging the script forever.
    response = requests.get(BASE_URL, headers=HEADERS, timeout=30)
except requests.RequestException as exc:
    # Report transport-level failures (DNS, refused connection, timeout) in the
    # same style as a non-200 reply instead of dumping a traceback.
    response = None
    print(f"❌ Failed to fetch SEC data: {exc}")
else:
    if response.status_code == 200:
        data = response.json()
        # The preprocessing step filters row-wise on 'form', so the CSV needs one
        # row per filing. 'recent' is expected to be a dict of parallel lists
        # (column-oriented — confirm against the SEC API docs); pd.DataFrame turns
        # each list into a column, whereas json_normalize would emit a single row
        # whose cells hold entire lists.
        pd.DataFrame(data['filings']['recent']).to_csv("data/raw/sec_filings_sample.csv", index=False)
        print("✅ Sample SEC filings data fetched and saved.")
    else:
        print(f"❌ Failed to fetch SEC data: {response.status_code}")

# === 01_preprocess.py ===
# Reduce the raw filings table to the identifying columns of 10-K / 10-Q rows
import pandas as pd

# One pipeline: load the raw CSV, keep the four columns of interest, then keep
# only the rows whose 'form' value is exactly '10-K' or '10-Q'.
df = (
    pd.read_csv("data/raw/sec_filings_sample.csv")
    .loc[:, ['accessionNumber', 'reportDate', 'form', 'primaryDocument']]
    .loc[lambda frame: frame['form'].isin(['10-K', '10-Q'])]
)

# Persist the filtered table without the pandas index column.
df.to_csv("data/processed/sec_filings_clean.csv", index=False)
print("✅ Processed and filtered SEC filings.")


✅ Folder structure created.
✅ Sample SEC filings data fetched and saved.
✅ Processed and filtered SEC filings.
