# Zero-shot classification on filing text to detect risk types

In [7]:
import os
from transformers import pipeline
import pandas as pd

In [16]:
# Company display name -> 10-digit, zero-padded identifier string.
# The names double as the file-name prefix for each company's processed filing.
# NOTE(review): the identifiers look like SEC CIK numbers — confirm against the
# cell that downloads the filings.
companies = dict(
    Apple="0000320193",
    Microsoft="0000789019",
    Meta="0001326801",
    Amazon="0001018724",
    Google="0001652044",
    Rise="0001640967",
    Tesla="0001318605",
    Nvidia="0001045810",
)

In [19]:
# Zero-shot classifier used to score each filing excerpt against the risk labels.
classifier = pipeline("zero-shot-classification", model="facebook/bart-large-mnli")

# Candidate labels the classifier ranks for every filing.
risk_labels = [
    "cybersecurity risk",
    "regulatory non-compliance",
    "financial instability",
    "legal exposure",
    "supply chain disruption"
]

# Directory holding the cleaned filing text (input) and the per-company
# risk-label CSVs (output).
PROCESSED_DIR = "../data/processed"
# Only the first MAX_FILING_CHARS characters of each filing are classified.
MAX_FILING_CHARS = 3000

# For every company: read the start of its cleaned filing, rank the risk labels
# against it, and write the (label, score) pairs to a CSV next to the input.
# Companies without usable filing text are reported and skipped.
for company in companies:
    filing_path = f"{PROCESSED_DIR}/{company}_filing_clean.txt"
    if not os.path.exists(filing_path):
        print(f"⚠️ Skipping {company}: No filing text found.")
        continue

    with open(filing_path, "r", encoding="utf-8") as f:
        # In text mode read(n) returns at most n characters, so the rest of the
        # filing is never loaded into memory.
        text = f.read(MAX_FILING_CHARS)

    # The zero-shot pipeline raises ValueError on an empty sequence, which would
    # abort the loop for all remaining companies; blank text carries no signal
    # either, so both cases are skipped.
    if not text.strip():
        print(f"⚠️ Skipping {company}: Filing text is empty.")
        continue

    result = classifier(text, candidate_labels=risk_labels)

    # One row per candidate label, taken in the order the pipeline returned them.
    df_result = pd.DataFrame({
        "risk_label": result['labels'],
        "score": result['scores']
    })

    df_result.to_csv(f"{PROCESSED_DIR}/{company}_filing_risk_labels.csv", index=False)
    print(f"✅ Applied zero-shot classification for risk labeling on {company} filing.")


Device set to use cpu


✅ Applied zero-shot classification for risk labeling on Apple filing.
⚠️ Skipping Microsoft: No filing text found.
✅ Applied zero-shot classification for risk labeling on Meta filing.
✅ Applied zero-shot classification for risk labeling on Amazon filing.
✅ Applied zero-shot classification for risk labeling on Google filing.
⚠️ Skipping Rise: No filing text found.
⚠️ Skipping Tesla: No filing text found.
✅ Applied zero-shot classification for risk labeling on Nvidia filing.
