# Brand Safety Analysis
## Steps 4.i, 4.ii: Toxicity Detection, Weaponized Meme Monitoring, Alert Generation

In [None]:
import sys
sys.path.append('..')

from src.brand_safety.toxicity_analyzer import ToxicityAnalyzer
from src.brand_safety.monitoring import BrandSafetyMonitor
from src.data_collection.knowyourmeme_scraper import KnowYourMemeCollector
import pandas as pd

## Load Analyzed Data

In [None]:
# Read the CSV of Reddit memes with predictions from the data directory that
# sits one level above this notebook.
predictions_path = '../data/reddit_predictions.csv'
reddit_df = pd.read_csv(predictions_path)
print(f"Loaded {len(reddit_df)} memes with predictions")

# Last expression of the cell: the notebook renders the first five rows.
reddit_df.head(5)

## Step 4.i: Toxicity Analysis

In [None]:
# Run the toxicity analyzer over the loaded memes. The frame returned by
# process_dataframe replaces reddit_df; the statements below read the
# 'is_weaponized' and 'toxicity_score' columns from it.
toxicity_analyzer = ToxicityAnalyzer()
reddit_df = toxicity_analyzer.process_dataframe(reddit_df)

print(f"Toxicity analysis complete")
weaponized_total = reddit_df['is_weaponized'].sum()
print(f"Total weaponized memes: {weaponized_total}")
mean_toxicity = reddit_df['toxicity_score'].mean()
print(f"Average toxicity score: {mean_toxicity:.2f}")

# Last expression of the cell: preview the text next to its scores for the
# first ten rows.
preview_columns = ['body', 'toxicity_score', 'is_weaponized']
reddit_df[preview_columns].head(10)

## Generate Brand Safety Alerts

In [None]:
# Build the brand safety alerts from the analyzed memes and report how many
# were produced.
alerts = toxicity_analyzer.generate_alerts(reddit_df)

print(f"\nBrand safety alerts generated: {len(alerts)}")
if len(alerts) > 0:
    print("Alert severity breakdown:")
    print(alerts['alert_severity'].value_counts())
    # A bare expression nested inside an `if` block is not auto-displayed by
    # the notebook (only the last top-level expression of a cell is), so the
    # preview has to be printed explicitly to be visible.
    print(alerts.head(10))

## Subreddit Safety Analysis

In [None]:
# Aggregate the analyzed memes into a per-subreddit safety table.
subreddit_safety = toxicity_analyzer.subreddit_safety_analysis(reddit_df)

print(f"\nSubreddit Safety Analysis:")
subreddit_count = len(subreddit_safety)
print(f"Total subreddits analyzed: {subreddit_count}")
print(f"\nTop 10 Riskiest Subreddits:")
# NOTE(review): the heading presumes the table is already ordered
# riskiest-first — confirm in subreddit_safety_analysis.
# Last expression of the cell: the notebook renders the first ten rows.
subreddit_safety.head(n=10)

## Step 4.ii: Setup Brand Safety Monitoring

In [None]:
# Configure brand safety monitoring on the analyzed memes; setup_monitoring
# returns the table of alerts that the export and summary cells reuse.
monitor = BrandSafetyMonitor()
monitoring_alerts = monitor.setup_monitoring(reddit_df)

print(f"Monitoring alerts configured: {len(monitoring_alerts)}")
if len(monitoring_alerts) > 0:
    print("\nCritical Alerts:")
    alert_columns = ['alert_id', 'alert_type', 'ici_score', 'toxicity_score', 'recommended_action']
    # A bare expression nested inside an `if` block is not auto-displayed by
    # the notebook (only the last top-level expression of a cell is), so the
    # preview has to be printed explicitly to be visible.
    print(monitoring_alerts[alert_columns].head(10))

## Step 4.i: KnowYourMeme Weaponized Pattern Cross-Reference

In [None]:
# Cross-reference every meme's text against the weaponized patterns fetched
# from KnowYourMeme, using a case-insensitive substring match on the pattern
# name.
kym_collector = KnowYourMemeCollector()
kym_patterns = kym_collector.fetch_weaponized_patterns()

# Normalise the pattern names once, outside the per-meme loop. The meme text
# is lowercased before matching, so the names must be lowercased too or any
# name containing an uppercase letter could never match. Missing or empty
# names are skipped: str(NaN) is 'nan' and '' is a substring of everything,
# so either would produce bogus matches.
normalized_patterns = [
    (str(pattern['meme_name']).lower(), pattern)
    for _, pattern in kym_patterns.iterrows()
    if pd.notna(pattern['meme_name']) and str(pattern['meme_name'])
]

weaponized_matches = []
for idx, row in reddit_df.iterrows():
    # Coerce the body to text once and reuse it for both the match and the
    # stored excerpt; a missing body becomes '' rather than the string 'nan'.
    body_text = '' if pd.isna(row['body']) else str(row['body'])
    text_lower = body_text.lower()
    for name_lower, pattern in normalized_patterns:
        if name_lower in text_lower:
            weaponized_matches.append({
                'meme_id': f"MEME_{idx}",
                'text': body_text[:200],
                'matched_pattern': pattern['meme_name'],
                'toxicity_level': pattern['toxicity_level'],
                'financial_risk': pattern['financial_risk'],
            })

weaponized_df = pd.DataFrame(weaponized_matches)
print(f"\nWeaponized pattern matches: {len(weaponized_df)}")
if len(weaponized_df) > 0:
    # A bare expression nested inside an `if` block is not auto-displayed by
    # the notebook, so the preview is printed explicitly.
    print(weaponized_df.head(10))

## Export Brand Safety Data to BigQuery

In [None]:
from google.cloud import bigquery
from config.settings import PROJECT_ID, DATASET_ID

# Append the monitoring alerts to the brand_safety_alerts table in BigQuery.
# Nothing is sent when there are no alerts.
if len(monitoring_alerts) != 0:
    table_id = f"{PROJECT_ID}.{DATASET_ID}.brand_safety_alerts"
    client = bigquery.Client(project=PROJECT_ID)

    # WRITE_APPEND adds rows to the existing table instead of replacing it.
    job_config = bigquery.LoadJobConfig(write_disposition="WRITE_APPEND")
    job = client.load_table_from_dataframe(
        monitoring_alerts,
        table_id,
        job_config=job_config,
    )
    # Wait for the load job to finish before reporting success.
    job.result()

    print(f"Brand safety alerts exported to {table_id}")

## Brand Safety Summary

In [None]:
# Print a recap of the figures computed in the cells above.
print("Brand Safety Analysis Summary:")
print(f"Total memes analyzed: {len(reddit_df)}")
print("\nToxicity Metrics:")
print(f"  Weaponized memes: {reddit_df['is_weaponized'].sum()}")
print(f"  Average toxicity score: {reddit_df['toxicity_score'].mean():.2f}")
# The label states the same inclusive threshold the comparison applies.
print(f"  High toxicity (>=3): {(reddit_df['toxicity_score'] >= 3).sum()}")
print("\nAlerts Generated:")
print(f"  Total alerts: {len(monitoring_alerts)}")
print(f"  KnowYourMeme matches: {len(weaponized_df)}")
if len(subreddit_safety) > 0:
    # NOTE(review): treats the first row as the riskiest subreddit, which
    # presumes subreddit_safety is ordered riskiest-first — confirm in
    # subreddit_safety_analysis.
    riskiest = subreddit_safety.iloc[0]
    print(f"\nRiskiest Subreddit: {riskiest['subreddit']}")
    print(f"  Weaponized rate: {riskiest['weaponized_rate']*100:.1f}%")
else:
    # iloc[0] raises IndexError on an empty table; report the absence instead.
    print("\nRiskiest Subreddit: n/a (no subreddits analyzed)")