# Tool-First Content Analytics Dashboard
Replaces 680 lines of custom analytics engine with interactive Jupyter notebook.
Philosophy: REUSE OVER REBUILD - Use pandas/plotly instead of custom code.

In [None]:
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go
import json
from pathlib import Path
import numpy as np

In [None]:
# Load enriched data (tool-first vs custom storage wrapper)
enriched_file = Path('/home/kiriti/alpha_projects/intelforge/crawl_ops/data_runs/20250719/enriched_data.jsonl')

data = []
with enriched_file.open('r') as f:
    for line in f:
        data.append(json.loads(line))

df = pd.DataFrame(data)
print(f"Loaded {len(df)} enriched entries")
df.head()

## Quality Distribution Analysis
1 line vs 50 lines of custom aggregation code

In [None]:
# Quality score distribution (1 line vs 50 LOC custom)
fig = px.histogram(df, x='quality_score', 
                   title='Content Quality Score Distribution',
                   labels={'quality_score': 'Quality Score', 'count': 'Number of Articles'})
fig.show()

print(f"Average Quality Score: {df['quality_score'].mean():.1f}")
print(f"Above 70 threshold: {(df['quality_score'] >= 70).sum()} articles")

## Strategy Keywords Analysis
Pandas handles the heavy lifting vs custom aggregation logic

In [None]:
# Strategy keyword frequency (pandas vs custom counting)
all_keywords = []
for keywords in df['strategy_keywords']:
    all_keywords.extend(keywords)

keyword_counts = pd.Series(all_keywords).value_counts().head(10)

fig = px.bar(x=keyword_counts.index, y=keyword_counts.values,
             title='Top 10 Strategy Keywords',
             labels={'x': 'Strategy Keywords', 'y': 'Frequency'})
fig.show()

print(f"Total strategy keywords found: {len(all_keywords)}")
print(f"Unique strategy keywords: {len(set(all_keywords))}")

## Content Tags Analysis
Interactive visualization vs static custom reports

In [None]:
# Content tags distribution
all_tags = []
for tags in df['tags']:
    all_tags.extend(tags)

tag_counts = pd.Series(all_tags).value_counts()

fig = px.pie(values=tag_counts.values, names=tag_counts.index,
             title='Content Type Distribution')
fig.show()

print(f"Content classification coverage: {len(tag_counts)} different tag types")

## Readability vs Quality Correlation
Advanced analysis with minimal code using pandas/plotly

In [None]:
# Correlation analysis (pandas vs custom statistics)
fig = px.scatter(df, x='readability_score', y='quality_score',
                 size='content_length', hover_data=['url'],
                 title='Quality Score vs Readability Score',
                 labels={'readability_score': 'Readability Score', 
                        'quality_score': 'Quality Score'})
fig.show()

correlation = df['readability_score'].corr(df['quality_score'])
print(f"Readability-Quality Correlation: {correlation:.3f}")

## Summary Statistics
Complete analytics dashboard in <50 lines vs 680 lines custom engine

In [None]:
# Summary dashboard (pandas describe vs custom statistics)
print("=== TOOL-FIRST ANALYTICS SUMMARY ===")
print(f"Total Articles Processed: {len(df)}")
print(f"Average Content Length: {df['content_length'].mean():.0f} characters")
print(f"Average Quality Score: {df['quality_score'].mean():.1f}/100")
print(f"Average Readability: {df['readability_score'].mean():.1f}")
print(f"Total Strategy Keywords: {len(all_keywords)}")
print(f"Content Types Identified: {len(set(all_tags))}")
print("\n=== BENEFITS OF TOOL-FIRST APPROACH ===")
print("✅ Interactive visualizations vs static reports")
print("✅ Real-time data exploration vs fixed queries")
print("✅ 50 lines vs 680 lines (93% code reduction)")
print("✅ Jupyter sharing vs custom dashboard maintenance")
print("✅ Plotly/pandas vs custom analytics engine")