# Bluesky Social Justice Data Analysis
## DFP F25 Social Media Blue Team

This notebook demonstrates how to analyze collected social justice data from Bluesky.

### Prerequisites:
1. Install requirements: `pip install -r requirements.txt`
2. Run data collection: `python main.py`


## 1. Setup and Data Loading


In [None]:
import ast
import json
from pathlib import Path

import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns

# Pick the most recently modified collection export and load it.
# `df` stays None when nothing has been collected yet, so later cells can
# skip their work with a simple `df is not None` check.
data_dir = Path("data/alltime_socmed")
csv_files = [candidate for candidate in data_dir.glob("socmed_*.csv")]

if not csv_files:
    df = None
    print("❌ No data found. Run: python main.py")
else:
    # "Latest" means newest modification time on disk, not newest file name.
    latest_file = max(csv_files, key=lambda candidate: candidate.stat().st_mtime)
    df = pd.read_csv(latest_file)
    print(f"✅ Loaded {len(df)} posts from {latest_file.name}")


## 2. Basic Data Overview


In [None]:
def _as_keyword_list(raw):
    """Return a `keyword_matches` cell as a real list of keywords.

    The data is loaded with `pd.read_csv` without converters, so a list-like
    column comes back as a plain string (or NaN when the cell is empty).
    Slicing that value directly would cut characters, not keywords.

    Args:
        raw: The cell value - a list/tuple/set, a string, or a missing value.

    Returns:
        A list of keywords; empty when the cell is missing or blank.
    """
    if isinstance(raw, (list, tuple, set)):
        return list(raw)
    if not isinstance(raw, str):
        # Non-string scalars: missing values (None/NaN) mean "no keywords".
        return [] if pd.isna(raw) else [raw]

    text = raw.strip()
    if not text:
        return []
    try:
        # Handles Python-repr lists ("['a', 'b']") and JSON-style string lists.
        parsed = ast.literal_eval(text)
    except (ValueError, SyntaxError, TypeError):
        # Not a literal - assumes a comma-separated string.
        # TODO confirm against the format the collector writes.
        return [part.strip() for part in text.split(",") if part.strip()]
    if isinstance(parsed, (list, tuple, set)):
        return list(parsed)
    return [parsed]


# Summarise the loaded posts; skipped entirely when no data file was found.
if df is not None:
    print("📊 Data Overview:")
    print(f"  Total posts: {len(df)}")
    print(f"  Posts with location: {df['has_location_data'].sum()} ({df['has_location_data'].mean()*100:.1f}%)")
    print(f"  Posts with keywords: {df['has_keyword_matches'].sum()} ({df['has_keyword_matches'].mean()*100:.1f}%)")
    print(f"  Average text length: {df['text_length'].mean():.1f} characters")

    # Show sample post
    if len(df) > 0:
        sample = df.iloc[0]

        # A missing text cell is NaN (a float) after read_csv and cannot be sliced.
        sample_text = sample['text']
        sample_text = "" if pd.isna(sample_text) else str(sample_text)
        # Only add the ellipsis when the preview is actually truncated.
        preview = sample_text[:100] + ("..." if len(sample_text) > 100 else "")

        # `.get` only falls back when the column is absent; an empty cell is NaN.
        location = sample.get('detected_location', 'None')
        if pd.isna(location):
            location = 'None'

        print("\n📝 Sample Post:")
        print(f"  Text: {preview}")
        print(f"  Author: @{sample['author_handle']}")
        print(f"  Location: {location}")
        print(f"  Keywords: {_as_keyword_list(sample.get('keyword_matches', []))[:3]}")


## 3. Run New Data Collection


In [None]:
# Print the available collection commands as one block; each argument is
# emitted on its own line and "" produces a blank separator line.
print(
    "🚀 To collect new data, run one of these commands:",
    "",
    "# Basic 15-minute collection:",
    "python main.py",
    "",
    "# 30-minute collection with all keywords:",
    "python main.py --duration 30 --keywords all",
    "",
    "# Custom keywords from file:",
    "python main.py --keywords custom --duration 45",
    "",
    "# Long collection with sleep prevention:",
    "python main.py --duration 120 --keywords all --no-sleep",
    "",
    "📁 Data will be saved to: data/alltime_socmed/",
    "📊 Then re-run this notebook to analyze the new data!",
    sep="\n",
)
