In [1]:
"""
Interactive Dashboard for Book Bans Project
Built with Plotly Dash
Author: Joseph (Geographic & Dashboard Specialist)
"""

import dash
from dash import dcc, html, Input, Output, callback
import dash_bootstrap_components as dbc
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go

# Initialize app with Bootstrap theme
app = dash.Dash(__name__, external_stylesheets=[dbc.themes.BOOTSTRAP])
app.title = "Banned But Not Forgotten: Interactive Dashboard"

# Load data
def load_dashboard_data():
    """Load all processed data for dashboard"""
    # Adjust paths as needed
    bans_df = pd.read_csv('data/processed/master_dataset_with_categories.csv')
    state_data = pd.read_csv('data/processed/geographic_master_dataset.csv')
    return bans_df, state_data

bans_df, state_data = load_dashboard_data()

# Define color scheme
COLORS = {
    'primary': '#2C3E50',
    'secondary': '#E74C3C',
    'accent': '#3498DB',
    'background': '#ECF0F1',
    'text': '#2C3E50'
}

# ========== LAYOUT ==========

# Header
header = dbc.Navbar(
    dbc.Container([
        dbc.Row([
            dbc.Col([
                html.H2("Banned But Not Forgotten", 
                       style={'color': 'white', 'margin': 0}),
                html.P("The Streisand Effect in Book Censorship", 
                      style={'color': '#ECF0F1', 'margin': 0, 'fontSize': 14})
            ])
        ], align='center'),
    ]),
    color=COLORS['primary'],
    dark=True,
    className='mb-4'
)

# Overview statistics cards
def create_stat_card(title, value, icon):
    """Create a statistics card"""
    return dbc.Card([
        dbc.CardBody([
            html.H5(icon, className='text-center', 
                   style={'fontSize': 40, 'color': COLORS['accent']}),
            html.H3(value, className='text-center', style={'fontWeight': 'bold'}),
            html.P(title, className='text-center', style={'color': 'gray'})
        ])
    ], className='shadow-sm')

overview_stats = dbc.Row([
    dbc.Col(create_stat_card("Books Banned", f"{len(bans_df):,}", "üìö"), md=3),
    dbc.Col(create_stat_card("States with Bans", 
                             f"{bans_df['State'].nunique()}", "üó∫Ô∏è"), md=3),
    dbc.Col(create_stat_card("Avg Interest Spike", 
                             f"{bans_df['percent_change'].mean():.0f}%", "üìà"), md=3),
    dbc.Col(create_stat_card("Books Analyzed", 
                             f"{len(bans_df):,}", "üîç"), md=3),
], className='mb-4')

# Filters
filters = dbc.Card([
    dbc.CardBody([
        html.H5("Filters", className='mb-3'),
        dbc.Row([
            dbc.Col([
                html.Label("Category:"),
                dcc.Dropdown(
                    id='category-filter',
                    options=[{'label': 'All Categories', 'value': 'all'}] + 
                            [{'label': cat, 'value': cat} 
                             for cat in bans_df['Primary Category'].unique()],
                    value='all',
                    clearable=False
                )
            ], md=4),
            dbc.Col([
                html.Label("State:"),
                dcc.Dropdown(
                    id='state-filter',
                    options=[{'label': 'All States', 'value': 'all'}] + 
                            [{'label': state, 'value': state} 
                             for state in sorted(bans_df['State'].unique())],
                    value='all',
                    clearable=False
                )
            ], md=4),
            dbc.Col([
                html.Label("Political Leaning:"),
                dcc.Dropdown(
                    id='political-filter',
                    options=[
                        {'label': 'All States', 'value': 'all'},
                        {'label': 'Red States', 'value': 'Red'},
                        {'label': 'Blue States', 'value': 'Blue'},
                        {'label': 'Purple States', 'value': 'Purple'}
                    ],
                    value='all',
                    clearable=False
                )
            ], md=4)
        ])
    ])
], className='mb-4')

# Main content tabs
content_tabs = dbc.Tabs([
    dbc.Tab(label="Geographic View", tab_id="geographic"),
    dbc.Tab(label="Category Analysis", tab_id="category"),
    dbc.Tab(label="Trends Over Time", tab_id="trends"),
    dbc.Tab(label="Book Explorer", tab_id="explorer"),
    dbc.Tab(label="About", tab_id="about")
], id="content-tabs", active_tab="geographic", className='mb-3')

# Tab content container
tab_content = html.Div(id="tab-content", className='mb-4')

# Footer
footer = dbc.Container([
    html.Hr(),
    html.P([
        "Created by the DSS Book Bans Research Team | ",
        html.A("GitHub Repository", href="#", target="_blank"),
        " | ",
        html.A("Data Sources", href="#")
    ], className='text-center text-muted')
], className='mt-5')

# Complete layout
app.layout = dbc.Container([
    header,
    overview_stats,
    filters,
    content_tabs,
    tab_content,
    footer
], fluid=True, style={'backgroundColor': COLORS['background'], 'minHeight': '100vh'})

# ========== CALLBACKS ==========

@callback(
    Output('tab-content', 'children'),
    Input('content-tabs', 'active_tab'),
    Input('category-filter', 'value'),
    Input('state-filter', 'value'),
    Input('political-filter', 'value')
)
def render_tab_content(active_tab, category, state, political):
    """Render content based on active tab and filters"""
    
    # Filter data based on selections
    filtered_df = bans_df.copy()
    
    if category != 'all':
        filtered_df = filtered_df[filtered_df['Primary Category'] == category]
    if state != 'all':
        filtered_df = filtered_df[filtered_df['State'] == state]
    if political != 'all':
        # Need to merge with state_data for political info
        filtered_df = filtered_df.merge(
            state_data[['State', 'Political_Leaning']], 
            on='State', how='left'
        )
        filtered_df = filtered_df[filtered_df['Political_Leaning'] == political]
    
    # Geographic View Tab
    if active_tab == "geographic":
        # Create state-level map
        state_counts = filtered_df['State'].value_counts().reset_index()
        state_counts.columns = ['State', 'Total_Bans']
        
        map_fig = px.choropleth(
            state_counts,
            locations='State',
            locationmode='USA-states',
            color='Total_Bans',
            scope='usa',
            color_continuous_scale='Reds',
            title='Book Bans by State'
        )
        map_fig.update_layout(height=500)
        
        return dbc.Row([
            dbc.Col([
                dcc.Graph(figure=map_fig)
            ], md=12),
            dbc.Col([
                html.H5("State Rankings", className='mt-3'),
                dbc.Table.from_dataframe(
                    state_counts.head(10),
                    striped=True,
                    bordered=True,
                    hover=True
                )
            ], md=6)
        ])
    
    # Category Analysis Tab
    elif active_tab == "category":
        category_counts = filtered_df['Primary Category'].value_counts()
        
        fig = px.bar(
            x=category_counts.index,
            y=category_counts.values,
            title='Books Banned by Category',
            labels={'x': 'Category', 'y': 'Number of Books'}
        )
        fig.update_layout(height=500)
        
        return dcc.Graph(figure=fig)
    
    # Trends Over Time Tab
    elif active_tab == "trends":
        # Time series plot (if date data available)
        if 'Ban Date' in filtered_df.columns:
            filtered_df['Ban Date'] = pd.to_datetime(filtered_df['Ban Date'])
            time_counts = filtered_df.groupby(
                filtered_df['Ban Date'].dt.to_period('M')
            ).size().reset_index()
            time_counts.columns = ['Month', 'Count']
            time_counts['Month'] = time_counts['Month'].dt.to_timestamp()
            
            fig = px.line(
                time_counts,
                x='Month',
                y='Count',
                title='Book Bans Over Time',
                labels={'Month': 'Date', 'Count': 'Number of Bans'}
            )
            fig.update_layout(height=500)
            
            return dcc.Graph(figure=fig)
        else:
            return html.P("Time series data not available")
    
    # Book Explorer Tab
    elif active_tab == "explorer":
        return dbc.Row([
            dbc.Col([
                html.H5("Search Books"),
                dcc.Input(
                    id='book-search',
                    type='text',
                    placeholder='Search by title...',
                    className='form-control mb-3'
                ),
                dbc.Table.from_dataframe(
                    filtered_df[['book_title', 'State', 'Primary Category', 
                                'percent_change']].head(20),
                    striped=True,
                    bordered=True,
                    hover=True
                )
            ])
        ])
    
    # About Tab
    elif active_tab == "about":
        return dbc.Row([
            dbc.Col([
                html.H3("About This Project"),
                html.P([
                    "This dashboard presents findings from our research on the ",
                    html.Strong("Streisand Effect"), 
                    " in book censorship‚Äîthe phenomenon where attempts to ",
                    "suppress information paradoxically increase public interest."
                ]),
                html.H4("Data Sources", className='mt-4'),
                html.Ul([
                    html.Li("PEN America's Index of School Book Bans"),
                    html.Li("Google Trends (public search interest)"),
                    html.Li("Goodreads (reader engagement)"),
                    html.Li("US Census Bureau (demographics)"),
                    html.Li("Election data (political context)")
                ]),
                html.H4("Team Members", className='mt-4'),
                html.Ul([
                    html.Li("Tahvia - Project Manager"),
                    html.Li("Kevin - Google Trends Specialist"),
                    html.Li("Violet - Goodreads Specialist"),
                    html.Li("Lauren - Category & Statistics Specialist"),
                    html.Li("Joseph - Geographic & Dashboard Specialist")
                ]),
                html.H4("Methodology", className='mt-4'),
                html.P([
                    "We analyzed book bans from 2020-2024, collecting engagement ",
                    "data from 3 months before to 3 months after each ban. ",
                    "Statistical significance was tested using t-tests and ANOVA. ",
                    "For detailed methodology, see our ",
                    html.A("full report", href="#"), "."
                ])
            ], md=8)
        ])

# Run server
if __name__ == '__main__':
    app.run_server(debug=True, port=8050)

FileNotFoundError: [Errno 2] No such file or directory: 'data/processed/master_dataset_with_categories.csv'