# Carbon Emissions Violation Detection Dashboard
Before running the cells below, upload `parquet_data.tar.gz` to the Colab **Files** pane (it must sit in the working directory).

In [None]:
!pip install plotly pandas pyarrow streamlit
!tar -xzf parquet_data.tar.gz

In [None]:
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go

# Read the EPA parquet dataset into a single in-memory DataFrame
# and report how many rows were loaded (thousands-separated).
epa = pd.read_parquet(path='outputs/parquet_only/epa_parquet')
print(f"EPA records: {len(epa):,}")

In [None]:
# Top violations by state.
# A row counts as a violation when it is a PM2.5 sample above 35 or an Ozone
# sample above 0.070.
# NOTE(review): these look like the EPA 24-hour PM2.5 (µg/m³) and 8-hour ozone
# (ppm) standard levels — confirm units and averaging period of the samples.
violations = epa.loc[
    (epa['Parameter Name'].eq('PM2.5 - Local Conditions') & epa['Sample Measurement'].gt(35))
    | (epa['Parameter Name'].eq('Ozone') & epa['Sample Measurement'].gt(0.070))
]

# Count violating rows per state, then keep the five largest counts.
state_counts = violations.groupby('State Name').size()
top_states = state_counts.nlargest(5)

# Bar chart: one bar per state, height = number of violating rows.
fig = px.bar(
    x=top_states.index,
    y=top_states.values,
    title='Top 5 States by Violations',
    labels={'x': 'State', 'y': 'Violations'},
)
fig.show()

In [None]:
# PM2.5 trend over years.
# Keep only PM2.5 rows, then average the sample measurement within each
# `year_partition` value.
pm25 = epa.loc[epa['Parameter Name'].eq('PM2.5 - Local Conditions')]
yearly = pm25.groupby('year_partition')['Sample Measurement'].mean()

# Line-and-marker trace of the per-year mean.
trend_trace = go.Scatter(
    x=yearly.index,
    y=yearly.values,
    mode='lines+markers',
    name='PM2.5',
)

fig = go.Figure()
fig.add_trace(trend_trace)
# Dashed red reference line at 12, labelled as the EPA target on the chart.
fig.add_hline(
    y=12,
    line_dash='dash',
    line_color='red',
    annotation_text='EPA Target',
)
fig.update_layout(
    title='PM2.5 Trend (2015-2024)',
    xaxis_title='Year',
    yaxis_title='PM2.5 (µg/m³)',
)
fig.show()

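## Key Metrics
- Total Records: 225M
- Model AUC: 99.25%
- CA Violations: 442K

_Note: these figures are static values recorded in this notebook; the cells above do not recompute them._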