In [22]:
import pandas as pd
import plotly.express as px

In [23]:
# Load the predictions table and convert the receipt date column to datetimes.
# NOTE(review): the file name is spelled 'predictiions' — confirm it matches
# the file on disk before "correcting" it.
df = pd.read_csv('../data/data_predictiions.csv').assign(
    date_received=lambda frame: pd.to_datetime(frame['date_received'])
)

In [24]:
# Region name -> list of two-letter state codes belonging to that region.
region_mapping = {
    'Northeast': ['CT', 'ME', 'MA', 'NH', 'NJ', 'NY', 'PA', 'RI', 'VT'],
    'Southeast': ['AL', 'AR', 'DE', 'FL', 'GA', 'KY', 'LA', 'MD', 'MS', 'NC', 'SC', 'TN', 'VA', 'WV'],
    'Midwest': ['IL', 'IN', 'IA', 'KS', 'MI', 'MN', 'MO', 'NE', 'ND', 'OH', 'SD', 'WI'],
    'Southwest': ['AZ', 'NM', 'OK', 'TX'],
    'West': ['AK', 'CA', 'CO', 'HI', 'ID', 'MT', 'NV', 'OR', 'UT', 'WA', 'WY'],
}

# Inverted lookup: state code -> region name. Codes that appear in no list
# above are simply absent from this dict.
state_to_region = dict(
    (state_code, region_name)
    for region_name, member_states in region_mapping.items()
    for state_code in member_states
)


In [None]:
# Window length, counted in rows of each state's series, for the rolling mode
# computed further down (despite the name, nothing is averaged).
rolling_avg = 100

# unique() keeps NaN, which would add a "missing state" entry for every date
# in the grid below; drop it so the grid only holds real state values.
all_states = df['state'].dropna().unique()

# States that are not keys of state_to_region get NaN here.
df['region'] = df['state'].map(state_to_region)

# One entry per day from the earliest to the latest receipt date.
# NOTE(review): the range steps from the exact minimum timestamp, so this
# assumes date_received carries no time-of-day component — confirm.
all_dates = pd.date_range(df['date_received'].min(), df['date_received'].max())

# Every (state, day) combination, including days on which a state has no rows.
grid = pd.MultiIndex.from_product([all_states, all_dates],
                                  names=['state', 'date_received']).to_frame(index=False)

# Left join: (state, day) pairs without a match keep predicted_category = NaN,
# and a pair that occurs k times in df produces k rows here.
mode_df = pd.merge(grid, df[['state', 'date_received', 'predicted_category']],
                   on=['state', 'date_received'], how='left')

# Chronological order within each state, needed by the per-state rolling step.
mode_df = mode_df.sort_values(['state', 'date_received'])

def rolling_mode_categorical(series, window):
    """Return the trailing rolling mode of a (possibly non-numeric) Series.

    For each position the result is the most frequent non-null value among
    the current element and up to ``window - 1`` elements before it. Ties are
    resolved by taking the first value returned by ``Series.mode()``.

    Parameters
    ----------
    series : pd.Series
        Values to scan, in the order they should be windowed.
    window : int
        Maximum number of trailing rows (current row included) per window.
        A value below 1 leaves every window empty, so every result is None.

    Returns
    -------
    pd.Series
        Same length and index as ``series``; None where the window holds no
        non-null value.

    Notes
    -----
    The mode is recomputed from scratch for every row, so the cost grows with
    ``len(series) * window``.
    """
    modes = []
    for end in range(1, len(series) + 1):
        start = max(0, end - window)
        # .iloc makes the slice positional for every index type; a plain
        # series[start:end] can be interpreted by label on a float index.
        candidates = series.iloc[start:end].dropna().mode()
        modes.append(candidates.iloc[0] if not candidates.empty else None)
    return pd.Series(modes, index=series.index)

# Rolling mode of predicted_category, computed separately for each state.
# transform() always returns a result carrying mode_df's own index, so the
# assignment lines up row for row. The earlier apply(...).reset_index(level=0,
# drop=True) was only correct when apply prepended the group key to the index;
# when it did not, the reset discarded the real index and misaligned the
# values against the sorted frame.
mode_df['rolling_category'] = (
    mode_df.groupby('state')['predicted_category']
    .transform(lambda state_series: rolling_mode_categorical(state_series, rolling_avg))
)

In [None]:
# Data vis prep
# Fill gaps inside each state only: forward-fill first, then back-fill the
# leading gap. Doing both steps per group keeps a state that has no category
# at all empty instead of letting it inherit a value from a neighbouring
# state's rows.
mode_df['rolling_category'] = (
    mode_df.groupby('state')['rolling_category']
    .transform(lambda state_series: state_series.ffill().bfill())
)

# Re-parse before sorting/formatting so this cell can be re-run: after the
# first run the column already holds 'YYYY-MM-DD' strings, on which .dt fails.
mode_df['date_received'] = pd.to_datetime(mode_df['date_received'])

# Sort chronologically so the rows (and hence the animation frames built from
# them) appear in date order, then format the dates as frame labels.
mode_df = mode_df.sort_values('date_received')
mode_df['date_received'] = mode_df['date_received'].dt.strftime('%Y-%m-%d')


In [None]:
# Animated US state map: one frame per date_received value, each state
# coloured by its rolling_category.
# mode_df must already carry a 'rolling_category' column; on a fresh kernel,
# run the cells that build it before this one.
choropleth_options = dict(
    locations='state',
    locationmode='USA-states',
    color='rolling_category',
    animation_frame='date_received',
    scope='usa',
    title='Complaints by Day',
)
fig = px.choropleth(mode_df, **choropleth_options)
fig.show()

ValueError: Value of 'color' is not the name of a column in 'data_frame'. Expected one of ['state', 'date_received', 'most_common_category'] but received: rolling_category