In [176]:
import pandas as pd
import plotly.graph_objects as go


In [177]:
# Input locations. The data root and the prediction snapshot date are named once
# here so that pointing the notebook at another run is a single edit.
DATA_DIR = "../data"
SNAPSHOT_DATE = "2024-03-04"

# NOTE(review): pd.read_pickle can execute arbitrary code while unpickling.
# Only load files written by a trusted pipeline.
# Forecast table (date, item_id, pred_mean, pred_lower, pred_upper).
df_preds = pd.read_pickle(f"{DATA_DIR}/results/df_preds_{SNAPSHOT_DATE}.pkl")
# Most recent NNDSS counts (item_id, year, week, date, label, state, new_cases).
df_latest = pd.read_pickle(f"{DATA_DIR}/interim/df_NNDSS_latest.pkl")
# Historical NNDSS counts used as the background line in the charts below.
df_historical = pd.read_pickle(f"{DATA_DIR}/interim/df_NNDSS_historical.pkl")

In [178]:
# Preview the first rows of the forecast table
# (columns: date, item_id, pred_mean, pred_lower, pred_upper).
df_preds.head()

Unnamed: 0,date,item_id,pred_mean,pred_lower,pred_upper
0,2024-03-04,ARIZONA_Campylobacteriosis,85.129997,6.0,288.0
1,2024-03-04,ARKANSAS_Chlamydia trachomatis infection,205.509995,42.0,543.0
2,2024-03-04,ARKANSAS_Gonorrhea,104.559998,13.0,306.0
3,2024-03-04,CALIFORNIA_Campylobacteriosis,180.919998,43.0,435.0
4,2024-03-04,CALIFORNIA_Chlamydia trachomatis infection,1083.73999,292.0,2096.0


In [179]:
# Show the first df_latest rows that report at least one new case.
df_latest.loc[df_latest['new_cases'] > 0].head(11)

Unnamed: 0,item_id,year,week,date,label,state,new_cases
780,ARIZONA_Brucellosis,2024,10,2024-03-04,Brucellosis,ARIZONA,1
837,ARIZONA_Campylobacteriosis,2024,10,2024-03-04,Campylobacteriosis,ARIZONA,13
894,"ARIZONA_Candida auris, clinical",2024,10,2024-03-04,"Candida auris, clinical",ARIZONA,1
833,ARKANSAS_Campylobacteriosis,2024,10,2024-03-04,Campylobacteriosis,ARKANSAS,10
561,CALIFORNIA_Babesiosis,2024,10,2024-03-04,Babesiosis,CALIFORNIA,1
675,"CALIFORNIA_Botulism, Infant",2024,10,2024-03-04,"Botulism, Infant",CALIFORNIA,1
846,CALIFORNIA_Campylobacteriosis,2024,10,2024-03-04,Campylobacteriosis,CALIFORNIA,131
903,"CALIFORNIA_Candida auris, clinical",2024,10,2024-03-04,"Candida auris, clinical",CALIFORNIA,2
838,COLORADO_Campylobacteriosis,2024,10,2024-03-04,Campylobacteriosis,COLORADO,10
798,CONNECTICUT_Campylobacteriosis,2024,10,2024-03-04,Campylobacteriosis,CONNECTICUT,5


In [180]:
# Overwrite one latest count so the outbreak branch of the chart can be exercised:
# the reported value for this series is 131, below its pred_upper of 435, so it is
# replaced with 750. NOTE: this mutates df_latest in place for every later cell.
selected_item_id = "CALIFORNIA_Campylobacteriosis"
is_override_row = (
    (df_latest['item_id'] == selected_item_id)
    & (df_latest['date'] == "2024-03-04")
)
df_latest.loc[is_override_row, 'new_cases'] = 750

In [181]:
# Re-display the non-zero rows to confirm that CALIFORNIA_Campylobacteriosis now reads 750.
df_latest.loc[df_latest['new_cases'] > 0].head(8)

Unnamed: 0,item_id,year,week,date,label,state,new_cases
780,ARIZONA_Brucellosis,2024,10,2024-03-04,Brucellosis,ARIZONA,1
837,ARIZONA_Campylobacteriosis,2024,10,2024-03-04,Campylobacteriosis,ARIZONA,13
894,"ARIZONA_Candida auris, clinical",2024,10,2024-03-04,"Candida auris, clinical",ARIZONA,1
833,ARKANSAS_Campylobacteriosis,2024,10,2024-03-04,Campylobacteriosis,ARKANSAS,10
561,CALIFORNIA_Babesiosis,2024,10,2024-03-04,Babesiosis,CALIFORNIA,1
675,"CALIFORNIA_Botulism, Infant",2024,10,2024-03-04,"Botulism, Infant",CALIFORNIA,1
846,CALIFORNIA_Campylobacteriosis,2024,10,2024-03-04,Campylobacteriosis,CALIFORNIA,750
903,"CALIFORNIA_Candida auris, clinical",2024,10,2024-03-04,"Candida auris, clinical",CALIFORNIA,2


In [182]:
# List the item_ids that have at least one non-negative historical count
# (rows whose new_cases is NaN fail the comparison and are left out).
df_historical.loc[df_historical['new_cases'] >= 0, 'item_id'].unique()

array(['ARIZONA_Campylobacteriosis',
       'ARKANSAS_Chlamydia trachomatis infection', 'ARKANSAS_Gonorrhea',
       'CALIFORNIA_Campylobacteriosis',
       'CALIFORNIA_Chlamydia trachomatis infection',
       'CALIFORNIA_Gonorrhea', 'COLORADO_Chlamydia trachomatis infection',
       'COLORADO_Gonorrhea', 'DELAWARE_Chlamydia trachomatis infection',
       'FLORIDA_Chlamydia trachomatis infection', 'FLORIDA_Gonorrhea',
       'GEORGIA_Chlamydia trachomatis infection', 'GEORGIA_Gonorrhea',
       'IDAHO_Chlamydia trachomatis infection',
       'ILLINOIS_Chlamydia trachomatis infection'], dtype=object)

In [183]:
# Inspect the df_latest row(s) for a single series.
df_latest.loc[df_latest['item_id'] == 'ARIZONA_Campylobacteriosis']

Unnamed: 0,item_id,year,week,date,label,state,new_cases
837,ARIZONA_Campylobacteriosis,2024,10,2024-03-04,Campylobacteriosis,ARIZONA,13


In [189]:
def plot_outbreak(df_historical_chart, df_latest_chart, df_preds_chart, selected_item_id):
    """Build and display the outbreak-monitoring chart for one series.

    Each input frame is narrowed to the rows whose ``item_id`` equals
    ``selected_item_id``. Whatever data exists is drawn on a dark-themed
    Plotly figure; whatever is missing is still added as a legend-only
    placeholder, so the legend always carries the same five entries
    (their order depends on which data is present).

    Traces, in the order they are added:
      1. ``Historical`` - line of ``new_cases`` over ``date`` (when history exists).
      2. ``Prediction Interval`` and ``Prediction`` - taken from the FIRST
         prediction row only: an asymmetric error bar spanning ``pred_lower``
         to ``pred_upper`` around ``pred_mean``, plus a marker at ``pred_mean``.
      3. ``Latest`` - one marker per latest row.
      4. ``Potential Outbreak`` - a visible "x" marker when the FIRST latest
         row's ``new_cases`` is strictly greater than ``pred_upper``;
         otherwise a legend-only placeholder.
      5. Legend-only placeholders for whichever of Historical, Latest,
         Prediction and Prediction Interval had no rows.

    Parameters
    ----------
    df_historical_chart : frame with ``item_id``, ``date`` and ``new_cases`` columns.
    df_latest_chart : frame with ``item_id``, ``date`` and ``new_cases`` columns.
    df_preds_chart : frame with ``item_id``, ``date``, ``pred_mean``,
        ``pred_lower`` and ``pred_upper`` columns.
    selected_item_id : value matched against ``item_id``; also used as the chart title.

    Returns
    -------
    None. The figure is displayed via ``show()`` as a side effect.
    """

    def rows_for(frame):
        # Boolean-mask the frame down to the requested series.
        return frame[frame['item_id'] == selected_item_id]

    def add_legend_placeholder(**trace_spec):
        # A trace without any plottable point: it only contributes a legend entry.
        chart.add_trace(go.Scatter(x=[None], y=[None], visible='legendonly', **trace_spec))

    chart = go.Figure(layout_template="plotly_dark")

    historical_rows = rows_for(df_historical_chart)
    latest_rows = rows_for(df_latest_chart)
    prediction_rows = rows_for(df_preds_chart)

    has_history = not historical_rows.empty
    has_latest = not latest_rows.empty
    has_prediction = not prediction_rows.empty

    if has_history:
        chart.add_trace(go.Scatter(
            x=historical_rows['date'],
            y=historical_rows['new_cases'],
            mode='lines',
            name='Historical',
            line={'color': 'skyblue'},
        ))

    # Stays None when there is no prediction, which disables the outbreak check below.
    upper_bound = None
    if has_prediction:
        # Read each value column-wise so the scalars keep their column dtype.
        forecast_date, center, lower_bound, upper_bound = (
            prediction_rows[column].iloc[0]
            for column in ('date', 'pred_mean', 'pred_lower', 'pred_upper')
        )

        # The interval is a single point whose asymmetric error bar spans lower..upper.
        chart.add_trace(go.Scatter(
            x=[forecast_date],
            y=[center],
            mode='lines',
            name='Prediction Interval',
            error_y={
                'type': 'data',
                'symmetric': False,
                'array': [upper_bound - center],
                'arrayminus': [center - lower_bound],
            },
            marker={'color': '#FF6347', 'size': 12},
        ))
        chart.add_trace(go.Scatter(
            x=[forecast_date],
            y=[center],
            mode='markers',
            name='Prediction',
            marker={'color': '#FF6347', 'size': 12},
        ))

    if has_latest:
        chart.add_trace(go.Scatter(
            x=latest_rows['date'],
            y=latest_rows['new_cases'],
            mode='markers',
            name='Latest',
            marker={'color': '#3CB371', 'size': 12},
        ))

    # An outbreak is flagged only when both a prediction and a latest value exist
    # and the first latest count is strictly above the upper prediction bound.
    outbreak_flagged = (
        upper_bound is not None
        and has_latest
        and latest_rows['new_cases'].iloc[0] > upper_bound
    )
    if outbreak_flagged:
        flagged_date = latest_rows['date'].iloc[0]
        flagged_cases = latest_rows['new_cases'].iloc[0]
        chart.add_trace(go.Scatter(
            x=[flagged_date],
            y=[flagged_cases],
            mode='markers+text',
            name='Potential Outbreak',
            marker={
                'color': 'yellow',
                'size': 15,
                'symbol': 'x',
                'line': {'color': '#B22222', 'width': 2},
            },
            text="Potential Outbreak",
            textposition="top center",
        ))
    else:
        add_legend_placeholder(
            mode='markers+text',
            name='Potential Outbreak',
            marker={
                'color': '#DAA520',
                'size': 15,
                'symbol': 'x',
                'line': {'color': '#800000', 'width': 2},
            },
            text="Potential Outbreak",
            textposition="top center",
        )

    # Keep a legend entry for every kind of data that was not available.
    if not has_history:
        add_legend_placeholder(mode='lines', name='Historical', line={'color': 'skyblue'})
    if not has_latest:
        add_legend_placeholder(mode='markers', name='Latest',
                               marker={'color': '#98FF98', 'size': 12})
    if not has_prediction:
        add_legend_placeholder(mode='markers', name='Prediction',
                               marker={'color': 'darkred', 'size': 12})
        add_legend_placeholder(mode='lines', name='Prediction Interval',
                               line={'color': 'red'})

    chart.update_layout(title=f"{selected_item_id}", xaxis_title="Date", yaxis_title="New Cases")
    chart.show()


In [190]:
# Expected chart: historical line, latest point and prediction with its interval;
# the latest value stays within the interval, so NO outbreak marker is drawn.
selected_item_id = "ARIZONA_Campylobacteriosis"
plot_outbreak(
    df_historical_chart=df_historical,
    df_latest_chart=df_latest,
    df_preds_chart=df_preds,
    selected_item_id=selected_item_id,
)

In [186]:
# Expected chart: historical line and prediction with its interval;
# there is NO latest value, and therefore NO outbreak marker either.
selected_item_id = "ARKANSAS_Chlamydia trachomatis infection"
plot_outbreak(
    df_historical_chart=df_historical,
    df_latest_chart=df_latest,
    df_preds_chart=df_preds,
    selected_item_id=selected_item_id,
)

In [187]:
# Expected chart: historical line, prediction with its interval, latest point,
# and an OUTBREAK marker because the latest value exceeds the upper bound.
selected_item_id = "CALIFORNIA_Campylobacteriosis"
plot_outbreak(
    df_historical_chart=df_historical,
    df_latest_chart=df_latest,
    df_preds_chart=df_preds,
    selected_item_id=selected_item_id,
)

In [188]:
# Expected chart: only the latest point is drawn; the historical, prediction and
# outbreak entries appear as legend-only placeholders.
# Alternative id that was tried here: "FLORIDA_Campylobacteriosis"
# (unverified whether it also has latest-only data). Its assignment was a dead
# store, overwritten before the plot call, so it has been removed.
selected_item_id = "VIRGINIA_Botulism, Infant"
plot_outbreak(df_historical, df_latest, df_preds, selected_item_id)

In [170]:
# If the last data point in the training dataset (i.e. the last date) is NA (or 0!) for ALL time series,
# all predictions for the next step will be 0. At least one series must be non-NA and non-zero there to get actual predictions.
# Confirmed by experiment.
# This may only be true for the one-step-ahead forecast.

Unnamed: 0,item_id,year,week,date,label,new_cases,filled_value
0,ARIZONA_Campylobacteriosis,2022,1,2022-01-03,Campylobacteriosis,8,False
1,ARIZONA_Campylobacteriosis,2022,2,2022-01-10,Campylobacteriosis,8,False
2,ARIZONA_Campylobacteriosis,2022,3,2022-01-17,Campylobacteriosis,8,False
3,ARIZONA_Campylobacteriosis,2022,4,2022-01-24,Campylobacteriosis,8,False
4,ARIZONA_Campylobacteriosis,2022,5,2022-01-31,Campylobacteriosis,8,False
...,...,...,...,...,...,...,...
1704,ILLINOIS_Chlamydia trachomatis infection,2024,5,2024-01-29,Chlamydia trachomatis infection,114,False
1705,ILLINOIS_Chlamydia trachomatis infection,2024,6,2024-02-05,Chlamydia trachomatis infection,80,False
1706,ILLINOIS_Chlamydia trachomatis infection,2024,7,2024-02-12,Chlamydia trachomatis infection,95,False
1707,ILLINOIS_Chlamydia trachomatis infection,2024,8,2024-02-19,Chlamydia trachomatis infection,81,False
