### Project Stage - V (Dashboard)

In [1]:
# Importing the necessary libraries
import dash
from dash import Dash, dcc, html
from dash.dependencies import Input, Output
import plotly.express as px
import pandas as pd
from sklearn.linear_model import LinearRegression
from sklearn.preprocessing import PolynomialFeatures
from sklearn.pipeline import make_pipeline
import numpy as np
import warnings
warnings.filterwarnings('ignore')

In [2]:
# Loading the cases dataset: confirmed-case counts with one row per county and
# one column per calendar day (wide format, see the preview rendered below).
df_cases = pd.read_csv('covid_confirmed_usafacts.csv')
# Bare last expression so the notebook renders a preview of the frame.
df_cases

Unnamed: 0,countyFIPS,County Name,State,StateFIPS,2020-01-22,2020-01-23,2020-01-24,2020-01-25,2020-01-26,2020-01-27,...,2023-07-14,2023-07-15,2023-07-16,2023-07-17,2023-07-18,2023-07-19,2023-07-20,2023-07-21,2023-07-22,2023-07-23
0,0,Statewide Unallocated,AL,1,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,1001,Autauga County,AL,1,0,0,0,0,0,0,...,19913,19913,19913,19913,19913,19913,19913,19913,19913,19913
2,1003,Baldwin County,AL,1,0,0,0,0,0,0,...,70521,70521,70521,70521,70521,70521,70521,70521,70521,70521
3,1005,Barbour County,AL,1,0,0,0,0,0,0,...,7582,7582,7582,7582,7582,7582,7582,7582,7582,7582
4,1007,Bibb County,AL,1,0,0,0,0,0,0,...,8149,8149,8149,8149,8149,8149,8149,8149,8149,8149
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3188,56037,Sweetwater County,WY,56,0,0,0,0,0,0,...,12645,12645,12645,12645,12645,12645,12645,12645,12645,12645
3189,56039,Teton County,WY,56,0,0,0,0,0,0,...,12206,12206,12206,12206,12206,12206,12206,12206,12206,12206
3190,56041,Uinta County,WY,56,0,0,0,0,0,0,...,6468,6468,6468,6468,6468,6468,6468,6468,6468,6468
3191,56043,Washakie County,WY,56,0,0,0,0,0,0,...,2640,2640,2640,2640,2640,2640,2640,2640,2640,2640


In [3]:
# Loading the deaths dataset: death counts with one row per county and
# one column per calendar day (wide format, see the preview rendered below).
df_deaths = pd.read_csv('covid_deaths_usafacts.csv')
# Bare last expression so the notebook renders a preview of the frame.
df_deaths

Unnamed: 0,countyFIPS,County Name,State,StateFIPS,2020-01-22,2020-01-23,2020-01-24,2020-01-25,2020-01-26,2020-01-27,...,2023-07-14,2023-07-15,2023-07-16,2023-07-17,2023-07-18,2023-07-19,2023-07-20,2023-07-21,2023-07-22,2023-07-23
0,0,Statewide Unallocated,AL,1,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,1001,Autauga County,AL,1,0,0,0,0,0,0,...,235,235,235,235,235,235,235,235,235,235
2,1003,Baldwin County,AL,1,0,0,0,0,0,0,...,731,731,731,731,731,731,731,731,731,731
3,1005,Barbour County,AL,1,0,0,0,0,0,0,...,104,104,104,104,104,104,104,104,104,104
4,1007,Bibb County,AL,1,0,0,0,0,0,0,...,111,111,111,111,111,111,111,111,111,111
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3188,56037,Sweetwater County,WY,56,0,0,0,0,0,0,...,142,142,142,142,142,142,142,142,142,142
3189,56039,Teton County,WY,56,0,0,0,0,0,0,...,16,16,16,16,16,16,16,16,16,16
3190,56041,Uinta County,WY,56,0,0,0,0,0,0,...,43,43,43,43,43,43,43,43,43,43
3191,56043,Washakie County,WY,56,0,0,0,0,0,0,...,51,51,51,51,51,51,51,51,51,51


In [4]:
# Analysis window (the same date range that was analysed in the previous four stages)
start_date = '2020-06-01'
end_date = '2021-01-03'

# Cases: keep the four identifier columns plus every daily column inside the window.
# The daily column labels are ISO dates, so plain string comparison orders them chronologically.
cases_date_columns = df_cases.columns[4:]
selected_cases_dates = list(filter(lambda label: start_date <= label <= end_date, cases_date_columns))
df_cases_filtered = df_cases.loc[:, ['countyFIPS', 'County Name', 'State', 'StateFIPS', *selected_cases_dates]]

# Deaths: identical selection applied to the deaths table.
deaths_date_columns = df_deaths.columns[4:]
selected_deaths_dates = list(filter(lambda label: start_date <= label <= end_date, deaths_date_columns))
df_deaths_filtered = df_deaths.loc[:, ['countyFIPS', 'County Name', 'State', 'StateFIPS', *selected_deaths_dates]]

# Wide -> long for cases (one row per county per day), then drop the
# 'Statewide Unallocated' placeholder rows.
cases_melted = pd.melt(
    df_cases_filtered,
    id_vars=['countyFIPS', 'County Name', 'State', 'StateFIPS'],
    var_name='date',
    value_name='cases',
)
cases_melted = cases_melted.loc[cases_melted['County Name'].ne('Statewide Unallocated')]

# Wide -> long for deaths, with the same placeholder rows removed.
deaths_melted = pd.melt(
    df_deaths_filtered,
    id_vars=['countyFIPS', 'County Name', 'State', 'StateFIPS'],
    var_name='date',
    value_name='deaths',
)
deaths_melted = deaths_melted.loc[deaths_melted['County Name'].ne('Statewide Unallocated')]

In [5]:
# Preview the long-format cases table (one row per county per day).
cases_melted

Unnamed: 0,countyFIPS,County Name,State,StateFIPS,date,cases
1,1001,Autauga County,AL,1,2020-06-01,233
2,1003,Baldwin County,AL,1,2020-06-01,292
3,1005,Barbour County,AL,1,2020-06-01,172
4,1007,Bibb County,AL,1,2020-06-01,76
5,1009,Blount County,AL,1,2020-06-01,63
...,...,...,...,...,...,...
692876,56037,Sweetwater County,WY,56,2021-01-03,3020
692877,56039,Teton County,WY,56,2021-01-03,2159
692878,56041,Uinta County,WY,56,2021-01-03,1570
692879,56043,Washakie County,WY,56,2021-01-03,783


In [6]:
# Preview the long-format deaths table (one row per county per day).
deaths_melted

Unnamed: 0,countyFIPS,County Name,State,StateFIPS,date,deaths
1,1001,Autauga County,AL,1,2020-06-01,5
2,1003,Baldwin County,AL,1,2020-06-01,9
3,1005,Barbour County,AL,1,2020-06-01,1
4,1007,Bibb County,AL,1,2020-06-01,1
5,1009,Blount County,AL,1,2020-06-01,1
...,...,...,...,...,...,...
692876,56037,Sweetwater County,WY,56,2021-01-03,16
692877,56039,Teton County,WY,56,2021-01-03,4
692878,56041,Uinta County,WY,56,2021-01-03,7
692879,56043,Washakie County,WY,56,2021-01-03,19


In [7]:
# Join cases and deaths into a single long table; rows are matched on the
# county identifiers plus the date, keeping only keys present in both tables.
merged_data = pd.merge(
    left=cases_melted,
    right=deaths_melted,
    how='inner',
    on=['countyFIPS', 'County Name', 'State', 'StateFIPS', 'date'],
)

# Show the combined table
print(merged_data)

        countyFIPS         County Name State  StateFIPS        date  cases  \
0             1001     Autauga County     AL          1  2020-06-01    233   
1             1003     Baldwin County     AL          1  2020-06-01    292   
2             1005     Barbour County     AL          1  2020-06-01    172   
3             1007        Bibb County     AL          1  2020-06-01     76   
4             1009      Blount County     AL          1  2020-06-01     63   
...            ...                 ...   ...        ...         ...    ...   
681809       56037  Sweetwater County     WY         56  2021-01-03   3020   
681810       56039       Teton County     WY         56  2021-01-03   2159   
681811       56041       Uinta County     WY         56  2021-01-03   1570   
681812       56043    Washakie County     WY         56  2021-01-03    783   
681813       56045      Weston County     WY         56  2021-01-03    477   

        deaths  
0            5  
1            9  
2           

In [8]:
# Creating the Dash application
app = Dash(__name__)

# Distinct state codes, used to populate the state dropdown
states = merged_data['State'].unique()

# Bounds of the analysed period. The 'date' column holds ISO date strings,
# so min()/max() give the earliest and latest day.
first_day = merged_data['date'].min()
last_day = merged_data['date'].max()

# Dashboard layout: the controls for task 1 (date, scale, analysis, regression)
# followed by the main graph, then the controls for task 2 (states, data type)
# followed by the trend graph.
app.layout = html.Div(children=[
    html.H1("COVID-19 Data Analysis Dashboard"),

    # Date selection (single-day picker, defaulting to the latest day)
    html.Label("Select Date:"),
    dcc.DatePickerSingle(
        id='date-picker',
        min_date_allowed=first_day,
        max_date_allowed=last_day,
        initial_visible_month=last_day,
        date=last_day,
    ),

    # Scale selection for the main graph (linear or log)
    html.Label("Select Scale:"),
    dcc.RadioItems(
        id='scale-selector',
        options=[
            {'label': label, 'value': value}
            for label, value in (('Linear', 'linear'), ('Log', 'log'))
        ],
        value='linear',
        labelStyle={'display': 'inline-block'},
    ),

    # Series to analyse (cases and/or deaths)
    html.Label("Select Analysis:"),
    dcc.Checklist(
        id='analysis-selector',
        options=[
            {'label': label, 'value': value}
            for label, value in (('Cases', 'cases'), ('Deaths', 'deaths'))
        ],
        value=['cases', 'deaths'],
        labelStyle={'display': 'inline-block'},
    ),

    # Regression selection (linear or polynomial)
    html.Label("Select Regression Mode:"),
    dcc.RadioItems(
        id='regression-selector',
        options=[
            {'label': label, 'value': value}
            for label, value in (('Linear', 'linear'), ('Polynomial', 'polynomial'))
        ],
        value='linear',
        labelStyle={'display': 'inline-block'},
    ),

    dcc.Graph(id='main-graph'),

    # State selection (multiple states allowed); the first state is preselected
    html.Label("Select State(s):"),
    dcc.Dropdown(
        id='state-selector',
        options=[{'label': code, 'value': code} for code in states],
        value=[states[0]],
        multi=True,
    ),

    # Series shown in the trend graph (cases or deaths)
    html.Label("Select Data Type:"),
    dcc.RadioItems(
        id='data-type-selector',
        options=[
            {'label': label, 'value': value}
            for label, value in (('Cases', 'cases'), ('Deaths', 'deaths'))
        ],
        value='cases',
        labelStyle={'display': 'inline-block'},
    ),

    html.Div(children=[
        dcc.Graph(id='trend-graph'),
    ]),
])


# Task 1 (main graph): per-county scatter for one day plus a fitted regression curve.
@app.callback(
    Output('main-graph', 'figure'),
    Input('date-picker', 'date'),
    Input('scale-selector', 'value'),
    Input('analysis-selector', 'value'),
    Input('regression-selector', 'value')
)
def update_main_graph(selected_date, selected_scale, selected_analysis, regression_mode):
    """Build the main scatter figure with a regression trend per selected series.

    Parameters
    ----------
    selected_date : str or None
        Day to plot; matched against the 'date' column of ``merged_data``.
        ``None`` (picker cleared) or a day with no rows yields a figure
        without traces instead of an error.
    selected_scale : str
        'log' switches the y-axis to a logarithmic scale; any other value
        leaves the axis linear.
    selected_analysis : list of str or None
        Any of 'cases' / 'deaths'. Each selected series is plotted on the
        y-axis against cases on the x-axis.
    regression_mode : str
        'linear' fits a straight line, 'polynomial' fits a degree-3
        polynomial. Any other value plots the points without a trend line.

    Returns
    -------
    The Plotly figure for the 'main-graph' component.
    """
    # Tolerate a missing selection and ignore values that are not data columns.
    chosen_series = [name for name in (selected_analysis or []) if name in ('cases', 'deaths')]
    # Label the y-axis with everything that is actually drawn on it.
    y_title = ' / '.join(name.capitalize() for name in chosen_series) or 'Cases'

    # Keep only the YYYY-MM-DD part.
    # NOTE(review): some Dash versions may append a time component to the picker value - confirm.
    day = str(selected_date)[:10] if selected_date else None

    # Sort by the x variable so the fitted curve is drawn left to right;
    # in arbitrary row order a polynomial trend renders as criss-crossing chords.
    filtered_data = merged_data[merged_data['date'] == day].sort_values('cases')

    fig = px.scatter()
    fig.update_layout(title='COVID-19 Analysis', xaxis_title='Cases', yaxis_title=y_title, showlegend=True, legend_title='Analysis')

    # Nothing to fit: a regression on zero samples would raise.
    if filtered_data.empty:
        fig.update_layout(title='COVID-19 Analysis (no data for the selected date)')
        return fig

    x_values = filtered_data['cases']
    X = x_values.values.reshape(-1, 1)

    for analysis in chosen_series:
        y = filtered_data[analysis]
        fig.add_scatter(x=x_values, y=y, mode='markers', name=analysis.capitalize())

        if regression_mode == 'linear':
            # Curve fit for Linear Regression
            model = LinearRegression()
        elif regression_mode == 'polynomial':
            # Curve fit for Polynomial Regression (degree 3, as used in the previous stages)
            model = make_pipeline(PolynomialFeatures(degree=3), LinearRegression())
        else:
            # Unknown mode: show the points only rather than failing on an unbound prediction.
            continue

        model.fit(X, y)
        y_pred = model.predict(X)
        fig.add_scatter(x=x_values, y=y_pred, mode='lines', name=f'{analysis.capitalize()} - {regression_mode.capitalize()} Trend')

    if selected_scale == 'log':
        fig.update_yaxes(type='log')

    return fig

# Task 2 (trend graph): 7-day moving average per selected state.
@app.callback(
    Output('trend-graph', 'figure'),
    Input('state-selector', 'value'),
    Input('data-type-selector', 'value')
)
def update_trend_graph(selected_states, data_type):
    """Plot the 7-day moving average of a state-level series over time.

    ``merged_data`` holds one row per county per day, so the county values are
    first summed into one value per state per day; the rolling mean is then
    taken over 7 consecutive days of that state-level series.

    Parameters
    ----------
    selected_states : list of str, str or None
        State codes to plot. ``None`` or an empty list (dropdown cleared)
        yields a figure without data instead of an error.
    data_type : str
        Column of ``merged_data`` to plot: 'cases' or 'deaths'.

    Returns
    -------
    The Plotly figure for the 'trend-graph' component.
    """
    # Normalise the dropdown value: it can be missing, or a single code.
    if isinstance(selected_states, str):
        selected_states = [selected_states]
    selected_states = list(selected_states or [])

    ma_column = f'{data_type}_ma'
    chart_title = f'7-Day Moving Average Trend ({data_type.capitalize()})'

    in_selection = merged_data['State'].isin(selected_states)
    # One value per state per day. groupby sorts by its keys, and the ISO date
    # strings sort chronologically, so each state's rows end up in day order.
    state_daily = (
        merged_data.loc[in_selection, ['State', 'date', data_type]]
        .groupby(['State', 'date'], as_index=False)[data_type]
        .sum()
    )

    if state_daily.empty:
        return px.line(title=chart_title)

    # Rolling window over days within each state; the first 6 days of every
    # state have no full window and are therefore NaN.
    state_daily[ma_column] = (
        state_daily.groupby('State')[data_type]
        .transform(lambda series: series.rolling(7).mean())
    )

    fig = px.line(state_daily, x='date', y=ma_column, color='State', title=chart_title)

    return fig

# Start the development server when this cell/script is executed directly.
if __name__ == '__main__':
    # Prefer `app.run`; fall back to the legacy `run_server` name when `run`
    # is not available on the installed Dash version.
    # NOTE(review): recent Dash releases no longer accept `run_server` - confirm
    # against the installed version.
    start_server = getattr(app, 'run', None) or app.run_server
    start_server(debug=True, port=8051)
