# In [4]:
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go

# Load and clean dataset
# Read the raw CSV, parse the date column and discard rows with any missing value.
df = pd.read_csv('complete.csv')
df['Date'] = pd.to_datetime(df['Date'])
df.dropna(inplace=True)

# Columns analysed numerically below. Values that cannot be parsed as numbers
# are coerced to NaN, and the rows containing them are then removed.
cols_to_use = [
    'Total Confirmed cases',
    'Death',
    'Cured/Discharged/Migrated',
    'New cases',
    'New deaths',
]
df[cols_to_use] = df[cols_to_use].apply(lambda col: pd.to_numeric(col, errors='coerce'))
df.dropna(subset=cols_to_use, inplace=True)

# The ten states with the largest confirmed total on the most recent date.
latest_date = df['Date'].max()
latest_rows = df[df['Date'] == latest_date]
confirmed_by_state = latest_rows.groupby('Name of State / UT')['Total Confirmed cases'].sum()
top_states = confirmed_by_state.sort_values(ascending=False).head(10).index.tolist()

# Dark layout
# Layout keyword arguments shared by every figure: black plot and paper
# backgrounds with white text. Unpacked into each `update_layout` call.
dark_layout = {
    'plot_bgcolor': 'black',
    'paper_bgcolor': 'black',
    'font': {'color': 'white'},
}

# 1. Time Series
# For every top state, draw three lines (confirmed, deaths, recovered) against
# the date. Traces are added state by state, in the metric order listed below.
time_series_metrics = [
    ('Total Confirmed cases', 'Confirmed'),
    ('Death', 'Deaths'),
    ('Cured/Discharged/Migrated', 'Recovered'),
]
fig1 = go.Figure()
for state in top_states:
    df_state = df[df['Name of State / UT'] == state]
    for metric_column, metric_label in time_series_metrics:
        fig1.add_trace(
            go.Scatter(
                x=df_state['Date'],
                y=df_state[metric_column],
                mode='lines',
                name=f'{state} - {metric_label}',
            )
        )
fig1.update_layout(
    title='COVID-19 Trends (Confirmed, Deaths, Recovered) - Top 10 States',
    xaxis_title='Date',
    yaxis_title='Count',
    height=700,
    **dark_layout,
)
fig1.show()

# 2. Stacked Area
# Same three metrics per top state as the time series, but every trace is put
# in the single stack group 'one', so all series are stacked in the order added.
stacked_metrics = [
    ('Total Confirmed cases', 'Confirmed'),
    ('Death', 'Deaths'),
    ('Cured/Discharged/Migrated', 'Recovered'),
]
fig2 = go.Figure()
for state in top_states:
    df_state = df[df['Name of State / UT'] == state]
    for metric_column, metric_label in stacked_metrics:
        fig2.add_trace(
            go.Scatter(
                x=df_state['Date'],
                y=df_state[metric_column],
                stackgroup='one',
                name=f'{state} - {metric_label}',
            )
        )
fig2.update_layout(
    title='COVID-19 Stacked Area Plot - Top 10 States',
    xaxis_title='Date',
    yaxis_title='Count',
    height=700,
    **dark_layout,
)
fig2.show()

# 3. Scatter Plot
# Confirmed cases against deaths for rows belonging to the top states,
# coloured by state; the date is shown on hover.
scatter_rows = df[df['Name of State / UT'].isin(top_states)]
fig3 = px.scatter(
    scatter_rows,
    x='Total Confirmed cases',
    y='Death',
    color='Name of State / UT',
    hover_data=['Date'],
    title='Scatter Plot: Confirmed Cases vs Deaths (Top 10 States)',
)
fig3.update_layout(height=600, **dark_layout)
fig3.show()

# 4. Pairplot
# Scatter matrix over all numeric columns in `cols_to_use`, restricted to the
# top states and coloured by state.
pairplot_rows = df[df['Name of State / UT'].isin(top_states)]
fig4 = px.scatter_matrix(
    pairplot_rows,
    dimensions=cols_to_use,
    color='Name of State / UT',
    title='Pairwise Relationships (Top 10 States)',
)
fig4.update_layout(height=900, **dark_layout)
fig4.show()

# 5. Correlation Heatmap
# Pairwise correlation of the numeric columns over the whole cleaned frame
# (all states, not only the top ten), rounded to two decimals for display.
numeric_frame = df[cols_to_use]
corr_matrix = numeric_frame.corr().round(2)
fig5 = px.imshow(
    corr_matrix,
    text_auto=True,
    color_continuous_scale='RdBu_r',
    title='Correlation Matrix of COVID-19 Data',
)
fig5.update_layout(height=600, **dark_layout)
fig5.show()

# 6. Rolling Averages
# Compute the rolling mean separately for each state. Calling
# `df[col].rolling(7)` on the whole frame slides the window over consecutive
# rows regardless of state, so each average would blend values from different
# states instead of describing one state's recent history.
rolling_columns = {
    'Total Confirmed cases': 'Confirmed Rolling Avg',
    'Death': 'Death Rolling Avg',
    'Cured/Discharged/Migrated': 'Recovered Rolling Avg',
}
# Order rows by date (stable sort) so each window covers a state's 7 most
# recent rows -- 7 days when there is one row per state per day. `transform`
# keeps the original index, so assigning back to `df` aligns row by row.
rows_by_state = df.sort_values('Date', kind='mergesort').groupby('Name of State / UT')
for source_column, rolling_column in rolling_columns.items():
    df[rolling_column] = rows_by_state[source_column].transform(
        lambda values: values.rolling(7).mean()
    )

# One group of three traces per state; only the first state's group is visible
# initially, and each dropdown button shows exactly one state's group.
fig6 = go.Figure()
buttons = []
for i, state in enumerate(top_states):
    df_state = df[df['Name of State / UT'] == state]
    # Visibility mask over all traces: True only for this state's three traces.
    visible = [False] * (len(top_states) * 3)
    visible[i*3] = visible[i*3+1] = visible[i*3+2] = True
    fig6.add_trace(go.Scatter(x=df_state['Date'], y=df_state['Confirmed Rolling Avg'],
                              mode='lines', name=f'{state} - Confirmed (7d)', visible=(i==0)))
    fig6.add_trace(go.Scatter(x=df_state['Date'], y=df_state['Death Rolling Avg'],
                              mode='lines', name=f'{state} - Deaths (7d)', visible=(i==0)))
    fig6.add_trace(go.Scatter(x=df_state['Date'], y=df_state['Recovered Rolling Avg'],
                              mode='lines', name=f'{state} - Recovered (7d)', visible=(i==0)))
    # 'update' changes both trace attributes (visibility) and layout (title).
    buttons.append(dict(label=state,
                        method='update',
                        args=[{'visible': visible},
                              {'title': f'7-Day Rolling Average for {state}'}]))

fig6.update_layout(
    updatemenus=[dict(active=0, buttons=buttons, x=1.05, y=1.15)],
    title='7-Day Rolling Average for COVID-19 Data (Top 10 States)',
    xaxis_title='Date',
    yaxis_title='Count',
    height=700,
    **dark_layout
)
fig6.show()

# 7. Bar Chart - Top 15
# Rows for the most recent date; also reused by the later pie chart and treemap.
latest_df = df[df['Date'] == latest_date]
# Confirmed totals per state on that date, largest first, limited to 15 states.
latest_confirmed_totals = latest_df.groupby('Name of State / UT')['Total Confirmed cases'].sum()
top15_confirmed = latest_confirmed_totals.sort_values(ascending=False).head(15).reset_index()
fig7 = px.bar(
    top15_confirmed,
    x='Name of State / UT',
    y='Total Confirmed cases',
    title='Top 15 States by Total Confirmed Cases (Latest Date)',
    color='Total Confirmed cases',
    color_continuous_scale='reds',
)
fig7.update_layout(**dark_layout, height=600)
fig7.show()

# 8. Line Chart - Daily New Cases
# Sum the 'New cases' column over all states for each date.
new_cases_by_date = df.groupby('Date')['New cases'].sum()
daily_cases = new_cases_by_date.reset_index()
fig8 = px.line(
    daily_cases,
    x='Date',
    y='New cases',
    title='Nationwide Daily New COVID-19 Cases',
)
fig8.update_layout(**dark_layout, height=600)
fig8.show()

# 9. Line Chart - Daily Deaths
# Sum the 'New deaths' column over all states for each date.
new_deaths_by_date = df.groupby('Date')['New deaths'].sum()
daily_deaths = new_deaths_by_date.reset_index()
fig9 = px.line(
    daily_deaths,
    x='Date',
    y='New deaths',
    title='Nationwide Daily New Deaths',
)
fig9.update_layout(**dark_layout, height=600)
fig9.show()

# 10. Pie Chart - Deaths Distribution
# Deaths per state on the latest date, keeping the ten largest values.
latest_deaths_by_state = latest_df.groupby('Name of State / UT')['Death'].sum()
top10_deaths = latest_deaths_by_state.sort_values(ascending=False).head(10).reset_index()
fig10 = px.pie(
    top10_deaths,
    values='Death',
    names='Name of State / UT',
    title='Proportion of Deaths by Top 10 States',
)
# Label each slice with its percentage and name, and pull every slice out slightly.
fig10.update_traces(textinfo='percent+label', pull=[0.05] * 10)
fig10.update_layout(**dark_layout, height=600)
fig10.show()

# 11. Treemap - Confirmed & Deaths
# Per-state sums of the three cumulative columns on the latest date. `summary`
# is also the data source for the geographic heatmap in the next section.
summary_columns = [
    'Name of State / UT',
    'Total Confirmed cases',
    'Death',
    'Cured/Discharged/Migrated',
]
summary = latest_df[summary_columns].groupby('Name of State / UT').sum().reset_index()
# Tile size encodes confirmed cases; tile colour encodes deaths.
fig11 = px.treemap(
    summary,
    path=['Name of State / UT'],
    values='Total Confirmed cases',
    color='Death',
    hover_data={'Cured/Discharged/Migrated': True},
    color_continuous_scale='Reds',
    title='Treemap of Total Confirmed Cases and Deaths by State',
)
fig11.update_layout(**dark_layout, height=700)
fig11.show()

# 12. Geographic Heatmap (India)
# A GeoJSON of state boundaries (or a mapping from state name to lat/lon) is
# needed for this to work fully; this is a choropleth workaround that keys the
# values by state name.
# NOTE(review): no `geojson=` argument is passed, so with
# locationmode='geojson-id' there are presumably no shapes to match the state
# names against -- confirm whether this figure renders anything, and supply
# boundary data (plus a matching `featureidkey`) if it does not.
choropleth_options = dict(
    locations='Name of State / UT',
    locationmode='geojson-id',  # Replace with ISO codes if available
    color='Total Confirmed cases',
    hover_name='Name of State / UT',
    color_continuous_scale='OrRd',
    title='Geographic Heatmap of Total Confirmed Cases by State',
)
fig12 = px.choropleth(summary, **choropleth_options)
# Zoom to the plotted locations and hide the base map.
fig12.update_geos(fitbounds="locations", visible=False)
fig12.update_layout(**dark_layout, height=700)
fig12.show()
