In [4]:
# ============================================================
# COVID-19 Analysis and Visualization using Plotly Express
# ============================================================

# --- Import Libraries ---
import pandas as pd
import numpy as np
import plotly.express as px

# --- Load Dataset ---
# The path is relative, so it is resolved against the current working directory.
df = pd.read_csv("covid.csv")

# --- Basic Information ---
# Report the frame's (rows, columns) shape and its column labels as loaded.
print(f"Dataset Shape: {df.shape}")
print(f"Columns: {df.columns.to_list()}")

# --- Preprocessing ---
# Map the raw CSV headers onto the snake_case names used by the rest of the
# script. 'Population' and 'Continent' map to themselves; they are listed only
# so every column the analysis relies on appears in one place.
rename_map = {
    'Country/Region': 'Country',
    'TotalCases': 'Total_Cases',
    'TotalDeaths': 'Total_Deaths',
    'TotalRecovered': 'Total_Recovered',
    'ActiveCases': 'Active_Cases',
    'TotalTests': 'Total_Tests',
    'Population': 'Population',
    'Continent': 'Continent'
}
df = df.rename(columns=rename_map)

# Convert numeric columns; values that cannot be parsed become NaN
# (errors='coerce') instead of raising.
numeric_cols = [
    'Total_Cases', 'Total_Deaths', 'Total_Recovered', 'Active_Cases',
    'Total_Tests', 'Population'
]
for col in numeric_cols:
    if col in df.columns:
        df[col] = pd.to_numeric(df[col], errors='coerce')

# Compute missing Active Cases as cases - recovered - deaths, counting a
# missing component as 0. The estimate is only built when all three component
# columns exist, so an absent column no longer raises KeyError.
active_parts = ['Total_Cases', 'Total_Recovered', 'Total_Deaths']
if all(part in df.columns for part in active_parts):
    estimated_active = (
        df['Total_Cases'].fillna(0)
        - df['Total_Recovered'].fillna(0)
        - df['Total_Deaths'].fillna(0)
    )
    if 'Active_Cases' not in df.columns or df['Active_Cases'].isna().all():
        # Column absent or entirely empty: use the estimate for every row.
        df['Active_Cases'] = estimated_active
    else:
        # Column partly populated: keep reported values, fill only the gaps.
        df['Active_Cases'] = df['Active_Cases'].fillna(estimated_active)

# Drop rows missing country. The explicit copy makes the result an independent
# frame, so later column assignments do not act on a derived selection.
df = df.dropna(subset=['Country']).copy()
print("\nData cleaned successfully!")

# --- Summary Statistics ---
if 'Date' in df.columns:
    # A Date column loaded by read_csv without date parsing holds strings.
    # Convert it so .max() compares chronologically and .date() / .dt work.
    # Values that cannot be parsed become NaT.
    df['Date'] = pd.to_datetime(df['Date'], errors='coerce')
else:
    # If Date does not exist, create a dummy date column for plotting
    df['Date'] = pd.to_datetime('2025-10-06')  # You can use today's date

latest_date = df['Date'].max()
print("Latest Date in Dataset:", latest_date.date())

# One row per country holding its most recent values. Sorting by Date first
# makes "last" mean "latest" rather than "last in file order". The sort is
# stable (mergesort), so rows sharing a date keep their file order, and
# undated rows go first so they cannot override dated ones.
# Note: GroupBy.last() takes the last non-null value of each column.
df_latest = (
    df.sort_values('Date', kind='mergesort', na_position='first')
    .groupby('Country', as_index=False)
    .last()
)

# --- Plot 1: Top 15 Countries by Total Cases ---
# Order the per-country frame by case count (largest first) and keep the
# first 15 rows; bar colour follows the same value as bar height.
ranked_by_cases = df_latest.sort_values('Total_Cases', ascending=False)
top_15_by_cases = ranked_by_cases.head(15)
fig1 = px.bar(
    top_15_by_cases,
    x='Country',
    y='Total_Cases',
    color='Total_Cases',
    title='Top 15 Countries by Total COVID-19 Cases',
)
fig1.show()

# --- Plot 2: Global Daily New Cases Over Time ---
# Only drawn when the frame has a 'New_Cases' column; otherwise this section
# is skipped and neither df_global nor fig2 is created.
if 'New_Cases' in df.columns:
    # Sum new cases across all rows that share a date.
    new_cases_by_date = df.groupby('Date', as_index=False)['New_Cases']
    df_global = new_cases_by_date.sum()
    fig2 = px.line(
        data_frame=df_global,
        x='Date',
        y='New_Cases',
        title='Global Daily New COVID-19 Cases Over Time',
    )
    fig2.show()

# --- Plot 3: Total Deaths vs Total Cases ---
# Both axes are logarithmic and marker size is driven by Total_Deaths, so only
# rows where both counts are present and strictly positive can be drawn.
# Missing values in the size column make Plotly reject the marker sizes, and
# zero/negative values have no position on a log axis, so filter them out.
scatter3_cols = ['Total_Cases', 'Total_Deaths']
df_scatter3 = df_latest.dropna(subset=scatter3_cols)
df_scatter3 = df_scatter3[(df_scatter3[scatter3_cols] > 0).all(axis=1)]

fig3 = px.scatter(
    df_scatter3, x='Total_Cases', y='Total_Deaths',
    # Colour by continent only when that column is available.
    color='Continent' if 'Continent' in df_scatter3.columns else None,
    size='Total_Deaths', hover_name='Country', log_x=True, log_y=True,
    title='Total Deaths vs Total Cases (Log Scale)'
)
fig3.show()

# --- Plot 4: Total Cases vs Population ---
if 'Population' in df_latest.columns:
    # Log axes on both dimensions and marker size taken from Total_Cases:
    # keep only rows where both values are present and strictly positive.
    # Missing sizes are rejected by Plotly, and non-positive values cannot be
    # placed on a log axis.
    scatter4_cols = ['Population', 'Total_Cases']
    df_scatter4 = df_latest.dropna(subset=scatter4_cols)
    df_scatter4 = df_scatter4[(df_scatter4[scatter4_cols] > 0).all(axis=1)]

    fig4 = px.scatter(
        df_scatter4, x='Population', y='Total_Cases',
        # Colour by continent only when that column is available.
        hover_name='Country', color='Continent' if 'Continent' in df_scatter4.columns else None,
        log_x=True, log_y=True, size='Total_Cases',
        title='Total COVID-19 Cases vs Population (Log Scale)'
    )
    fig4.show()

# --- Plot 5: Total Cases by Continent ---
# Skipped when the per-country frame has no 'Continent' column.
if 'Continent' in df_latest.columns:
    # Add up the per-country case counts within each continent.
    cases_grouped_by_continent = df_latest.groupby('Continent', as_index=False)['Total_Cases']
    df_cont = cases_grouped_by_continent.sum()
    fig5 = px.bar(
        data_frame=df_cont,
        x='Continent',
        y='Total_Cases',
        color='Continent',
        title='Total COVID-19 Cases by Continent',
    )
    fig5.show()

# --- Plot 6: Animated Choropleth Map (Spread Over Time) ---
if {'Date', 'Country', 'Total_Cases'}.issubset(df.columns):
    # Work on a local frame with Date guaranteed to be datetime: a Date column
    # that is still text would make the .dt accessor below raise. Rows whose
    # date cannot be parsed are dropped, since they have no frame to belong to.
    df_map = df.assign(Date=pd.to_datetime(df['Date'], errors='coerce'))
    df_map = df_map.dropna(subset=['Date'])
    # Sort chronologically so frames follow date order, not row order.
    # The stable sort keeps file order within a date.
    df_map = df_map.sort_values('Date', kind='mergesort')

    fig6 = px.choropleth(
        df_map, locations='Country', locationmode='country names',
        color='Total_Cases', hover_name='Country',
        # One animation frame per calendar day, labelled YYYY-MM-DD.
        animation_frame=df_map['Date'].dt.strftime('%Y-%m-%d'),
        title='COVID-19 Spread Over Time (Animated Map)',
        color_continuous_scale='Reds'
    )
    fig6.show()

# --- Insights ---
# Fixed summary text; these lines are not computed from the data above.
insight_lines = (
    "- Global pandemic peaks identified using new case trends.",
    "- Mortality and recovery rates vary by country and population size.",
    "- Interactive visualizations help policymakers understand severity.",
    "- Plotly Express allows easy exploration of dynamic COVID-19 data.",
)

print("\nKey Insights:")
for insight in insight_lines:
    print(insight)

print("\n✅ Analysis Complete!")


Dataset Shape: (15, 14)
Columns: ['Country/Region', 'Continent', 'Population', 'TotalCases', 'TotalDeaths', 'TotalRecovered', 'ActiveCases', 'Serious,Critical', 'Tot Cases/1M pop', 'Deaths/1M pop', 'TotalTests', 'Tests/1M pop', 'WHO Region', 'iso_alpha']

Data cleaned successfully!
Latest Date in Dataset: 2025-10-06



Key Insights:
- Global pandemic peaks identified using new case trends.
- Mortality and recovery rates vary by country and population size.
- Interactive visualizations help policymakers understand severity.
- Plotly Express allows easy exploration of dynamic COVID-19 data.

✅ Analysis Complete!
