In [49]:
import pandas as pd
import numpy as np
import plotly.express as px
import pycountry
import plotly.io as pio
from plotly.subplots import make_subplots
import plotly.graph_objects as go

# Make Plotly open in browser automatically
pio.renderers.default = 'browser'

In [50]:
# Load datasets
cases = pd.read_csv(r"Datasets\CONVENIENT_global_confirmed_cases.csv")
deaths = pd.read_csv(r"Datasets\CONVENIENT_global_deaths.csv")

In [51]:
cases.head()

Unnamed: 0,Country/Region,Afghanistan,Albania,Algeria,Andorra,Angola,Antarctica,Antigua and Barbuda,Argentina,Armenia,...,Uruguay,Uzbekistan,Vanuatu,Venezuela,Vietnam,West Bank and Gaza,Winter Olympics 2022,Yemen,Zambia,Zimbabwe
0,Province/State,,,,,,,,,,...,,,,,,,,,,
1,1/23/20,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,2.0,0.0,0.0,0.0,0.0,0.0
2,1/24/20,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,1/25/20,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,1/26/20,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [52]:
deaths.head()

Unnamed: 0,Country/Region,Afghanistan,Albania,Algeria,Andorra,Angola,Antarctica,Antigua and Barbuda,Argentina,Armenia,...,Uruguay,Uzbekistan,Vanuatu,Venezuela,Vietnam,West Bank and Gaza,Winter Olympics 2022,Yemen,Zambia,Zimbabwe
0,Province/State,,,,,,,,,,...,,,,,,,,,,
1,1/23/20,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,1/24/20,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,1/25/20,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,1/26/20,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [53]:
# Drop the first row ("Province/State")
cases = cases.drop(index=0)
deaths = deaths.drop(index=0)

# Rename first column to 'Date'
cases.rename(columns={cases.columns[0]: "Date"}, inplace=True)
deaths.rename(columns={deaths.columns[0]: "Date"}, inplace=True)

# Convert all numeric columns properly
cases.iloc[:, 1:] = cases.iloc[:, 1:].apply(pd.to_numeric, errors='coerce')
deaths.iloc[:, 1:] = deaths.iloc[:, 1:].apply(pd.to_numeric, errors='coerce')

In [54]:
# 🌍 TOTAL CASES AND DEATHS BY COUNTRY

# Drop the 'Date' column to aggregate per country
total_cases = cases.drop(columns=["Date"]).sum().reset_index()
total_deaths = deaths.drop(columns=["Date"]).sum().reset_index()

total_cases.columns = ["Country", "Total_Cases"]
total_deaths.columns = ["Country", "Total_Deaths"]

final_df = pd.merge(total_cases, total_deaths, on="Country")

print(final_df.head())

final_df.to_csv("Created Dataset/total_cases_deaths_by_country.csv", index=False)

       Country Total_Cases Total_Deaths
0  Afghanistan    209451.0       7896.0
1      Albania    334457.0       3598.0
2      Algeria    271496.0       6881.0
3      Andorra     47890.0        165.0
4       Angola    105288.0       1933.0


In [55]:
# Make a copy of cases and deaths
cases_datewise = cases.copy()
deaths_datewise = deaths.copy()

# Melt both datasets to long format: Date | Country | Cases/Deaths
cases_melted = cases_datewise.melt(id_vars=["Date"], var_name="Country", value_name="Cases")
deaths_melted = deaths_datewise.melt(id_vars=["Date"], var_name="Country", value_name="Deaths")

# Merge on Date and Country
datewise_df = pd.merge(cases_melted, deaths_melted, on=["Date", "Country"])

# Save to CSV
datewise_df.to_csv("Created Dataset/datewise_cases_deaths_by_country.csv", index=False)

print(datewise_df.head())


      Date      Country Cases Deaths
0  1/23/20  Afghanistan   0.0    0.0
1  1/24/20  Afghanistan   0.0    0.0
2  1/25/20  Afghanistan   0.0    0.0
3  1/26/20  Afghanistan   0.0    0.0
4  1/27/20  Afghanistan   0.0    0.0


In [56]:
# --- WORLD MAPS (Cases vs Deaths) ---
fig = make_subplots(
    rows=1, cols=2,
    subplot_titles=("Total Cases", "Total Deaths"),
    specs=[[{"type": "choropleth"}, {"type": "choropleth"}]]
)

fig.add_trace(
    go.Choropleth(
        locations=final_df["Country"],
        z=final_df["Total_Cases"],
        locationmode="country names",
        colorscale="Reds",
        colorbar_title="Cases",
    ), row=1, col=1
)

fig.add_trace(
    go.Choropleth(
        locations=final_df["Country"],
        z=final_df["Total_Deaths"],
        locationmode="country names",
        colorscale="Greys",
        colorbar_title="Deaths",
    ), row=1, col=2
)

fig.update_layout(
    title_text="🌍 Global COVID-19 Distribution: Total Cases vs Total Deaths",
    height=600,
)
fig.show()

In [57]:
# 📈 DAILY GLOBAL CASES & DEATHS
daily_cases = cases.drop(columns=["Date"]).sum(axis=1).diff().fillna(0)
daily_deaths = deaths.drop(columns=["Date"]).sum(axis=1).diff().fillna(0)

# Replace negative values with 0
daily_cases[daily_cases < 0] = 0
daily_deaths[daily_deaths < 0] = 0

dates = pd.to_datetime(cases["Date"])


Downcasting object dtype arrays on .fillna, .ffill, .bfill is deprecated and will change in a future version. Call result.infer_objects(copy=False) instead. To opt-in to the future behavior, set `pd.set_option('future.no_silent_downcasting', True)`


Downcasting object dtype arrays on .fillna, .ffill, .bfill is deprecated and will change in a future version. Call result.infer_objects(copy=False) instead. To opt-in to the future behavior, set `pd.set_option('future.no_silent_downcasting', True)`


Could not infer format, so each element will be parsed individually, falling back to `dateutil`. To ensure parsing is consistent and as-expected, please specify a format.



In [60]:
# Save global daily cases and deaths to CSV
global_daily_df = pd.DataFrame({
    "Date": dates,
    "Daily_Cases": daily_cases,
    "Daily_Deaths": daily_deaths
})
global_daily_df.to_csv("Created Dataset/global_daily_cases_deaths.csv", index=False)
print(global_daily_df.head())

        Date  Daily_Cases  Daily_Deaths
1 2020-01-23          0.0           0.0
2 2020-01-24        187.0           7.0
3 2020-01-25        206.0           8.0
4 2020-01-26        190.0           0.0
5 2020-01-27        126.0          12.0


In [58]:
# Graph 1: Daily Cases
fig_cases = go.Figure()
fig_cases.add_trace(go.Scatter(
    x=dates,
    y=daily_cases,
    mode="lines",
    name="Daily Cases",
    line=dict(color="crimson", width=2)
))
fig_cases.update_layout(
    title="📈 Global Daily COVID-19 Cases",
    xaxis_title="Date",
    yaxis_title="Number of Cases",
    template="plotly_white"
)
fig_cases.show()

In [59]:
# Graph 2: Daily Deaths
fig_deaths = go.Figure()
fig_deaths.add_trace(go.Scatter(
    x=dates,
    y=daily_deaths,
    mode="lines",
    name="Daily Deaths",
    line=dict(color="gray", width=2)
))
fig_deaths.update_layout(
    title="💀 Global Daily COVID-19 Deaths",
    xaxis_title="Date",
    yaxis_title="Number of Deaths",
    template="plotly_white"
)
fig_deaths.show()