In [None]:
import pandas as pd
import numpy as np
import plotly.express as px
import plotly.graph_objects as go
import plotly.io as pio
from plotly.subplots import make_subplots
import matplotlib.pyplot as plt
from datetime import timedelta
import warnings

from google.colab import drive
drive.mount('/content/drive')
warnings.filterwarnings('ignore')

import seaborn as sns

Mounted at /content/drive


In [None]:
# Colour palette: four hex codes (presumably confirmed / deaths / recovered /
# active, judging by the names — confirm before relying on that mapping).
cnf = '#393e46'
dth = '#ff2e63'
rec = '#21bf73'
act = '#fe9801'

# Use seaborn's 'darkgrid' style for the seaborn/matplotlib figures below.
sns.set_style('darkgrid')

In [None]:
# Complete data: one row per Province/State + Country/Region + Date
# (see the column list printed further down).
complete_data = pd.read_csv('/content/drive/MyDrive/Colab Notebooks/covid-19-analysis/covid_19_clean_complete.csv')
# Parse 'Date' like the day-wise and grouped loaders do, so every dated frame
# in the notebook carries a datetime column rather than plain strings.
complete_data['Date'] = pd.to_datetime(complete_data['Date'])
complete_data.head()

Unnamed: 0,Province/State,Country/Region,Lat,Long,Date,Confirmed,Deaths,Recovered,Active,WHO Region
0,,Afghanistan,33.93911,67.709953,2020-01-22,0,0,0,0,Eastern Mediterranean
1,,Albania,41.1533,20.1683,2020-01-22,0,0,0,0,Europe
2,,Algeria,28.0339,1.6596,2020-01-22,0,0,0,0,Africa
3,,Andorra,42.5063,1.5218,2020-01-22,0,0,0,0,Europe
4,,Angola,-11.2027,17.8739,2020-01-22,0,0,0,0,Africa


In [None]:
# Day-wise totals: load the CSV and convert 'Date' to datetime in one chain.
day_wise_data = (
    pd.read_csv('/content/drive/MyDrive/Colab Notebooks/covid-19-analysis/day_wise.csv')
    .assign(Date=lambda frame: pd.to_datetime(frame['Date']))
)
day_wise_data.head()

Unnamed: 0,Date,Confirmed,Deaths,Recovered,Active,New cases,New deaths,New recovered,Deaths / 100 Cases,Recovered / 100 Cases,Deaths / 100 Recovered,No. of countries
0,2020-01-22,555,17,28,510,0,0,0,3.06,5.05,60.71,6
1,2020-01-23,654,18,30,606,99,1,2,2.75,4.59,60.0,8
2,2020-01-24,941,26,36,879,287,8,6,2.76,3.83,72.22,9
3,2020-01-25,1434,42,39,1353,493,16,3,2.93,2.72,107.69,11
4,2020-01-26,2118,56,52,2010,684,14,13,2.64,2.46,107.69,13


In [None]:
# By Country: latest snapshot, one row per Country/Region.
country_data = pd.read_csv('/content/drive/MyDrive/Colab Notebooks/covid-19-analysis/country_wise_latest.csv')
# read_csv already parses empty fields as NaN, so no '' -> NaN replace is needed.
# Zero-fill the numeric columns only: a blanket fillna(0) would also write the
# integer 0 into text columns (e.g. 'WHO Region') wherever they have gaps.
country_data = country_data.fillna(
    {column: 0 for column in country_data.select_dtypes(include='number').columns}
)
country_data.head()

Unnamed: 0,Country/Region,Confirmed,Deaths,Recovered,Active,New cases,New deaths,New recovered,Deaths / 100 Cases,Recovered / 100 Cases,Deaths / 100 Recovered,Confirmed last week,1 week change,1 week % increase,WHO Region
0,Afghanistan,36263,1269,25198,9796,106,10,18,3.5,69.49,5.04,35526,737,2.07,Eastern Mediterranean
1,Albania,4880,144,2745,1991,117,6,63,2.95,56.25,5.25,4171,709,17.0,Europe
2,Algeria,27973,1163,18837,7973,616,8,749,4.16,67.34,6.17,23691,4282,18.07,Africa
3,Andorra,907,52,803,52,10,0,0,5.73,88.53,6.48,884,23,2.6,Europe
4,Angola,950,41,242,667,18,1,0,4.32,25.47,16.94,749,201,26.84,Africa


In [None]:
# Grouped data: one row per Date + Country/Region; 'Date' is converted to
# datetime as part of the load.
grouped_data = (
    pd.read_csv('/content/drive/MyDrive/Colab Notebooks/covid-19-analysis/full_grouped.csv')
    .assign(Date=lambda frame: pd.to_datetime(frame['Date']))
)
grouped_data.head()

Unnamed: 0,Date,Country/Region,Confirmed,Deaths,Recovered,Active,New cases,New deaths,New recovered,WHO Region
0,2020-01-22,Afghanistan,0,0,0,0,0,0,0,Eastern Mediterranean
1,2020-01-22,Albania,0,0,0,0,0,0,0,Europe
2,2020-01-22,Algeria,0,0,0,0,0,0,0,Africa
3,2020-01-22,Andorra,0,0,0,0,0,0,0,Europe
4,2020-01-22,Angola,0,0,0,0,0,0,0,Africa


In [None]:
# Worldometer: per-country snapshot with population and testing figures.
worldometer_data = pd.read_csv('/content/drive/MyDrive/Colab Notebooks/covid-19-analysis/worldometer_data.csv')
# read_csv already parses empty fields as NaN, so no '' -> NaN replace is needed.
# Zero-fill the numeric columns only: a blanket fillna(0) would also write the
# integer 0 into text columns such as 'Continent' / 'WHO Region' wherever they
# have gaps, leaving those columns with mixed str/int values.
worldometer_data = worldometer_data.fillna(
    {column: 0 for column in worldometer_data.select_dtypes(include='number').columns}
)
worldometer_data.head()

Unnamed: 0,Country/Region,Continent,Population,TotalCases,NewCases,TotalDeaths,NewDeaths,TotalRecovered,NewRecovered,ActiveCases,"Serious,Critical",Tot Cases/1M pop,Deaths/1M pop,TotalTests,Tests/1M pop,WHO Region
0,USA,North America,331198100.0,5032179,0.0,162804.0,0.0,2576668.0,0.0,2292707.0,18296.0,15194.0,492.0,63139605.0,190640.0,Americas
1,Brazil,South America,212710700.0,2917562,0.0,98644.0,0.0,2047660.0,0.0,771258.0,8318.0,13716.0,464.0,13206188.0,62085.0,Americas
2,India,Asia,1381345000.0,2025409,0.0,41638.0,0.0,1377384.0,0.0,606387.0,8944.0,1466.0,30.0,22149351.0,16035.0,South-EastAsia
3,Russia,Europe,145940900.0,871894,0.0,14606.0,0.0,676357.0,0.0,180931.0,2300.0,5974.0,100.0,29716907.0,203623.0,Europe
4,South Africa,Africa,59381570.0,538184,0.0,9604.0,0.0,387316.0,0.0,141264.0,539.0,9063.0,162.0,3149807.0,53044.0,Africa


In [None]:
# Report the number of rows in each loaded frame, in load order.
for label, frame in (
    ("complete_data", complete_data),
    ("day_wise_data", day_wise_data),
    ("country_data", country_data),
    ("grouped_data", grouped_data),
    ("worldometer_data", worldometer_data),
):
    print(f"Total {label} rows:", len(frame))

Total complete_data rows: 49068
Total country_data rows: 187
Total grouped_data rows: 35156
Total worldometer_data rows: 209


In [None]:
# Print the column index of every loaded frame, space-separated, in load order.
print(*(
    frame.columns
    for frame in (complete_data, day_wise_data, country_data, grouped_data, worldometer_data)
))

Index(['Province/State', 'Country/Region', 'Lat', 'Long', 'Date', 'Confirmed',
       'Deaths', 'Recovered', 'Active', 'WHO Region'],
      dtype='object') Index(['Date', 'Confirmed', 'Deaths', 'Recovered', 'Active', 'New cases',
       'New deaths', 'New recovered', 'Deaths / 100 Cases',
       'Recovered / 100 Cases', 'Deaths / 100 Recovered', 'No. of countries'],
      dtype='object') Index(['Country/Region', 'Confirmed', 'Deaths', 'Recovered', 'Active',
       'New cases', 'New deaths', 'New recovered', 'Deaths / 100 Cases',
       'Recovered / 100 Cases', 'Deaths / 100 Recovered',
       'Confirmed last week', '1 week change', '1 week % increase',
       'WHO Region'],
      dtype='object') Index(['Date', 'Country/Region', 'Confirmed', 'Deaths', 'Recovered', 'Active',
       'New cases', 'New deaths', 'New recovered', 'WHO Region'],
      dtype='object') Index(['Country/Region', 'Continent', 'Population', 'TotalCases', 'NewCases',
       'TotalDeaths', 'NewDeaths', 'TotalRecovered

In [None]:
# Plot maps
def map(df, col, pal):
  """Display a plotly choropleth of `col`, located by country name.

  Rows whose `col` value is not strictly greater than 0 are dropped first.

  Args:
    df: DataFrame with a 'Country/Region' column and the column named by `col`.
    col: column used for the colour scale, the hover data and the figure title.
    pal: value passed to plotly as `color_continuous_scale`.

  Returns:
    None; the figure is shown via `fig.show()`.
  """
  # NOTE: this name shadows Python's builtin `map` for the rest of the notebook.
  positive_rows = df.loc[df[col] > 0]
  choropleth_options = dict(
      locations='Country/Region',
      locationmode='country names',
      color=col,
      hover_name='Country/Region',
      title=col,
      hover_data=[col],
      color_continuous_scale=pal,
  )
  px.choropleth(positive_rows, **choropleth_options).show()

In [None]:
# Choropleth of country_data's 'Confirmed' column, using the 'matter' colour scale.
map(country_data, 'Confirmed', 'matter')

In [None]:
# Choropleth of country_data's 'Deaths' column, using the 'matter' colour scale.
map(country_data, 'Deaths', 'matter')

In [None]:
# Choropleth of country_data's 'Recovered' column, using the 'matter' colour scale.
map(country_data, 'Recovered', 'matter')

In [None]:
# Choropleth of country_data's 'Active' column, using the 'matter' colour scale.
map(country_data, 'Active', 'matter')

In [None]:
# Choropleth of country_data's 'Deaths / 100 Cases' ratio column.
map(country_data, 'Deaths / 100 Cases', 'matter')

In [None]:
# Choropleth of country_data's 'Recovered / 100 Cases' ratio column.
map(country_data, 'Recovered / 100 Cases', 'matter')

In [None]:
# NOTE(review): this cell is identical to the 'Recovered / 100 Cases' map cell
# directly before it and renders the same figure again — likely a leftover;
# consider deleting it.
map(country_data, 'Recovered / 100 Cases', 'matter')

In [None]:
# Choropleth of country_data's 'Deaths / 100 Recovered' ratio column.
map(country_data, 'Deaths / 100 Recovered', 'matter')

In [None]:
def plot_day_wise(col):
  """Display a line chart of one `day_wise_data` column against 'Date'.

  Args:
    col: name of the `day_wise_data` column plotted on the y axis; it is also
      used as the figure title. Both axis titles are blanked.

  Returns:
    None; the figure is shown via `.show()`.
  """
  trend = px.line(day_wise_data, x='Date', y=col)
  # update_layout returns the figure itself, so the calls can be chained.
  trend.update_layout(title=col, xaxis_title='', yaxis_title='').show()

In [None]:
# Line chart over 'Date' of day_wise_data's 'Deaths / 100 Cases' column.
plot_day_wise('Deaths / 100 Cases')

In [None]:
# Line chart over 'Date' of day_wise_data's 'Recovered / 100 Cases' column.
plot_day_wise('Recovered / 100 Cases')

In [None]:
# Line chart over 'Date' of day_wise_data's 'Deaths / 100 Recovered' column.
plot_day_wise('Deaths / 100 Recovered')

In [None]:
def country_region_bar(col):
    """Display a horizontal bar chart of the ten largest `col` values in `country_data`.

    Args:
        col: name of the `country_data` column to rank by; also used as the
            bar length, the bar text label and the figure title.

    Returns:
        None; the figure is shown via `.show()`.
    """
    # Ascending sort + tail(10) keeps the ten rows with the largest values.
    top_ten = country_data.sort_values(col).tail(10)

    bar_options = dict(
        x=col,
        y="Country/Region",
        color='WHO Region',
        text=col,
        orientation='h',
        width=1200,
        color_discrete_sequence=px.colors.qualitative.Dark2,
    )
    bars = px.bar(top_ten, **bar_options)

    # Order the y categories by bar total rather than by colour group.
    bars.update_layout(title=col, yaxis_categoryorder='total ascending')
    bars.show()

In [None]:
# Ten countries with the largest 'Deaths' values in country_data.
country_region_bar('Deaths')

In [None]:
# Ten countries with the largest 'Confirmed' values in country_data.
country_region_bar('Confirmed')

In [None]:
# Ten countries with the largest 'Recovered' values in country_data.
country_region_bar('Recovered')

In [None]:
# Ten countries with the largest 'Active' values in country_data.
country_region_bar('Active')

In [None]:
def covid_bubble_plot(df, x_col, y_col, size_col, color_col='WHO Region', title=None):
    """
    Display a plotly scatter ("bubble") chart of one column against another.

    Args:
        df: frame handed to `px.scatter`; it needs a 'Country/Region' column,
            which is used as the hover name.
        x_col: column for the x axis (also the x-axis title).
        y_col: column for the y axis (also the y-axis title).
        size_col: column that drives the marker size.
        color_col: column that drives the marker colour and the legend title.
        title: figure title; when falsy, '<y_col> vs <x_col> Bubble Plot' is used.

    Returns:
        None; the figure is shown via `.show()`.
    """
    # Fall back to a generated heading when no (or an empty) title is supplied.
    if title:
        heading = title
    else:
        heading = f'{y_col} vs {x_col} Bubble Plot'

    bubbles = px.scatter(
        df,
        x=x_col,
        y=y_col,
        size=size_col,
        color=color_col,
        hover_name='Country/Region',
        size_max=60,  # upper bound on the rendered bubble size
        template='plotly_white',
        opacity=0.7,
        title=heading,
    )

    bubbles.update_layout(
        xaxis_title=x_col,
        yaxis_title=y_col,
        legend_title=color_col,
        font={'size': 12},
    )

    bubbles.show()


In [None]:
# Bubble plot of confirmed cases vs deaths per country, bubble size by 'New cases'
# (country_data has no population column), coloured by WHO region.
covid_bubble_plot(df=country_data,
                  x_col='Confirmed',
                  y_col='Deaths',
                  size_col='New cases',
                  color_col='WHO Region',
                  title='COVID-19: Confirmed vs Deaths by Country')


In [None]:
# Bubble plot of confirmed cases vs deaths per country, bubble size by 'Active',
# coloured by WHO region.
covid_bubble_plot(df=country_data,
                  x_col='Confirmed',
                  y_col='Deaths',
                  size_col='Active',
                  color_col='WHO Region',
                  title='Confirmed vs Deaths by Active')

In [None]:
# NOTE(review): plotly.express is already imported as `px` in the first cell;
# this re-import is redundant.
import plotly.express as px

# Example: usa_states_data must have each US state as a row
# Columns: Province/State, Confirmed, Deaths, Recovered, Active
# NOTE(review): that requirement is not met here. country_data is loaded from
# country_wise_latest.csv, whose printed columns contain 'Country/Region' but no
# 'Province/State', so this filter can yield at most the single 'US' country
# row (if present) — never per-state rows.
usa_states_data = country_data[country_data['Country/Region'] == 'US']
fig = px.choropleth(
    usa_states_data,
    # NOTE(review): with locationmode='USA-states' plotly matches `locations`
    # against two-letter state abbreviations; 'Country/Region' holds country
    # names, so presumably nothing is coloured — confirm the map is not empty.
    locations='Country/Region',
    locationmode='USA-states',   # crucial for state-level map
    color='Confirmed',           # can be 'Deaths' or 'Active'
    # NOTE(review): the hover title is the numeric 'Active' column; a name
    # column was probably intended.
    hover_name='Active',
    hover_data=['Deaths', 'Recovered', 'Active'],
    color_continuous_scale='Reds',
    scope='usa',
    title='COVID-19 Cases by US State'
)

fig.show()


In [None]:
# Pairwise correlations between the numeric columns of country_data,
# drawn as an annotated heatmap.
corr_matrix = country_data.corr(numeric_only=True)

fig = px.imshow(
    corr_matrix,
    text_auto=True,
    color_continuous_scale='RdBu_r',
    title='Correlation Heatmap of Numeric Features',
)
fig.update_layout(width=1100, height=1200)
fig.show()

In [None]:
# NOTE(review): `plot_usa_map` is not defined in any cell visible in this
# notebook; unless it is defined elsewhere, this raises NameError on a fresh
# kernel (Restart & Run All). Define it or remove this cell.
plot_usa_map('Deaths')

In [None]:
# NOTE(review): `usa_grouped` is never assigned in the visible cells — the
# recorded output of this cell is a NameError. The call needs a frame with a
# 'Code' column (presumably two-letter state codes, given
# locationmode="USA-states") and a 'Confirmed' column; none of the loaded CSVs
# shows such a column in its preview.
fig = px.choropleth(usa_grouped, color='Confirmed', locations='Code', locationmode="USA-states",
                    scope="usa", color_continuous_scale="RdGy", title='No. of cases in USA')
fig

NameError: name 'usa_grouped' is not defined

In [None]:
# An empty column name raised KeyError: ''. Plot 'New cases' instead — a
# country_data column not yet covered by the bar charts above.
# NOTE(review): the intended metric is a guess; swap in another column if needed.
country_region_bar('New cases')

KeyError: ''

In [None]:
# `df` is never assigned at notebook level (it only exists as a function
# parameter), so this cell failed on a fresh kernel. grouped_data is presumably
# the intended frame — confirm.
# NOTE(review): hue='Country/Region' creates one hue level per country, which
# is slow to render and hard to read; consider a coarser hue.
sns.pairplot(grouped_data, hue='Country/Region')

In [None]:
# `df` is never assigned at notebook level, so this cell failed on a fresh
# kernel; grouped_data is presumably the intended frame — confirm.
sns.pairplot(grouped_data, diag_kind='kde')

In [None]:
# `df` is never assigned at notebook level; inspect grouped_data instead
# (presumably the intended frame — confirm).
grouped_data.info()

In [None]:
# `df` is never assigned at notebook level; inspect grouped_data instead
# (presumably the intended frame — confirm).
grouped_data.columns

In [None]:
# Inclusive date window for the plot.
start_date = '2020-01-22'
end_date = '2020-12-28'

# `df` is never assigned at notebook level, so the original call failed on a
# fresh kernel. grouped_data has the 'Date', 'Deaths' and 'Country/Region'
# columns this plot needs and is presumably the intended frame — confirm.
# The window above was defined but never applied; apply it here.
in_window = (grouped_data['Date'] >= start_date) & (grouped_data['Date'] <= end_date)

plt.figure(figsize=(8,5))
# One line per country; the legend is suppressed because there is one entry per country.
sns.lineplot(data=grouped_data[in_window], x='Date', y='Deaths', hue='Country/Region', legend=False)
plt.title('Deaths by Date for each Country')
plt.show()