In [None]:
import pandas as pd #data manipulation , analysis, cleaning 
import numpy as np # mathmatical calculations

# Plotly - interactive, open-source, and browser-based graphing library
import plotly.express as px
import plotly.graph_objects as go
from plotly.subplots import make_subplots

* full_grouped.csv - Day to day country wise no. of cases (Has County/State/Province level data)
* covid_19_clean_complete.csv - Day to day country wise no. of cases (Doesn't have County/State/Province level data)
* country_wise_latest.csv - Latest country level no. of cases
* day_wise.csv - Day wise no. of cases (Doesn't have country level data)
* usa_county_wise.csv - Day to day county level no. of cases
* worldometer_data.csv - Latest data from https://www.worldometers.info/

In [None]:
# Single place to change when the dataset lives somewhere other than the
# Kaggle input mount; every file below is read from this directory.
DATA_DIR = '/kaggle/input/corona-virus-report'

country_wise = pd.read_csv(f'{DATA_DIR}/country_wise_latest.csv')
day_wise = pd.read_csv(f'{DATA_DIR}/day_wise.csv')
worldometer_data = pd.read_csv(f'{DATA_DIR}/worldometer_data.csv')
DD_CW1 = pd.read_csv(f'{DATA_DIR}/full_grouped.csv')
DD_CW2 = pd.read_csv(f'{DATA_DIR}/covid_19_clean_complete.csv')
usa = pd.read_csv(f'{DATA_DIR}/usa_county_wise.csv')

**Note: the data used in this analysis only extends to the year 2020.**

In [None]:
# Preview the first rows of the latest country-level snapshot.
country_wise.head()

In [None]:
# Preview the first rows of the day-wise totals.
day_wise.head()

In [None]:
# Preview the first rows of the Worldometer snapshot.
worldometer_data.head()

In [None]:
# Preview the first rows of full_grouped.csv (loaded as DD_CW1).
DD_CW1.head()

In [None]:
# Preview the first rows of covid_19_clean_complete.csv (loaded as DD_CW2).
DD_CW2.head()

## 1) Country wise Analysis

In [None]:
# Show the country-level frame again as a reference for the plots in this section.
country_wise.head()

In [None]:
# Uncomment one of the lines below to display the names of the available continuous colour palettes
# px.colors.sequential.swatches_continuous()
# px.colors.diverging.swatches_continuous()

## 1. Total confirmed cases from each country

In [None]:
def plot_map(df, location_names,location_mode,data_col,scope,hover_name=None,title=None,palette='Sunset'):
    """Draw and display a choropleth map coloured by one column of ``df``.

    Parameters
    ----------
    df : DataFrame holding the location and data columns.
    location_names : name of the column identifying each region.
    location_mode : value forwarded to ``locationmode`` (the calls in this
        notebook use 'country names' and 'USA-states').
    data_col : name of the column that drives the colour and is also listed
        in the hover box.
    scope : map scope forwarded to Plotly (e.g. 'world', 'usa').
    hover_name : column shown as the hover title; falls back to
        ``location_names`` when omitted.
    title : figure title.
    palette : continuous colour scale name.

    Returns
    -------
    None. The figure is shown as a side effect.
    """
    # Identity test: `== None` would be evaluated element-wise for array-likes.
    if hover_name is None:
        hover_name = location_names
    fig = px.choropleth(
        df,
        locations=location_names,
        locationmode=location_mode,
        color=data_col,
        scope=scope,
        hover_name=hover_name,
        # Pass a list: the list form is the long-standing documented input,
        # whereas a bare string is presumably only handled by newer Plotly
        # releases (verify against the installed version).
        hover_data=[data_col],
        title=title,
        color_continuous_scale=palette,
    )
    # Drop the left/right/bottom margins so the map fills the cell width.
    fig.update_layout(margin={"r":0,"l":0,"b":0})
    fig.show()
    
# One world choropleth per headline metric: (data column, colour scale, title).
world_maps = (
    ('Confirmed', 'Peach', 'Confirmed cases in world'),
    ('Deaths', 'amp', 'Death cases in world'),
    ('Recovered', 'Greens', 'Recovered cases in world'),
    ('Active', 'Oranges', 'Active cases in world'),
)
for metric, colour_scale, map_title in world_maps:
    plot_map(
        country_wise,
        location_names='Country/Region',
        location_mode='country names',
        data_col=metric,
        scope='world',
        palette=colour_scale,
        title=map_title,
    )

* The USA has the highest number of confirmed cases (approx. 4.29M), followed by Brazil (2.44M) and India (1.48M).
* The USA also has the highest number of deaths (148K), followed by Brazil (87.6K) and India (33.4K). It has the highest number of active cases as well, with 2.81M.
* For recovered cases, Brazil leads with over 1.84M, followed by the USA (1.32M) and India (951.16K).

In [None]:
# top 30 countries

def get_countries(df,col,color,top_n=30):
    """Show a bar chart of the countries with the largest values of ``col``.

    Parameters
    ----------
    df : DataFrame with 'Country/Region' and 'WHO Region' columns plus ``col``.
    col : name of the column to rank by and plot on the y axis.
    color : discrete colour sequence used for the WHO regions.
    top_n : how many countries to keep (default 30, the original fixed value).

    Returns
    -------
    None. The figure is shown as a side effect.
    """
    # Rank in descending order and keep only the leading rows.
    highest_col_cases = df.sort_values(col,ascending=False)[:top_n]

    fig = px.bar(highest_col_cases,
                 x='Country/Region',
                 y=col,
                 color='WHO Region',
                 title=f'Top {top_n} Countries with highest {col} cases',
                 text_auto='.2s',
                 color_discrete_sequence=color
                 )
    fig.update_traces(textfont_size=12, textangle=0, textposition="outside", cliponaxis=False)
    # Colouring by region splits the bars into several traces; this restores
    # a single descending order along the x axis.
    fig.update_layout(xaxis_categoryorder = 'total descending')
    fig.show()
    
# Ranked bar chart for each cumulative metric, each with its own qualitative palette.
ranking_charts = (
    ('Confirmed', px.colors.qualitative.Prism),
    ('Deaths', px.colors.qualitative.Set1),
    ('Recovered', px.colors.qualitative.Dark2),
)
for ranked_col, region_palette in ranking_charts:
    get_countries(country_wise, ranked_col, region_palette)

In [None]:
# Same ranked bar chart, this time ordered by the '1 week % increase' column.
get_countries(country_wise,'1 week % increase',px.colors.qualitative.Sunset_r)

The bar charts above show the top 30 countries by Confirmed, Deaths and Recovered cases, followed by the top 30 countries by 1-week % increase.

## Day wise

In [None]:
# Show the day-wise frame again as a reference for the time-series plots below.
day_wise.head()

In [None]:
fig = go.Figure()

# One filled line per series, added in the order Confirmed, Deaths, Recovered.
for series_name in ('Confirmed', 'Deaths', 'Recovered'):
    fig.add_trace(
        go.Scatter(
            x=day_wise['Date'],
            y=day_wise[series_name],
            name=series_name,
            mode='lines+markers',
            fill='tozeroy',
        )
    )

fig.update_layout(
    title='Cases over time',
    xaxis_title='Dates',
    yaxis_title='Population',
)

fig.show()

As of July 27th, there were around 16.48M confirmed cases, 9.46M recovered cases and 654K deaths worldwide.

In [None]:
# The two per-100-case ratio columns, paired with the colour used for each.
ratio_columns = ["Deaths / 100 Cases", 'Recovered / 100 Cases']
ratio_colours = dict(zip(ratio_columns, ['#d43d3d', '#94e864']))

fig = px.bar(
    day_wise,
    x="Date",
    y=ratio_columns,
    color_discrete_map=ratio_colours,
    barmode="stack",
    title='Change in Death and Recovered per 100 cases over time',
)
fig.show()

* The ratio of recovered cases to confirmed cases was much higher at all times than the ratio of deaths to confirmed cases.
* On average, there were around 34 recoveries and 5 deaths per 100 cases.

In [None]:
# Parse the dates once (raising on unparseable values), store them back, and
# derive the calendar columns from the parsed series.
parsed_dates = pd.to_datetime(DD_CW1['Date'], errors='raise')
DD_CW1['Date'] = parsed_dates

date_parts = (
    ('month', parsed_dates.dt.month),
    ('month_date', parsed_dates.dt.day),
    ('weekday', parsed_dates.dt.day_name()),
)
for part_name, part_values in date_parts:
    DD_CW1[part_name] = part_values

DD_CW1.head()

In [None]:
def plot_bubble(df, x, y, color=None, size=None, palette=px.colors.qualitative.G10,log=True):
    """Show a bubble scatter of ``y`` against ``x``, one point per row of ``df``.

    Parameters
    ----------
    df : DataFrame containing a 'Country/Region' column (used as hover title)
        plus the columns named by the other arguments.
    x, y : column names for the two axes.
    color : optional column used to colour the points.
    size : optional column (or array-like) controlling the bubble size.
    palette : discrete colour sequence for the ``color`` groups.
    log : when True, both axes use a logarithmic scale.

    Returns
    -------
    None. The figure is shown as a side effect.
    """
    # Plotly refuses NaN marker sizes, so rows without a size value cannot be
    # drawn; leave them out instead of letting the whole figure fail.
    if isinstance(size, str) and size in df.columns:
        df = df.dropna(subset=[size])

    fig = px.scatter(df,
                     x=x,
                     y=y,
                     size=size,
                     color=color,
                     hover_name="Country/Region",
                     size_max=50,
                     color_discrete_sequence=palette,
                     title=f'{x} Versus {y} across countries coloured by {color}',
                     log_y=log,
                     log_x=log)
    fig.show()
    
# Total cases against each outcome column, each chart with its own palette.
bubble_charts = (
    ("TotalDeaths", px.colors.qualitative.G10),
    ("TotalRecovered", px.colors.qualitative.Dark2),
    ("ActiveCases", px.colors.qualitative.D3),
    ("Serious,Critical", px.colors.qualitative.Light24),
)
for outcome_col, region_palette in bubble_charts:
    plot_bubble(
        df=worldometer_data,
        x="TotalCases",
        y=outcome_col,
        size="Population",
        color="WHO Region",
        palette=region_palette,
    )

As total cases increase, the numbers of deaths, recovered, active and serious cases increase as well.

In [None]:
def plot_bar(df, x, y,palette=px.colors.qualitative.Pastel):
    """Show a bar chart of ``y`` against ``x``, faceted by month and coloured by country.

    Parameters
    ----------
    df : DataFrame to plot; it must contain 'month' and 'Country/Region'
        columns in addition to ``x`` and ``y``.
    x : column for the x axis. A 'weekday' column is ordered Monday..Sunday.
    y : column for the bar heights.
    palette : discrete colour sequence for the countries.

    Returns
    -------
    None. The figure is shown as a side effect.
    """
    weekday_order = ['Monday','Tuesday','Wednesday','Thursday','Friday','Saturday','Sunday']
    # Plot the frame that was passed in; the previous version silently used
    # the global DD_CW1 and ignored the `df` argument.
    fig=px.bar(df,
               x=x,
               y=y,
               facet_col='month',
               color='Country/Region',
               category_orders={"weekday": weekday_order},
               color_discrete_sequence=palette,
               title=f'{y} cases from each country on different {x} through different months')
    fig.show()
    
# Confirmed cases by weekday, one facet per month.
plot_bar(DD_CW1, 'weekday', 'Confirmed',px.colors.qualitative.Safe )

In [None]:
# Deaths by weekday, one facet per month.
plot_bar(DD_CW1, 'weekday', 'Deaths',px.colors.qualitative.Vivid )

In [None]:
# Recovered cases by weekday, one facet per month.
plot_bar(DD_CW1, 'weekday', 'Recovered',px.colors.qualitative.Alphabet )

After June, the total number of cases recorded on each weekday was very similar. The UK had the most deaths after April.

In [None]:
# Layout of the 2x2 grid: (row, col, data column, marker colour, y-axis title).
panel_specs = (
    (1, 1, 'Confirmed', '#FDB344', "Confirmed Cases"),
    (1, 2, 'Recovered', '#94e864', "Recovered Cases"),
    (2, 1, 'Deaths', '#FF3333', "Deaths Cases"),
    (2, 2, 'Active', '#FDB344', "Active Cases"),
)

fig = make_subplots(
    rows=2,
    cols=2,
    subplot_titles=("Date vs Confirmed", "Date vs Recovered", "Date vs Deaths", "Date vs Active"),
    shared_xaxes='all',
)

for grid_row, grid_col, stat_col, marker_colour, y_title in panel_specs:
    fig.add_trace(
        go.Scatter(x=DD_CW1['Date'], y=DD_CW1[stat_col], marker=dict(color=marker_colour)),
        row=grid_row,
        col=grid_col,
    )
    fig.update_yaxes(title_text=y_title, row=grid_row, col=grid_col)
    # Only the bottom row carries an x-axis title.
    if grid_row == 2:
        fig.update_xaxes(title_text="Date", row=grid_row, col=grid_col)

# Overall title and height; the legend is hidden.
fig.update_layout(
    title_text="Subplots showing different stats across Date",
    height=700,
    showlegend=False,
)

fig.show()

We see a sudden increase in death cases in May, which may be due to certain nations such as the UK, US, Brazil and Italy.

In [None]:
def plot_scatterGeo(df, locations, size=None, color=None,title=None,palette=None):
    """Show an animated geo scatter with one frame per value of 'Date'.

    The input is first reduced to one row per ('Country/Region', 'Date') by
    taking the maximum of the Confirmed, Deaths, Recovered and Active columns,
    and rows with a negative value in any of those columns are discarded.

    Parameters
    ----------
    df : DataFrame with 'Country/Region', 'Date', 'Confirmed', 'Deaths',
        'Recovered' and 'Active' columns.
    locations : column holding the country names.
    size : 'Confirmed', 'Deaths', 'Recovered' or 'Active' to size the bubbles
        by a power-compressed version of that column; any other value is
        forwarded to Plotly unchanged.
    color : column used to colour the bubbles.
    title : figure title.
    palette : continuous colour scale name.

    Returns
    -------
    None. The figure is shown as a side effect.
    """
    metric_cols = ['Confirmed','Deaths','Recovered','Active']
    countries_max_cases = df.groupby(['Country/Region','Date'])[metric_cols].max().reset_index()

    # Discard rows with a negative metric: a negative base raised to the
    # fractional powers below would yield NaN sizes.
    has_negative = (countries_max_cases[metric_cols] < 0).any(axis=1)
    countries_max_cases = countries_max_cases[~has_negative]

    # Exponent applied to each metric to compress its range into usable bubble sizes.
    size_exponents = {'Confirmed': 0.5, 'Deaths': 0.7, 'Recovered': 0.8, 'Active': 0.5}
    # Only a plain column name is looked up; the isinstance guard keeps
    # array-like `size` arguments from failing on an element-wise comparison.
    if isinstance(size, str) and size in size_exponents:
        size = countries_max_cases[size].pow(size_exponents[size])

    fig = px.scatter_geo(countries_max_cases,
                         locations=locations,
                         locationmode='country names',
                         color=color,
                         size=size,
                         hover_name="Country/Region",
                         # Fixed colour range: values above 1500 all map to the top colour.
                         range_color= [0, 1500],
                         projection="natural earth",
                         animation_frame="Date",
                         title=title,
                         color_continuous_scale=palette)

    fig.show()

# One animated map per metric: (column used for size and colour, title, colour scale).
animated_maps = (
    ('Confirmed', 'Confirmed cases with time', 'Pinkyl'),
    ('Recovered', 'Recovered cases with time', 'Greens'),
    ('Active', 'Active cases with time', 'Redor'),
    ('Deaths', 'Death cases with time', 'Reds'),
)
for metric, map_title, colour_scale in animated_maps:
    plot_scatterGeo(
        df=DD_CW1,
        locations="Country/Region",
        size=metric,
        color=metric,
        title=map_title,
        palette=colour_scale,
    )

The plots above show how confirmed, recovered, active and death cases evolved over time.

## USA

In [None]:
# Full state/territory name -> two-letter code understood by Plotly's 'USA-states' mode.
us_code = {
    'Alabama': 'AL', 'Alaska': 'AK', 'American Samoa': 'AS', 'Arizona': 'AZ',
    'Arkansas': 'AR', 'California': 'CA', 'Colorado': 'CO', 'Connecticut': 'CT',
    'Delaware': 'DE', 'District of Columbia': 'DC', 'Florida': 'FL', 'Georgia': 'GA',
    'Guam': 'GU', 'Hawaii': 'HI', 'Idaho': 'ID', 'Illinois': 'IL',
    'Indiana': 'IN', 'Iowa': 'IA', 'Kansas': 'KS', 'Kentucky': 'KY',
    'Louisiana': 'LA', 'Maine': 'ME', 'Maryland': 'MD', 'Massachusetts': 'MA',
    'Michigan': 'MI', 'Minnesota': 'MN', 'Mississippi': 'MS', 'Missouri': 'MO',
    'Montana': 'MT', 'Nebraska': 'NE', 'Nevada': 'NV', 'New Hampshire': 'NH',
    'New Jersey': 'NJ', 'New Mexico': 'NM', 'New York': 'NY', 'North Carolina': 'NC',
    'North Dakota': 'ND', 'Northern Mariana Islands': 'MP', 'Ohio': 'OH', 'Oklahoma': 'OK',
    'Oregon': 'OR', 'Pennsylvania': 'PA', 'Puerto Rico': 'PR', 'Rhode Island': 'RI',
    'South Carolina': 'SC', 'South Dakota': 'SD', 'Tennessee': 'TN', 'Texas': 'TX',
    'Utah': 'UT', 'Vermont': 'VT', 'Virgin Islands': 'VI', 'Virginia': 'VA',
    'Washington': 'WA', 'West Virginia': 'WV', 'Wisconsin': 'WI', 'Wyoming': 'WY',
}

# Per-state maximum of the Confirmed and Deaths columns, plus the state code.
# Province_State values missing from `us_code` end up with a NaN code.
# NOTE(review): the source file is described as county-level, day-to-day data,
# so max() presumably returns the largest single county/day value per state
# rather than a state total - confirm whether a per-date sum across counties
# was intended.
usa_grouped = (
    usa.groupby(['Province_State'])[['Confirmed', 'Deaths']]
    .max()
    .reset_index()
    .assign(Code=lambda states: states['Province_State'].map(us_code))
)

In [None]:
# State-level choropleth of the 'Confirmed' column; hover shows the full state name.
plot_map(usa_grouped,location_names='Code',location_mode='USA-states',data_col='Confirmed',scope='usa',title='USA States Confirmed cases',hover_name='Province_State',palette='matter')

In [None]:
# State-level choropleth of the 'Deaths' column; hover shows the full state name.
plot_map(usa_grouped,location_names='Code',location_mode='USA-states',data_col='Deaths',scope='usa',title='USA States Death cases',hover_name='Province_State',palette='amp')

* New York (NY) had the highest number of confirmed cases (224K), followed by California (CA) with 176K and Arizona (AZ) with 110K.
* NY also had the most deaths (23.5K), followed by Illinois (5K).