<a href="https://colab.research.google.com/github/DigDataSteve/Analyzing_Covid-19_Data/blob/main/Covid_19_Analysis.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Analyzing Covid-19 Data


# Connect with Google Drive

In [1]:
# Mount Google Drive at /content/drive so later cells can read files stored there.
from google.colab import drive
# force_remount=True is passed so re-running this cell mounts again instead of reusing a previous mount.
drive.mount('/content/drive', force_remount=True)

Mounted at /content/drive


In [2]:
#import necessary libraries
import pandas as pd
import numpy as mp
import matplotlib.pyplot as plt
import seaborn as sns
import os

In [3]:
# Directory on the mounted Drive that the CSV files are read from.
path = "/content/drive/MyDrive/Covid-19_Analysis/Data"
# List the directory contents so the available file names can be checked before loading.
files = os.listdir(path)
files

['full_grouped.csv',
 'covid_19_clean_complete.csv',
 'country_wise_latest.csv',
 'day_wise.csv',
 'usa_country_wise.csv',
 'worldometer_data.csv']

In [4]:
#read files into dataframes
def read_data(path, file):
  """Read one CSV file from a directory into a DataFrame.

  Args:
    path: Directory containing the file, as a ``str`` or ``os.PathLike``.
    file: Name of the CSV file inside ``path``.

  Returns:
    The ``pandas.DataFrame`` produced by ``pd.read_csv`` for that file.
  """
  # os.path.join accepts pathlib.Path as well as str, unlike ``path + '/' + file``.
  return pd.read_csv(os.path.join(path, file))

In [5]:
# Load each CSV into its own DataFrame; the file names are listed in the
# same order as the variables they are assigned to.
(grouped_data,
 clean_data,
 country_data,
 day_data,
 usa_data,
 world_data) = (
    read_data(path, csv_name)
    for csv_name in (
        'full_grouped.csv',
        'covid_19_clean_complete.csv',
        'country_wise_latest.csv',
        'day_wise.csv',
        'usa_country_wise.csv',
        'worldometer_data.csv',
    )
)

# Analyzing Country Data for Maximum Numbers and Trends


1.   Find countries with highest cases, deaths, recoveries, and active cases
2.   Look at trends associated with deaths, recoveries, and active cases



In [6]:
# Print every column label of world_data, one per line.
for column_name in world_data.columns:
  print(column_name)

Country/Region
Continent
Population
TotalCases
NewCases
TotalDeaths
NewDeaths
TotalRecovered
NewRecovered
ActiveCases
Serious,Critical
Tot Cases/1M pop
Deaths/1M pop
TotalTests
Tests/1M pop
WHO Region


In [7]:
# Preview the first rows of the 'WHO Region' column.
world_data['WHO Region'].head()

0          Americas
1          Americas
2    South-EastAsia
3            Europe
4            Africa
Name: WHO Region, dtype: object

In [8]:
# Draw one treemap per metric so the countries with the largest values stand out.
import plotly.express as px

columns = ['TotalCases', 'TotalDeaths', 'TotalRecovered', 'ActiveCases']

for c in columns:
  chart_title = "Treemap of {} by Country".format(c)
  treemap = px.treemap(
      world_data,
      path=['Country/Region'],
      values=c,
      title=chart_title,
  )
  treemap.show()



In [9]:
# Plot the confirmed, death, recovered and active series from day_data against the date.
px.line(
    day_data,
    x='Date',
    y=['Confirmed', 'Deaths', 'Recovered', 'Active'],
    title='Covid cases to date',
    template='plotly_dark',
)

# Analyze Test Results, Countries Most Impacted, and Top 20 Countries


1.   Visualize total test data (percentage of population that was tested)
2.   Top-20 countries most affected by Covid-19 (deaths per million)



In [10]:
# Tests performed per person (TotalTests / Population) for the first 20 rows of world_data.
# The value is a fraction, not a percentage, which matches the "Ratio" wording of the title.
top20 = world_data.iloc[0:20]
population_test_ratio = top20['TotalTests'] / top20['Population']
bar = px.bar(top20, x='Country/Region', y=population_test_ratio, color='Country/Region', title='Population Covid Test Ratio')
# Label the y-axis explicitly; an unnamed Series would otherwise be shown as "y".
bar.update_layout(xaxis={'categoryorder': 'total descending'}, yaxis_title='Tests per Person')
bar.show()

In [11]:
# Show the column labels of world_data for reference.
world_data.columns

Index(['Country/Region', 'Continent', 'Population', 'TotalCases', 'NewCases',
       'TotalDeaths', 'NewDeaths', 'TotalRecovered', 'NewRecovered',
       'ActiveCases', 'Serious,Critical', 'Tot Cases/1M pop', 'Deaths/1M pop',
       'TotalTests', 'Tests/1M pop', 'WHO Region'],
      dtype='object')

In [12]:
# Bar chart of 'Deaths/1M pop' for the first 20 rows of world_data, bars ordered by value.
# NOTE(review): the rows are taken in file order, not ranked by deaths per 1M; confirm that is intended.
fig = px.bar(
    world_data.head(20),
    x='Country/Region',
    y='Deaths/1M pop',
    color='Country/Region',
)
fig.update_layout(
    barmode='stack',
    xaxis={'categoryorder': 'total descending'},
    title='Covid-19 Deaths per 1M People',
    template='plotly_dark',
)
fig.show()

# Analyze 20 Countries with Highest Negative Impact


1.   Look at top 20-countries with cases that are serious/critical per population
2.   Look at top 5-countries with active cases per population
3.   Look at top 20-countries with highest number of deaths per population





In [13]:
# Percentage of the population in serious/critical condition for the first 20 rows of world_data.
top20 = world_data.iloc[0:20]
# Slice both columns once so the division only involves the 20 plotted rows.
serious_critical_pct = top20['Serious,Critical'] / top20['Population'] * 100
fig = px.bar(top20, x='Country/Region', y=serious_critical_pct, title='Population Percentage in Serious/Critical (Shown as a Percentage)', color='Country/Region')
fig.update_layout(xaxis={'categoryorder': 'total descending'}, template='plotly_dark', yaxis_title='Serious/Critical Percentages')
fig.show()

In [14]:
# Show the column labels of world_data again before picking the next metric.
world_data.columns


Index(['Country/Region', 'Continent', 'Population', 'TotalCases', 'NewCases',
       'TotalDeaths', 'NewDeaths', 'TotalRecovered', 'NewRecovered',
       'ActiveCases', 'Serious,Critical', 'Tot Cases/1M pop', 'Deaths/1M pop',
       'TotalTests', 'Tests/1M pop', 'WHO Region'],
      dtype='object')

In [15]:
# Active cases as a percentage of population for the first 5 rows of world_data.
# NOTE(review): the rows are taken in file order, not ranked by this percentage; confirm that is intended.
top5 = world_data.iloc[0:5]
# Multiply by 100 so the values match the "Percentages" axis title.
active_cases_pct = top5['ActiveCases'] / top5['Population'] * 100
fig = px.bar(top5, x='Country/Region', y=active_cases_pct, color='Country/Region', template='plotly_dark')
fig.update_layout(xaxis={'categoryorder':'total descending'}, yaxis_title='Active Cases Percentages', title='Top 5 Countries with Most Active Cases per Population')
fig.show()

In [16]:
# Horizontal bars for the 20 rows with the largest TotalDeaths values.
most_deaths = world_data.sort_values(by='TotalDeaths', ascending=False).head(20)
fig = px.bar(
    most_deaths,
    y='Country/Region',
    x='TotalDeaths',
    color="TotalDeaths",
)
fig.update_layout(
    template='plotly_dark',
    title='Top 20 Countries with Highest Number of Deaths',
)
fig.show()

# Utilize Pie Charts for Further Analysis


1.   Top 15-countries w/ the most total cases
2.   Top 15-countries w/ the most total deaths
3.   Top 15-countries w/ the most active cases
4.   Top 15-countries w/ the most recoveries
5.   Deaths-to-cases ratio
6.   Deaths-to-recovered ratio




In [17]:
# One donut chart per metric, each showing the 15 countries with the largest value of that metric.
cases = ['TotalCases', 'TotalDeaths', 'ActiveCases', 'TotalRecovered']
for c in cases:
  # Rank per metric: reusing the same first 15 rows for every chart would not
  # give the "top 15" that the title promises.
  top15 = world_data.nlargest(15, c)
  fig = px.pie(top15, values=c, names='Country/Region', hole=0.275, title='Top 15-Countries with the most {}'.format(c))
  fig.update_layout(template='plotly_dark')
  fig.show()

In [18]:
# Ratio of total deaths to total cases for every row of world_data, bars ordered by value.
deaths_cases = world_data['TotalDeaths'].div(world_data['TotalCases'])
fig = px.bar(
    world_data,
    x='Country/Region',
    y=deaths_cases,
    title='Total Deaths to Total Cases Ratio',
)
fig.update_layout(
    template='plotly_dark',
    xaxis={'categoryorder':'total descending'},
)
fig.show()

In [19]:
# Ratio of total deaths to total recoveries for every row of world_data, bars ordered by value.
deaths_recovered = world_data['TotalDeaths'].div(world_data['TotalRecovered'])
fig = px.bar(
    world_data,
    x='Country/Region',
    y=deaths_recovered,
    title='Total Deaths to Total Recovered Ratio',
)
fig.update_layout(
    template='plotly_dark',
    xaxis={'categoryorder':'total descending'},
)
fig.show()

In [20]:
# Inspect the last rows of grouped_data to see its columns before plotting per-country series.
grouped_data.tail()

Unnamed: 0,Date,Country/Region,Confirmed,Deaths,Recovered,Active,New cases,New deaths,New recovered,WHO Region
35151,2020-07-27,West Bank and Gaza,10621,78,3752,6791,152,2,0,Eastern Mediterranean
35152,2020-07-27,Western Sahara,10,1,8,1,0,0,0,Africa
35153,2020-07-27,Yemen,1691,483,833,375,10,4,36,Eastern Mediterranean
35154,2020-07-27,Zambia,4552,140,2815,1597,71,1,465,Africa
35155,2020-07-27,Zimbabwe,2704,36,542,2126,192,2,24,Africa


# Automate Visualization
Create a function to visualize country data


In [41]:
from plotly.subplots import make_subplots
import plotly.graph_objects as go 

def visualize_country(df, country):
  """Plot confirmed, death, active and recovered counts over time for one country.

  Draws a single row of four line charts (one per metric) and shows the figure.

  Args:
    df: DataFrame with 'Country/Region', 'Date', 'Confirmed', 'Deaths',
      'Active' and 'Recovered' columns.
    country: Value of 'Country/Region' whose rows are plotted.

  Raises:
    ValueError: If no row of ``df`` has ``country`` in 'Country/Region'.
  """
  metrics = ('Confirmed', 'Deaths', 'Active', 'Recovered')
  chart_data = df.loc[df['Country/Region'] == country, ['Date', *metrics]]
  # Fail loudly on an unknown or misspelled country instead of rendering four empty charts.
  if chart_data.empty:
    raise ValueError('No rows found for country {!r}'.format(country))

  fig = make_subplots(rows=1, cols=len(metrics), subplot_titles=metrics)
  # One trace per metric, placed left to right in the order listed above.
  for col, metric in enumerate(metrics, start=1):
    fig.add_trace(
        go.Scatter(name=metric, x=chart_data['Date'], y=chart_data[metric]),
        row=1, col=col
    )

  fig.update_layout(height=600, width=1100, template='plotly_dark', title='Date VS. Recorded {} Cases'.format(country))
  fig.show()

In [42]:
# Render the four per-metric charts for Mexico from the grouped data.
visualize_country(grouped_data, 'Mexico')