# COVID-19 Exploratory Data Analysis (EDA) and Visualization

* You can find the full project at: https://ourworldindata.org/coronavirus
* Daily-updated dataset link: https://covid.ourworldindata.org/data/owid-covid-data.csv
* We'll be exploring the dataset from: https://covid.ourworldindata.org
* Dashboard Example: https://coronavirus.jhu.edu/map.html

### Load the needed libraries

In [13]:
import numpy as np 
import pandas as pd
import matplotlib.pyplot as plt 
import seaborn as sns 
import warnings

### Ignore warning messages and display plots inline in the Jupyter notebook

In [14]:
warnings.filterwarnings('ignore')
%matplotlib inline

### Read the dataset from the website

In [None]:
covid19_df = pd.read_csv('https://covid.ourworldindata.org/data/owid-covid-data.csv')

In [None]:
type(covid19_df)    

In [None]:
pd.options.display.min_rows = 100  

covid19_df.head(50)

In [None]:
first_5_rows = covid19_df.head(5)   
first_5_rows

In [None]:
type(first_5_rows)

In [None]:
last_5_rows = covid19_df.tail(5)
last_5_rows

### Display statistical information about the dataset

In [None]:
covid19_df.info()  

In [None]:
covid19_df.dtypes   # show the data type for each columns in the dataset

### Summary statistics (count, mean, std, min, quartiles, max, etc.)

In [None]:
covid19_df.describe()

In [None]:
# Aggregate only the numeric columns: the frame also holds text columns
# (e.g. 'location', 'date'), and asking for 'mean'/'std' over those raises a
# TypeError on recent pandas versions.
covid19_df.select_dtypes(include='number').agg(['mean', 'std'])

In [None]:
columns = covid19_df.columns
columns

In [None]:
first_5_rows

In [None]:
covid19_df.shape

In [None]:
covid19_df.head(100).style.background_gradient(cmap='Greens', )

### Explore Italy Data

In [None]:
covid19_df['location'].unique() 

In [None]:
covid19_df['location'].nunique()

In [None]:
covid19_df['location'].unique().size

In [None]:
# Keep only the rows for Italy. Take an explicit copy so that later column
# assignments on `italy` act on an independent frame rather than on a slice of
# covid19_df (which is what triggers pandas' SettingWithCopyWarning).
italy = covid19_df.loc[covid19_df['location'] == 'Italy'].copy()

In [None]:
italy.head()

In [None]:
type(italy)

In [None]:
italy.shape

In [None]:
italy.describe()

In [None]:
italy.info()

In [None]:
italy.dtypes

### Number of null values in each column

In [None]:
italy.isna().sum()

In [None]:
italy.head(100).style.background_gradient(cmap='Reds')

In [None]:
columns

### The highest number of cases in Italy

In [None]:
italy['total_cases'].max()

### Row(s) with the highest number of cases

In [None]:
italy[italy['total_cases'] == italy['total_cases'].max()]

### The lowest number of cases in Italy

In [None]:
italy['total_cases'].min()

### Row(s), including the date, with the lowest number of cases


In [None]:
italy[italy['total_cases'] ==  italy['total_cases'].min()]

### The max of total tests

In [None]:
# This section reports the maximum of 'total_tests', so take the max.
# .sum() would add up every daily value of the column instead.
# NOTE(review): 'total_tests' looks like a running total - confirm against the dataset codebook.
italy['total_tests'].max()

### Row of max total tests

In [None]:
italy[italy['total_tests'] == italy['total_tests'].max()]

### date of the max total tests

In [None]:
# Select the rows where 'total_tests' equals its maximum and return the date of the first one.
is_max_tests = italy['total_tests'] == italy['total_tests'].max()
italy[is_max_tests]['date'].values[0]

In [None]:
italy.head()

### Number of non-missing `total_tests` values

In [None]:
# Number of rows in which 'total_tests' is present (row count minus the nulls).
italy['total_tests'].notna().sum()

### Date with the lowest total number of tests

In [None]:
# Select the rows where 'total_tests' equals its minimum and return the date of the first one.
is_min_tests = italy['total_tests'] == italy['total_tests'].min()
italy[is_min_tests]['date'].values[0]

### The maximum total number of boosters

In [None]:
italy['total_boosters'].max()

In [None]:
italy[italy['total_boosters'] == italy['total_boosters'].max()]

### Date of the maximum total boosters

In [None]:
italy[italy['total_boosters'] == italy['total_boosters'].max()]['date'].values[0]

### Total number of people who took the booster

In [None]:
# Report the overall booster total as the peak value of the column rather than
# the sum of all its daily values, which would count earlier boosters repeatedly.
# NOTE(review): assumes 'total_boosters' is a running total - confirm against the dataset codebook.
italy['total_boosters'].max()

### Number of non-missing `weekly_hosp_admissions` records

In [None]:
# Number of rows in which 'weekly_hosp_admissions' is present (row count minus the nulls).
italy['weekly_hosp_admissions'].notna().sum()

### The maximum weekly hospital admissions recorded

In [None]:
italy['weekly_hosp_admissions'].max()

### The minimum weekly hospital admissions recorded

In [None]:
italy['weekly_hosp_admissions'].min()

### Row of the maximum weekly hospital admissions recorded

In [None]:
italy[italy['weekly_hosp_admissions'] == italy['weekly_hosp_admissions'].max()]

### Date of the maximum weekly hospital admissions recorded

In [None]:
# Select the rows where 'weekly_hosp_admissions' equals its maximum and return the date of the first one.
is_peak_admissions = italy['weekly_hosp_admissions'] == italy['weekly_hosp_admissions'].max()
italy[is_peak_admissions]['date'].values[0]

### Date of the most recent record

In [None]:
# Sort the dates from newest to oldest and return the first (most recent) one.
dates_newest_first = italy['date'].sort_values(ascending=False)
dates_newest_first.iloc[:1].values[0]

In [None]:
# Sum the case and death columns per date; the result is indexed by 'date'.
overtime_columns = ["total_cases", "new_cases", "total_deaths", "new_deaths"]
italy_df_overtime = italy.groupby("date")[overtime_columns].sum()
# Preview the first five dates.
italy_df_overtime.head(5)

In [None]:
# Line plot of Italy's 'total_cases' over time, one point per date.

plt.figure(figsize=(20, 10))

# The index holds the dates as read from the CSV; parsing them gives matplotlib a
# real time axis instead of one categorical tick per date string.
plt.plot(pd.to_datetime(italy_df_overtime.index), italy_df_overtime['total_cases'])

# The plotted series is Italy-only, so the title names Italy rather than the world.
plt.title('Evolution of Confirmed Covid-19 cases over time in Italy', fontsize=16)
plt.xlabel('Days', fontsize=16)
plt.ylabel('Confirmed cases', fontsize=16)
plt.show()

In [None]:
# Convert Italy's 'date' column to datetime.
# `italy_df_overtime` was built with groupby on "date", so the dates are its index
# and it has no 'date' column to read from; parse italy's own column instead.
# assign() returns a new frame, so nothing is written through a slice of covid19_df.
italy = italy.assign(date=pd.to_datetime(italy['date']))

In [None]:
# Horizontal bar chart: one bar per date, bar length taken from 'total_cases'.
bar_positions = italy_df_overtime.index
bar_lengths = italy_df_overtime['total_cases']
plt.barh(bar_positions, bar_lengths)
plt.show()