Coronavirus disease (COVID-19) is an infectious disease caused by a newly discovered coronavirus.

Most people infected with the COVID-19 virus will experience mild to moderate respiratory illness and recover without requiring special treatment. Older people, and those with underlying medical problems like cardiovascular disease, diabetes, chronic respiratory disease, and cancer are more likely to develop serious illness.

First Identified: December 2019 in Wuhan, the capital of Hubei province, China.
Most common symptoms: fever, dry cough, tiredness.
Less common symptoms: aches and pains, sore throat, diarrhoea, conjunctivitis, headache, loss of taste or smell.
Risk factors: Travel, Viral Exposure.
Prevention: Hand Washing, Face Coverings, Quarantine, Social Distancing.
More info: https://www.who.int/health-topics/coronavirus#tab=tab_1

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
%matplotlib inline
sns.set_style('whitegrid')
#sns.set_palette(sns.xkcd_palette(sample_colors))
import warnings
warnings.filterwarnings("ignore")

In [None]:
def _column_stat(frame, stat):
    '''Call the Series method named *stat* on every column; NaN where it is undefined.'''
    results = []
    for _, column in frame.items():
        try:
            results.append(getattr(column, stat)())
        except (TypeError, ValueError):
            # e.g. mean/std of a text column, or min/max of mixed str/NaN values
            results.append(float('nan'))
    return pd.Series(results, index=frame.columns, dtype=object)


def my_df_summary(data):
    '''Summarise every column of a DataFrame.

    Parameters
    ----------
    data : pandas.DataFrame (a Series is treated as a one-column frame)

    Returns
    -------
    pandas.DataFrame
        One column per input column and the rows 'Minimum', 'Maximum',
        'Mittelwert' (mean), 'Stand. Abw.' (standard deviation), '#NA',
        '#Uniques' and 'dtypes'. A statistic that is not defined for a
        column (for example the mean of a text column) is NaN.

    If *data* cannot be summarised, 'No Summary.' is printed and *data*
    is returned unchanged.
    '''
    if isinstance(data, pd.Series):
        data = data.to_frame()
    if not isinstance(data, pd.DataFrame):
        print('No Summary.')
        return data

    try:
        rows = [
            _column_stat(data, 'min'),
            _column_stat(data, 'max'),
            _column_stat(data, 'mean'),
            _column_stat(data, 'std'),
            data.isna().sum(),
            data.nunique(),
            data.dtypes,
        ]
        return pd.DataFrame(
            rows,
            index=['Minimum', 'Maximum', 'Mittelwert', 'Stand. Abw.', '#NA', '#Uniques', 'dtypes'],
        )
    except (TypeError, ValueError):
        # Best effort: e.g. unhashable cell values break nunique()
        print('No Summary.')
        return data


In [None]:
# Global matplotlib defaults: once set, they apply to every later plot created through plt.
plt.rcParams.update({
    'font.size': 14,            # default text size
    'axes.titlesize': 14,       # axes title
    'axes.labelsize': 14,       # x and y axis labels
    'xtick.labelsize': 14,      # x tick labels
    'ytick.labelsize': 14,      # y tick labels
    'legend.fontsize': 10,      # legend entries
    'figure.titlesize': 12,     # figure title
    'lines.linewidth': 10,
    'lines.color': 'r',
    'figure.figsize': (16, 8),
})
Set1 = ['Red', 'Yellow', 'Navy', 'pink', 'purple', 'black', 'Blue']

In [None]:
#df = pd.read_csv('.csv', sep=';', header=None)

corona = pd.read_csv("https://raw.githubusercontent.com/CSSEGISandData/COVID-19/web-data/data/cases_country.csv")
corona.head(10)

In [None]:
corona.info()

In [None]:
corona1=corona.dropna(how='all', axis=1)
#corona.drop(['People_Hospitalized'], axis=1)

In [None]:
corona1.head(10)

In [None]:
corona1.describe()

In [None]:
#name of columns
corona1.columns

In [None]:
#number of infected countries
corona1['Country_Region'].count()

In [None]:
#corona1['Deaths']

In [None]:
involved_countries=corona1['Country_Region'].unique().tolist()
involved_countries[20:30]

In [None]:
Total_confirmed=corona1['Confirmed'].sum()
print('\nGlobal Confirmed Cases from Corona Infection:\n{}\n'.format(Total_confirmed))

In [None]:
Total_death=corona1['Deaths'].sum()
print('\nGlobal Death from Corona Infection:\n{}\n'.format(Total_death))

In [None]:
#countries where no deaths are reported
no_dead_countries=list(corona1.loc[corona1['Deaths'] == 0, 'Country_Region'])
len(no_dead_countries)

In [None]:
# Country (or countries, if several share the maximum) with the highest death count
max_deaths = corona1['Deaths'].max()
has_max_deaths = corona1['Deaths'] == max_deaths
most_dead_country = corona1.loc[has_max_deaths, 'Country_Region'].tolist()
print('{} reported the most dead from with total of Corona Death {}'.format(most_dead_country, max_deaths))

In [None]:
confirmed_US=corona1.loc[corona1['Country_Region'] == 'US' , 'Confirmed']
A=confirmed_US/Total_confirmed*100

In [None]:
#corona1.loc[corona1.Country_Region == 'US','Deaths']

In [None]:
#No. of Recovered Cases in US
corona1.loc[corona1.Country_Region == 'US',  'Recovered'] 

In [None]:
#Ranking the table by the number of deaths
sort_by_death = corona1.sort_values('Deaths',ascending=False)
sort_by_death.head(3)

In [None]:
# Derived per-country metrics, all expressed in percent.
sort_by_death['Death_Rate'] = sort_by_death['Deaths'] / sort_by_death['Confirmed'] * 100
sort_by_death['Recovery_Rate'] = sort_by_death['Recovered'] / sort_by_death['Confirmed'] * 100
# Assign the filled column back: fillna(inplace=True) on a column selection works on
# an intermediate object and is not guaranteed to modify the frame itself.
sort_by_death['Incident_Rate'] = sort_by_death['Incident_Rate'].fillna(0)
sort_by_death['Confirmed_percentage'] = sort_by_death['Confirmed'] / Total_confirmed * 100

# First 11 rows (frame is sorted by deaths), one colour gradient per metric.
(
    sort_by_death
    .drop(['Last_Update', 'Lat', 'Long_', 'UID', 'ISO3', 'Mortality_Rate'], axis=1)
    .head(11)
    .style
    .hide(axis='index')  # replaces Styler.hide_index(), which pandas 2.0 removed
    .background_gradient(cmap='Blues', subset=["Confirmed"])
    .background_gradient(cmap='Reds', subset=["Deaths"])
    .background_gradient(cmap='Greens', subset=["Recovered"])
    .background_gradient(cmap='Purples', subset=["Active"])
    .background_gradient(cmap='GnBu', subset=["Incident_Rate"])
    .background_gradient(cmap='OrRd', subset=["Death_Rate"])
    .background_gradient(cmap='PuBu', subset=["Recovery_Rate"])
    .background_gradient(cmap='Blues', subset=["Confirmed_percentage"])
)



In [None]:
#Heat Map
# Correlate the numeric columns only: the frame also holds text and date columns,
# which DataFrame.corr() refuses to process on pandas >= 2.0.
numeric_corr = sort_by_death.select_dtypes(include='number').corr()
plt.figure(figsize=(12,6))
sns.heatmap(numeric_corr, cmap='coolwarm', annot=True)
plt.show()

In [None]:
# Share of the global confirmed cases per country (every row of the table)
labels = sort_by_death['Country_Region']
sizes = sort_by_death['Confirmed_percentage']

# Pie chart with one wedge per country
plt.pie(sizes, labels=labels, autopct='%0.1f%%', startangle=-60)
plt.title("Confirmed Cases Percentage %")
plt.axis('equal')
plt.legend(title="Countries", loc="best")
plt.show()

In [None]:
# Deaths split across the first ten rows of the deaths-sorted table
explodeTuple = (0.1,) + (0.0,) * 9   # pull only the first wedge out of the pie
labels = sort_by_death['Country_Region'].head(10)
sizes = sort_by_death['Deaths'].head(10)

# Pie chart
plt.pie(sizes, labels=labels, explode=explodeTuple, autopct='%0.1f%%', startangle=-30)
plt.title("Dead Cases Percentage %")
plt.axis('equal')
plt.legend(title="Countries", loc="best")
plt.show()

In [None]:
# Recovered cases split across the first ten rows of the deaths-sorted table
explodeTuple = (0.1,) + (0.0,) * 9   # pull only the first wedge out of the pie
labels = sort_by_death['Country_Region'].head(10)
sizes = sort_by_death['Recovered'].head(10)

# Pie chart
plt.pie(sizes, labels=labels, explode=explodeTuple, autopct='%0.1f%%', startangle=-60)
plt.title("Recovered Cases Percentage %")
plt.axis('equal')
plt.legend(title="Countries", loc="best")
plt.show()

In [None]:
plt.rcParams['figure.figsize']=(10,8)
labels = sort_by_death.Country_Region[0:10]
sizes = sort_by_death.Deaths[0:10]

In [None]:
sns.barplot(x=labels, y=sizes, errwidth=2, data=corona1, palette="coolwarm").set(title = 'Dead in the first top 10 listed countries', xlabel = 'Country', ylabel = 'No. of Dead' )
plt.show()

In [None]:
# Recovered cases for the first ten rows of the deaths-sorted table
plt.rcParams['figure.figsize']=(10,8)
labels = sort_by_death.Country_Region[0:10]
sizes = sort_by_death.Recovered[0:10]
recovered_ax = sns.barplot(x=labels, y=sizes, errwidth=2, data=corona1, palette="coolwarm")
# The y axis carries recovered counts, so it must not be labelled as deaths.
recovered_ax.set(title = 'recovered in the first top 10 listed countries', xlabel = 'Country', ylabel = 'No. of Recovered' )
plt.show()

In [None]:
# Grouped bar chart: recovery rate next to death rate for the first ten rows
# of the deaths-sorted table.
plt.rcParams['figure.figsize'] = [14, 6]
divisions = list(sort_by_death.Country_Region[0:10])
Deads = sort_by_death.Death_Rate[0:10]
Recovery = sort_by_death.Recovery_Rate[0:10]

index = np.arange(len(divisions))   # one group per country actually present
width = 0.4                         # two bars per group, leaving a gap between groups

# Both series use the same bar width; the second one is shifted right by one
# bar width so the bars sit side by side instead of on top of each other.
plt.bar(index, Recovery, width, color='Green', label="%Recovery")
plt.bar(index + width, Deads, width, color='Yellow', label="%Dead")

plt.title("(%)Recovery vs. Mortality Rate, Top 10 Countries of the Global List")
plt.ylabel("(%)")
# Tick in the middle of each two-bar group
plt.xticks(index + width/2, divisions)

plt.legend(loc='best')
plt.show()

In [None]:
# LMplot 
sns.lmplot(x="Death_Rate", y="Recovery_Rate", data=sort_by_death)
plt.show()

# Time Series Analysis

Daily reports for all countries since the beginning of the pandemic, updated up to today.

In [None]:
corona_cases_time = pd.read_csv("https://raw.githubusercontent.com/CSSEGISandData/COVID-19/web-data/data/cases_time.csv", parse_dates = ['Last_Update','Report_Date_String'])
corona_cases_time.head(10)

In [None]:
corona_time = pd.read_csv('covid.csv', parse_dates = ['ObservationDate','Last Update'])
corona_time.head(100)

In [None]:
print ('Last update: ' + str(corona_time.ObservationDate.max()))

In [None]:
corona_time

In [None]:
#the last 4 rows
corona_time.tail(4)

In [None]:
# Cleaning: remove the 'SNo' and 'Last Update' columns, then switch the
# remaining headers to underscore-style names.
corona_time = (
    corona_time
    .drop(columns=['SNo', 'Last Update'])
    .rename(columns={
        'ObservationDate': 'Date',
        'Country/Region': 'Country_Region',
        'Province/State': 'Province_State',
    })
)
corona_time.head(2)

In [None]:
#the first and last record
(corona_time.Date.min(),corona_time.Date.max())

In [None]:
corona_time.shape

In [None]:
#calling the function my_df_summary() to summarize the time-series records
my_df_summary(corona_time)

# line plot of the full time series 

In [None]:
corona_time = corona_time.set_index('Date')
corona_time 

In [None]:
corona_time['Confirmed'].plot(linewidth=0.5, color='red');
plt.title('Infected Cases over Time', size=14)
plt.ylabel('No. of confirmed cases')

In [None]:
corona_time['Deaths'].plot(linewidth=0.5, color='blue');
plt.title('Dead Cases over Time', size=14)
plt.ylabel('No. of dead cases')

In [None]:
corona_time['Recovered'].plot(linewidth=0.5, color='green');
plt.title('Recovered Cases over Time', size=14)
plt.ylabel('No. of recovered cases')
plt.show()

# Scatter plot of the full time series 

In [None]:
cols_plot = ['Confirmed', 'Recovered', 'Deaths']
axes = corona_time[cols_plot].plot(marker='.', alpha=0.5, linestyle='None', figsize=(11, 9), subplots=True)
for ax in axes:
    ax.set_ylabel(' Total Number')
plt.show()

In [None]:
# Confirmed cases in three overlapping two-month windows, one subplot each.
plt.rcParams['figure.figsize']=(16,8)
fig, axs = plt.subplots(2, 2, figsize=(11, 9))

# (first month, last month, marker) for each plotted window
windows = [
    ('2020-01', '2020-02', '*'),
    ('2020-02', '2020-03', '.'),
    ('2020-03', '2020-04', '.'),
]
for ax, (start, end, marker) in zip(axs.flat, windows):
    corona_time.loc[start:end, 'Confirmed'].plot(marker=marker, linestyle='None', ax=ax)
    # Label every plotted axes explicitly; the data shown is the 'Confirmed' column.
    ax.set_title('{} to {}'.format(start, end))
    ax.set_ylabel('No. of confirmed cases')

axs[1, 1].set_visible(False)   # only three windows exist, hide the unused fourth axes
fig.tight_layout()
plt.show()

# Regression

In [None]:
# Deaths and recovered cases against confirmed cases, side by side.
plt.rcParams['figure.figsize']=(16,8)
fig, axs = plt.subplots(1, 2)
# The marker colour keyword of a scatter plot is `color`; `colors` is not accepted.
sort_by_death.plot(kind='scatter', y='Deaths', x='Confirmed', ax=axs[0], color='green')
sort_by_death.plot(kind='scatter', y='Recovered', x='Confirmed', ax=axs[1], color='green')
plt.show()

# Forecasting: Train/Test Split

In [None]:
corona_time.head(3)

In [None]:
corona_time_split = corona_time.drop(['Province_State', 'Country_Region'], axis=1)

In [None]:
corona_time_split1=corona_time_split.drop(['Deaths', 'Recovered'], axis=1)

In [None]:
corona_time_split1;

In [None]:
# Row-order split: the first 95 % of the rows form the training set, the rest the test set
i_split = int(0.95 * len(corona_time_split1))
df_train = corona_time_split1.iloc[:i_split]
df_test = corona_time_split1.iloc[i_split:]

total_rows = len(df_train) + len(df_test)
print('Train und Test are zustogether  {} input.'.format(total_rows))
print('Forecast mus have just {} Periods.'.format(len(df_test)))

In [None]:
# Training and test portions in one chart, with a vertical line at the split position
plt.rcParams['figure.figsize'] = [15, 6]

split_position = corona_time_split1.index[i_split]
plt.plot(df_train.index, df_train.values, color='red', ls='-', label='Trainingsdaten')
plt.plot(df_test.index, df_test.values, label='Testdaten')
plt.axvline(x=split_position, color='green', ls='-', linewidth=1)

plt.title('Infection Prediction', fontsize=14)
plt.xlabel('Time', fontsize=10)
plt.ylabel('No. of Confirmed Cases', fontsize=10)
plt.legend(loc=2, fontsize=10)
plt.show()