# Project: Analysis of COVID-19 Cases from a Dataset Using Python

In [None]:
# Importing all the useful libraries
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

In [None]:
# Load the COVID-19 cases CSV into the DataFrame `data`, which every later cell reads.
# NOTE(review): the file name suggests a 13 December 2020 daily snapshot - confirm against the dataset.
data = pd.read_csv("../input/13th-december-2020-covid19-cases/12-13-2020.csv")

In [None]:
# Display the first rows of the DataFrame to inspect its columns and sample values
data.head()

In [None]:
# Print the DataFrame summary (column names, non-null counts, dtypes) before cleaning
data.info()

In [None]:
# Clean the four count columns at positions 7-10 (labelled Confirmed, Deaths,
# Recovered and Active by the cells below): replace missing values with the
# column mean, then cast the column to integers.
for column_name in data.columns[7:11]:
    column_mean = data[column_name].mean()
    data[column_name] = data[column_name].fillna(column_mean).astype(int)

# A case count cannot be negative, so keep only the magnitude of Active
data["Active"] = data["Active"].abs()

# Parse the Last_Update column into datetime values
data['Last_Update'] = pd.to_datetime(data['Last_Update'])

# Print the summary again to confirm the changes
data.info()

In [None]:
# Solution 2 - Displaying latest number of confirmed, death, recovered, and active cases with respect to Country
#
# Only rows stamped with the most recent Last_Update are summed. The latest
# timestamp is computed once with Series.max(), which skips NaT, instead of
# calling the builtin max() for every country and column.
count_columns = list(data.columns[7:11])  # Confirmed, Deaths, Recovered, Active (by position)
latest_rows = data[data['Last_Update'] == data['Last_Update'].max()]
countries = data['Country_Region'].unique()

temp = (
    latest_rows.groupby('Country_Region', sort=False)[count_columns]
    .sum()
    # Keep every country in first-appearance order; a country with no row at
    # the latest timestamp still gets a row, with zero totals.
    .reindex(countries, fill_value=0)
    .reset_index()
)
temp.columns = ["Country","Confirmed","Deaths","Recovered","Active"]
temp.head()

In [None]:
# Solution 3 - Displaying latest number of confirmed, death, and recovered cases with respect to Country and Province
#
# Only rows stamped with the most recent Last_Update are summed; the latest
# timestamp is computed once (Series.max() skips NaT).
latest_rows = data[data['Last_Update'] == data['Last_Update'].max()]
count_columns = data.columns[7:10]  # Confirmed, Deaths, Recovered (by position)

tempList = []
for country in data['Country_Region'].unique():
    provinces = data.loc[data['Country_Region'] == country, 'Province_State'].unique()
    country_latest = latest_rows[latest_rows['Country_Region'] == country]
    for province in provinces:
        # NaN never compares equal to anything, so rows without a province
        # must be selected with isna(); otherwise their totals would be 0.
        if pd.isna(province):
            in_province = country_latest['Province_State'].isna()
        else:
            in_province = country_latest['Province_State'] == province
        totals = [country_latest.loc[in_province, column].sum() for column in count_columns]
        tempList.append([country, province] + totals)
temp = pd.DataFrame(tempList,columns=["Country","Province","Confirmed","Deaths","Recovered"])
temp.head()

# Rows without a province cannot be given one, so they appear with
# Province = NaN and carry the totals of the rows that have no province.

In [None]:
# Solution 4 - Displaying number of confirmed, death, and recovered cases with respect to the Provinces of China
#
# One output row per distinct province value found in China's rows, holding
# the sums of the count columns at positions 7-9 over that province's rows.
china_rows = data[data['Country_Region'] == 'China']
province_totals = []
for province in china_rows.Province_State.unique():
    in_province = china_rows['Province_State'] == province
    sums = [china_rows.loc[in_province, column].sum() for column in data.columns[7:10]]
    province_totals.append([province] + sums)
temp = pd.DataFrame(province_totals, columns=["Province","Confirmed","Deaths","Recovered"])
temp.head()

In [None]:
# Solution 5 - Latest number of deaths with respect to Country
#
# Rows are ordered from the most recent Last_Update to the oldest, then the
# timestamp is dropped so only Country_Region and Deaths remain.
# Two fixes over the earlier version: the sorted result is now actually kept
# (sort_values returns a new frame), and the Deaths column is selected rather
# than positional column 7, which holds Confirmed.
temp = (
    data[['Last_Update', 'Country_Region', 'Deaths']]
    # mergesort is stable, so rows sharing a timestamp keep their file order
    .sort_values(by='Last_Update', ascending=False, kind='mergesort')
    .drop(columns='Last_Update')
    .reset_index(drop=True)
)
temp.head()

In [None]:
# Solution 6 - Countries with no number of recovered cases
#
# A country is listed when the Recovered values of all its rows add up to zero.
recovered = data['Recovered']
country_of_row = data['Country_Region']
countries_without_recoveries = [
    country
    for country in country_of_row.unique()
    if int(recovered[country_of_row == country].sum()) == 0
]
temp = pd.DataFrame(countries_without_recoveries, columns=["Country"])
temp

In [None]:
# Solution 7 - Countries with all number of died cases
#
# One row per country holding the sum of Deaths over all of that country's rows.
deaths = data['Deaths']
country_of_row = data['Country_Region']
death_totals = [
    [country, deaths[country_of_row == country].sum()]
    for country in country_of_row.unique()
]
temp = pd.DataFrame(death_totals, columns=["Country","Deaths"])
temp.head()

In [None]:
# Solution 8 - Countries with all number of recovered cases
#
# One row per country holding the sum of Recovered over all of that country's rows.
countries = data['Country_Region'].unique()
recovered_sums = [
    data.loc[data['Country_Region'] == country, 'Recovered'].sum()
    for country in countries
]
temp = pd.DataFrame(list(zip(countries, recovered_sums)), columns=["Country","Recovered"])
temp.head()

In [None]:
# Solution 9 - Top 10 Countries with number of confirmed, death, and recovered cases with respect to Country and last Update
#
# Per country: the most recent Last_Update and the totals of the four count
# columns. The ten countries with the highest confirmed totals are kept,
# largest first. The earlier version simply took the first ten countries in
# file order, which is not a ranking.
# NOTE(review): "top" is interpreted as "most confirmed cases" - confirm.
country_summary = data.groupby('Country_Region').agg({
    'Last_Update': 'max',
    'Confirmed': 'sum',
    'Deaths': 'sum',
    'Recovered': 'sum',
    'Active': 'sum',
})
temp = country_summary.nlargest(10, 'Confirmed').reset_index()
# Put the timestamp first, then apply the display column names.
temp = temp[['Last_Update', 'Country_Region', 'Confirmed', 'Deaths', 'Recovered', 'Active']]
temp.columns = ["Last Update","Country","Confirmed","Deaths","Recovered","Active"]
temp

In [None]:
# Solution 10 - Number of confirmed, death, recovered, and active cases with respect to Country where deaths are greater than 150
#
# A country qualifies when the Deaths of all its rows sum to more than 150.
# The totals shown for it are taken from the rows stamped with the most
# recent Last_Update only. The latest timestamp is computed once
# (Series.max() skips NaT) instead of once per country and column.
latest_rows = data[data['Last_Update'] == data['Last_Update'].max()]
count_columns = data.columns[7:11]  # Confirmed, Deaths, Recovered, Active (by position)

tempList = []
for country in data['Country_Region'].unique():
    if data.loc[data['Country_Region'] == country, 'Deaths'].sum() > 150:
        country_latest = latest_rows[latest_rows['Country_Region'] == country]
        tempList.append([country] + [country_latest[column].sum() for column in count_columns])
temp = pd.DataFrame(tempList,columns=["Country","Confirmed","Deaths","Recovered","Active"])

# Plotting line graph of above dataset

# Enable this to generate a big expandable figure
# fig = plt.figure(num=None, figsize=(100, 100), dpi=80, facecolor='w', edgecolor='k')
for column in temp.columns[1:]:
    # Label each line with its column name so the legend cannot drift out of sync
    plt.plot(temp.Country, temp[column], label=column)
plt.legend()
plt.show()

In [None]:
# Solution 11 - Visualization of number of death cases with respect to USA`s state/province
#
# Deaths are summed per state over the US rows only. Filtering on the state
# name alone would also pick up rows from any other country that has a
# province with the same name.
us_rows = data[data.Country_Region == 'US']
# sort=False keeps the states in their order of first appearance; rows
# without a state name are left out (groupby drops NaN keys).
deaths_by_state = us_rows.groupby('Province_State', sort=False)['Deaths'].sum()
temp = [deaths_by_state.index.tolist(), deaths_by_state.tolist()]

# Enable this to generate a big expandable figure
# fig = plt.figure(num=None, figsize=(100, 100), dpi=80, facecolor='w', edgecolor='k')
plt.plot(temp[0],temp[1])
plt.show()

In [None]:
# Solution 12 - Visualization of number of active cases with respect to USA`s state/province
#
# Active cases are summed per state over the US rows only. Filtering on the
# state name alone would also pick up rows from any other country that has a
# province with the same name.
us_rows = data[data.Country_Region == 'US']
# sort=False keeps the states in their order of first appearance; rows
# without a state name are left out (groupby drops NaN keys).
active_by_state = us_rows.groupby('Province_State', sort=False)['Active'].sum()
temp = [active_by_state.index.tolist(), active_by_state.tolist()]

# Enable this to generate a big expandable figure
# fig = plt.figure(num=None, figsize=(100, 100), dpi=80, facecolor='w', edgecolor='k')
plt.plot(temp[0], temp[1])
plt.show()

In [None]:
# Solution 13 - Visualization of number of confirmed, death, recovered, and active cases with respect to Pakistan`s state/province
#
# Totals are computed per province over Pakistan's rows only. Filtering on
# the province name alone would add in rows from any other country that has
# a province with the same name.
metrics = ["Confirmed", "Deaths", "Recovered", "Active"]
pakistan_rows = data[data.Country_Region == 'Pakistan']
# sort=False keeps the provinces in their order of first appearance; rows
# without a province name are left out (groupby drops NaN keys).
province_totals = pakistan_rows.groupby('Province_State', sort=False)[metrics].sum()

# temp[0] holds the province names, temp[1:] one list of totals per metric
temp = [province_totals.index.tolist()] + [province_totals[metric].tolist() for metric in metrics]

fig = plt.figure(num=None, figsize=(15, 10), dpi=80, facecolor='w', edgecolor='k')
for totals in temp[1:]:
    plt.plot(temp[0], totals)
plt.legend(metrics)
plt.show()

In [None]:
# Solution 14 - Visualization of Worldwide confirmed cases over time
#
# Each row's Confirmed value is plotted against its Last_Update timestamp,
# in chronological order.
temp = data.loc[:, ['Last_Update', 'Confirmed']].sort_values('Last_Update')

# Large figure so the date axis stays readable
fig = plt.figure(num=None, figsize=(15, 10), dpi=80, facecolor='w', edgecolor='k')
# plt.plot handles datetime values directly; circle markers joined by a
# dotted line reproduce what plot_date drew, without the deprecated call.
plt.plot(temp.Last_Update, temp.Confirmed, marker='o', linestyle='dotted')
plt.gcf().autofmt_xdate()
plt.title('World Wide Covid19 Confirmed Cases')
plt.ylabel('Number of Cases')
# tight_layout must be called; the bare attribute reference did nothing
plt.tight_layout()
plt.show()