In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline
from pandas import Series, DataFrame
from datetime import datetime

In [None]:
# Source CSVs: cumulative global confirmed cases and deaths, one column per date.
url_confirmed = (
    "https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/"
    "csse_covid_19_data/csse_covid_19_time_series/"
    "time_series_covid19_confirmed_global.csv"
)
url_deaths = (
    "https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/"
    "csse_covid_19_data/csse_covid_19_time_series/"
    "time_series_covid19_deaths_global.csv"
)

# Download both time series, confirmed first and deaths second.
df_confirmed_primary, df_deaths_primary = (
    pd.read_csv(source_url) for source_url in (url_confirmed, url_deaths)
)


In [None]:
# Labels of every column after the first four; these are the per-date columns.
date_columns = df_confirmed_primary.iloc[:, 4:].columns
date_columns

Index(['1/22/20', '1/23/20', '1/24/20', '1/25/20', '1/26/20', '1/27/20',
       '1/28/20', '1/29/20', '1/30/20', '1/31/20',
       ...
       '2/28/23', '3/1/23', '3/2/23', '3/3/23', '3/4/23', '3/5/23', '3/6/23',
       '3/7/23', '3/8/23', '3/9/23'],
      dtype='object', length=1143)

In [None]:
# setting Date Format
# The column labels are month/day/two-digit-year strings (e.g. '1/22/20'), so the
# format is stated explicitly instead of being inferred from the data.
date_format = pd.to_datetime(date_columns, format='%m/%d/%y')
date_format

DatetimeIndex(['2020-01-22', '2020-01-23', '2020-01-24', '2020-01-25',
               '2020-01-26', '2020-01-27', '2020-01-28', '2020-01-29',
               '2020-01-30', '2020-01-31',
               ...
               '2023-02-28', '2023-03-01', '2023-03-02', '2023-03-03',
               '2023-03-04', '2023-03-05', '2023-03-06', '2023-03-07',
               '2023-03-08', '2023-03-09'],
              dtype='datetime64[ns]', length=1143, freq=None)

In [None]:
# setting Date Range
# Analysis window (both ends inclusive).
START_DATE = '2020-02-01'
END_DATE = '2021-07-31'

# Boolean mask over the date columns. It is deliberately not named `range`,
# which would shadow the Python builtin for every later cell.
date_mask = (date_format >= START_DATE) & (date_format <= END_DATE)
selected_dates = date_columns[date_mask]

# Keep the four leading descriptive columns plus the date columns inside the window.
df_confirmed = df_confirmed_primary.iloc[:, :4].join(df_confirmed_primary.iloc[:, 4:][selected_dates])
df_deaths = df_deaths_primary.iloc[:, :4].join(df_deaths_primary.iloc[:, 4:][selected_dates])

In [None]:
# Worldwide totals per date: add up all rows of each date column
# (DataFrame.sum works column-wise by default).
global_confirmed = df_confirmed.iloc[:, 4:].sum()
global_deaths = df_deaths.iloc[:, 4:].sum()

In [None]:
# Aggregated dataframe: one row per date with the two worldwide totals.
# Both series are indexed by the same date labels, so they line up row for row;
# the shared index is then turned into the 'Date' column.
global_data = (
    pd.DataFrame({'Confirmed Cases': global_confirmed, 'Deaths': global_deaths})
    .rename_axis('Date')
    .reset_index()
)

global_data

In [None]:
# Create 2 new columns for per-day confirmed cases and per-day deaths.
# The cumulative totals are differenced over the full downloaded history and then
# looked up for the dates present in `global_data`. This way the first row of the
# window holds the real one-day increase instead of the cumulative total to date.
daily_confirmed_all = df_confirmed_primary.iloc[:, 4:].sum(axis=0).diff()
daily_deaths_all = df_deaths_primary.iloc[:, 4:].sum(axis=0).diff()

# The lookup is by date label, so every date in the window must exist in the source.
assert global_data['Date'].isin(daily_confirmed_all.index).all(), "unknown date label in global_data"

# Only the very first date of the full history has no previous day to difference
# against; for that single case fall back to the cumulative value.
global_data['Confirmed cases/day'] = pd.Series(
    daily_confirmed_all.reindex(global_data['Date']).to_numpy(), index=global_data.index
).fillna(global_data['Confirmed Cases'])
global_data['Deaths/day'] = pd.Series(
    daily_deaths_all.reindex(global_data['Date']).to_numpy(), index=global_data.index
).fillna(global_data['Deaths'])

global_data

In [None]:
# Load the three stock price CSV files, one dataframe per company.
df_AC, df_BoA, df_BGold = map(
    pd.read_csv,
    ("Air Canada.csv", "Bank of America.csv", "Barrick Gold Corporation.csv"),
)

In [None]:
# Display the Air Canada frame as loaded from its CSV (before any columns are dropped).
df_AC

In [None]:
# Keeping only stock close price
# Reassign instead of dropping in place, and tolerate already-removed columns,
# so that re-running this cell does not raise a KeyError.
df_AC = df_AC.drop(columns=['Open', 'High', 'Low', 'Adj Close', 'Volume'], errors='ignore')

In [None]:
# Keep only the close price for Bank of America. Reassigning (not in place) and
# ignoring already-removed columns makes the cell safe to re-run.
df_BoA = df_BoA.drop(columns=['Open', 'High', 'Low', 'Adj Close', 'Volume'], errors='ignore')

In [None]:
# Keep only the close price for Barrick Gold. Reassigning (not in place) and
# ignoring already-removed columns makes the cell safe to re-run.
df_BGold = df_BGold.drop(columns=['Open', 'High', 'Low', 'Adj Close', 'Volume'], errors='ignore')

In [None]:
# Combine Air Canada and Bank of America rows that share the same 'Date'.
df_merge1 = df_AC.merge(df_BoA, on='Date')
df_merge1

In [None]:
# Add Barrick Gold to the two-stock frame, again matching rows on 'Date'.
df_merge2 = df_merge1.merge(df_BGold, on='Date')
df_merge2

In [None]:
# Give each close-price column a name that identifies its company.
close_column_names = {
    'Close_x': 'Close_AC',
    'Close_y': 'Close_BoA',
    'Close': 'Close_BGold',
}
df_stock_data = df_merge2.rename(columns=close_column_names)
df_stock_data

In [None]:
# Pull out the 'Date' column of the combined stock frame.
date_column_stock = df_stock_data['Date']
date_column_stock

In [None]:
# setting Date Format
# Parse the stock 'Date' column into datetimes and display the result.
# NOTE(review): no later cell in the visible part of the notebook reads
# date_format_stock; df_stock_data itself is left unchanged by this cell.
date_format_stock = pd.to_datetime(date_column_stock)
date_format_stock

In [None]:
# Merging the COVID data with stock data
# Rows are matched on the calendar date, not on row position: a positional join
# pairs the n-th COVID row with the n-th stock row, which is only correct if both
# tables contain exactly the same dates in the same order.
# NOTE(review): presumably the stock CSVs list trading days only - confirm.
covid_by_date = global_data.assign(Date=pd.to_datetime(global_data['Date']))
stock_by_date = df_stock_data.assign(Date=pd.to_datetime(df_stock_data['Date']))

# A left merge keeps every COVID date; dates without a stock row get NaN prices,
# which the missing-value cells below report and then drop.
df_aggregated = covid_by_date.merge(stock_by_date, on='Date', how='left')
df_aggregated

In [None]:
# Count the missing entries in every column of the aggregated frame.
missing_value = df_aggregated.isna().sum()
missing_value

In [None]:
# Remove every row that contains at least one missing value.
df_aggregated = df_aggregated.dropna(axis='index')

In [None]:
# Re-count missing entries per column after the rows with gaps were dropped.
df_aggregated.isna().sum()

In [None]:
# Display the aggregated frame after rows with missing values were dropped.
df_aggregated

# **Visualization**

In [None]:
# Make sure 'Date' is a datetime column so matplotlib treats the x axis as dates,
# then list the column dtypes to confirm the conversion.
df_aggregated = df_aggregated.assign(Date=pd.to_datetime(df_aggregated['Date']))
df_aggregated.dtypes

In [None]:
from datetime import datetime
import matplotlib.cbook as cbook
import matplotlib.dates as mdates
from matplotlib.ticker import (MultipleLocator, FormatStrFormatter,
                               AutoMinorLocator)

fig, ax = plt.subplots(figsize=(15, 6))
dates = df_aggregated["Date"]

# Stock closing prices as lines; each legend label is the column name itself.
for close_col in ("Close_AC", "Close_BoA", "Close_BGold"):
    ax.plot(dates, df_aggregated[close_col], label=close_col, linewidth=3)

# Daily COVID counts as thin bars drawn on the same axes.
ax.bar(dates, df_aggregated["Confirmed cases/day"], label="Per day confirmed cases", color='blue', width=0.20)
ax.bar(dates, df_aggregated["Deaths/day"], label="Per day death", color='red', width=0.20)

# x axis: one major tick per calendar month, labelled as YYYY-MM.
dtFmt = mdates.DateFormatter('%Y-%m')
ax.xaxis.set_major_formatter(dtFmt)
ax.xaxis.set_major_locator(mdates.MonthLocator(interval=1))
plt.xticks(rotation=90, fontweight='light', fontsize='x-small')

# y axis: logarithmic, because prices and case counts share one axis.
ax.set_yscale('log')

ax.set_xlabel('Date')
ax.set_ylabel('Values (log scale)')
ax.set_title('Closing prices of stock vs covid confirmed cases and deaths')
ax.legend()

We use a line chart (stock closing prices) and a bar chart (daily confirmed cases and deaths) to show how COVID-19 confirmed cases, deaths, and stock prices changed over time (from Feb 2020 to Feb 2021).