In [None]:
!pip install pandas
!pip install numpy
!pip install plotly
!pip install matplotlib

In [None]:
#importing packages
import pandas as pd
import numpy as np
import plotly.express as px
import plotly.graph_objects as go 
import matplotlib.pyplot as plt

In [None]:
# Read the two CSV files into DataFrames: confirmed cases first, then deaths.
confirmed_cases_df, confirmed_deaths_df = (
    pd.read_csv(csv_path)
    for csv_path in ('covid_confirmed_cases.csv', 'confirmed_deaths.csv')
)

# Data Manipulation and Cleaning

In [None]:
# Drop the columns the per-country analysis does not use.
# errors='ignore' keeps this cell re-runnable: each frame is reassigned to
# itself, so on a second run the columns are already gone and a plain drop()
# would raise KeyError.
confirmed_cases_df = confirmed_cases_df.drop(
    columns=['Lat', 'Long', 'Province/State'], errors='ignore'
)
confirmed_deaths_df = confirmed_deaths_df.drop(
    columns=['Lat', 'Long', 'Province/State'], errors='ignore'
)
confirmed_cases_df.head(10)

In [None]:
# Sum the rows that share a 'Country/Region', then transpose so the former
# column labels become the rows; that row axis is named 'Date' and turned back
# into an ordinary column.
# .sum() replaces .aggregate(np.sum): passing the NumPy callable to a groupby
# is deprecated in recent pandas and emits a FutureWarning, while the built-in
# reduction gives the same totals.
confirmed_cases_df = confirmed_cases_df.groupby(by='Country/Region').sum().T
confirmed_cases_df.index.name = 'Date'
confirmed_cases_df = confirmed_cases_df.reset_index()

# Same reshaping for the deaths data.
confirmed_deaths_df = confirmed_deaths_df.groupby(by='Country/Region').sum().T
confirmed_deaths_df.index.name = 'Date'
confirmed_deaths_df = confirmed_deaths_df.reset_index()

confirmed_cases_df.head()

In [None]:
# Reshape both frames from wide to long, keeping 'Date' as the identifier and
# storing the melted values in a column called 'Confirmed'.
# Note that the deaths frame also uses the 'Confirmed' column name for its counts.
confirmed_cases_melt_df = confirmed_cases_df.melt(
    id_vars='Date', value_name='Confirmed'
)
confirmed_deaths_melt_df = confirmed_deaths_df.melt(
    id_vars='Date', value_name='Confirmed'
)
confirmed_cases_melt_df.head(10)

In [None]:
# Parse the 'Date' column of each long frame and store it back as a
# 'dd/mm/yyyy' string. After this cell 'Date' holds text, not datetimes.
confirmed_cases_melt_df['Date'] = (
    pd.to_datetime(confirmed_cases_melt_df['Date']).dt.strftime('%d/%m/%Y')
)
confirmed_deaths_melt_df['Date'] = (
    pd.to_datetime(confirmed_deaths_melt_df['Date']).dt.strftime('%d/%m/%Y')
)
confirmed_deaths_melt_df.head(10)

In [None]:
# Latest date present in the deaths data, as a 'dd/mm/yyyy' string.
# 'Date' holds 'dd/mm/yyyy' text at this point, so a plain .max() would compare
# the values character by character (day first) and rank '31/01/2020' above
# '01/06/2021'. Parse the strings back to datetimes, take the chronological
# maximum, then re-format so max_date still matches the values stored in the
# 'Date' columns that later cells compare against.
max_date = (
    pd.to_datetime(confirmed_deaths_melt_df['Date'], format='%d/%m/%Y')
    .max()
    .strftime('%d/%m/%Y')
)
max_date

In [None]:
# Keep only the confirmed-case rows whose 'Date' equals max_date, then add up
# their 'Confirmed' values.
total_confirmed_df = confirmed_cases_melt_df.loc[
    confirmed_cases_melt_df['Date'].eq(max_date)
]
total_confirmed = total_confirmed_df.loc[:, 'Confirmed'].sum()
total_confirmed

In [None]:
# Keep only the deaths rows whose 'Date' equals max_date, then add up their
# counts (stored under the 'Confirmed' column name in this frame).
total_death_df = confirmed_deaths_melt_df.loc[
    confirmed_deaths_melt_df['Date'].eq(max_date)
]
total_death = total_death_df.loc[:, 'Confirmed'].sum()
total_death

# Data Visualization

In [None]:
# Confirmed-case rows for max_date only; this is the frame the bar chart and
# the world map below are drawn from.
total_confirmed_df = confirmed_cases_melt_df.loc[
    confirmed_cases_melt_df['Date'].eq(max_date)
]
total_confirmed_df

In [None]:
# Sum of the 'Confirmed' column over the rows selected for max_date.
total_confirmed = total_confirmed_df.loc[:, 'Confirmed'].sum()
total_confirmed

# 5 Visualizations


In [None]:
# Bar chart of the ten rows with the largest 'Confirmed' value on max_date,
# with each bar annotated with its count.
fig = px.bar(
    data_frame=total_confirmed_df.sort_values(by='Confirmed', ascending=False).iloc[:10],
    x='Country/Region',
    y='Confirmed',
    text='Confirmed',
    title='Total COVID 19 cases, Top 10',
)
fig.show()

In [None]:
# Scatter of every row of the long confirmed-cases frame: one column of
# points per 'Country/Region', with the 'Confirmed' value on the y axis.
plt.scatter(x='Country/Region', y='Confirmed', data=confirmed_cases_melt_df, s=10)
# A figure should stand on its own, so give it a title and axis labels.
plt.title('Confirmed COVID 19 cases by country, all dates')
plt.xlabel('Country/Region')
plt.ylabel('Confirmed')
# plt.show() accepts only the keyword argument `block`; the positional 10 that
# used to be passed here had no meaning and is rejected by backends whose
# show() takes keyword-only arguments.
plt.show()

In [None]:
# Scatter of the death counts over time, one colour per country.
# The deaths frame stores its counts under the column name 'Confirmed'; the
# `labels` mapping makes the axis and hover text read 'Deaths' instead of the
# misleading raw column name.
fig2 = px.scatter(
    confirmed_deaths_melt_df,
    x='Date',
    y='Confirmed',
    color='Country/Region',
    labels={'Confirmed': 'Deaths'},
    title='Increase in COVID 19 deaths by date, all countries',
)
fig2.show()

In [None]:
# Line chart of the death counts over time for the rows whose
# 'Country/Region' is 'Ireland'.
# The deaths frame stores its counts under the column name 'Confirmed'; the
# `labels` mapping makes the axis and hover text read 'Deaths' instead.
fig3 = px.line(
    confirmed_deaths_melt_df[confirmed_deaths_melt_df['Country/Region'] == 'Ireland'],
    x='Date',
    y='Confirmed',
    labels={'Confirmed': 'Deaths'},
    title='Increase in COVID 19 deaths by date, Ireland',
)
fig3.show()

In [None]:
# World map coloured by the 'Confirmed' value of each row in the max_date
# snapshot; rows are matched to map regions by their 'Country/Region' name.
fig4 = px.choropleth(
    total_confirmed_df,
    locations='Country/Region',
    locationmode='country names',
    color='Confirmed',
    color_continuous_scale='Rainbow',
    height=500,
    title='World map of all COVID 19 cases',
)
fig4.show()

In [None]:
# Bubble chart of the death counts in the max_date snapshot, one colour per
# country, with the marker size proportional to the count.
# The deaths frame stores its counts under the column name 'Confirmed'; the
# `labels` mapping makes the axes, legend and hover text read 'Deaths' instead.
# NOTE(review): x and y are the same column, so every point lies on the
# diagonal. If deaths versus confirmed cases was intended, the x values need to
# come from the confirmed-cases snapshot; confirm the intent.
# NOTE(review): log_x=True cannot place rows whose count is 0 on the x axis.
fig5 = px.scatter(
    total_death_df,
    x='Confirmed',
    y='Confirmed',
    color='Country/Region',
    size='Confirmed',
    log_x=True,
    height=500,
    labels={'Confirmed': 'Deaths'},
    title='Total COVID 19 Deaths, all countries',
)
fig5.show()