### load the data

In [226]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
import plotly.express as px
import cufflinks as cf

In [227]:
cf.go_offline()

In [228]:
train = pd.read_csv('covid.csv', low_memory=False)

In [229]:
train.head()

Unnamed: 0,Id,County,Province_State,Country_Region,Population,Weight,Date,Target,TargetValue
0,1,,,Afghanistan,27657145,0.058359,2020-01-23,ConfirmedCases,0
1,2,,,Afghanistan,27657145,0.583587,2020-01-23,Fatalities,0
2,3,,,Afghanistan,27657145,0.058359,2020-01-24,ConfirmedCases,0
3,4,,,Afghanistan,27657145,0.583587,2020-01-24,Fatalities,0
4,5,,,Afghanistan,27657145,0.058359,2020-01-25,ConfirmedCases,0


### zoom into different countries:USA

In [230]:
usa = train[train['Country_Region']=='US']

In [231]:
usa.isnull().sum()

Id                    0
County            15400
Province_State      280
Country_Region        0
Population            0
Weight                0
Date                  0
Target                0
TargetValue           0
dtype: int64

In [232]:
usa[usa['Province_State'].isnull()]['Population'].unique() # that means the data for one state is missing

array([324141489])

In [233]:
usa[usa['Population']==324141489]['Province_State'].unique()

array([nan], dtype=object)

In [234]:
usa_df = usa[['Date', 'Province_State', 'Target', 'TargetValue']]

In [235]:
usa_df1 = usa_df.groupby(['Province_State','Target']).sum().unstack('Target').reset_index()

In [236]:
usa_df1.columns=['states', 'confirmed', 'fatalities']

In [237]:
state_abbr = pd.read_csv('states.csv')

In [238]:
state_abbr.columns=['states', 'abbr']

In [239]:
usa_df2=state_abbr.merge(usa_df1, on='states')

In [300]:
usa_df2.head()

Unnamed: 0,states,abbr,confirmed,fatalities
0,Alabama,AL,43615,1483
1,Alaska,AK,1184,22
2,Arizona,AZ,59704,2200
3,Arkansas,AR,20458,330
4,California,CA,279430,9708


In [240]:
fig = px.choropleth(usa_df2, locations='abbr', locationmode="USA-states",scope='usa',
                    color='confirmed', hover_data=['states', 'fatalities'])
fig.show()

In [264]:
usa_trend = usa_df.groupby(['Date','Province_State','Target']).sum().unstack('Target').reset_index()
usa_trend.columns = ['date', 'states', 'confirmed', 'fatalities']

In [299]:
fig = px.line(usa_trend, x='date', y='confirmed', color='states', title='Daily confirmed cases in the USA till 2020-06-10')
fig.show()

In [298]:
fig = px.line(usa_trend, x='date', y='fatalities', color='states', title='Daily fatalities cases in the USA till 2020-06-10')
fig.show()

In [274]:
total_confirmed = usa_trend.groupby(['date', 'states']).sum().unstack('states')['confirmed'].cumsum().reset_index()

In [275]:
total_confirmed = pd.melt(total_confirmed, id_vars=['date'], value_vars = total_confirmed.columns)

In [277]:
total_confirmed.head()

Unnamed: 0,date,states,value
0,2020-01-23,Alabama,0
1,2020-01-24,Alabama,0
2,2020-01-25,Alabama,0
3,2020-01-26,Alabama,0
4,2020-01-27,Alabama,0


In [297]:
fig = px.line(total_confirmed, x='date', y='value', color='states', title = 'Total Covid cofirmed cases till 2020-06-10 in USA')
fig.show()

In [280]:
total_fatalities = usa_trend.groupby(['date', 'states']).sum().unstack('states')['fatalities'].cumsum().reset_index()

In [281]:
total_fatalities = pd.melt(total_fatalities, id_vars=['date'], value_vars = total_fatalities.columns)

In [283]:
total_fatalities.head()

Unnamed: 0,date,states,value
0,2020-01-23,Alabama,0
1,2020-01-24,Alabama,0
2,2020-01-25,Alabama,0
3,2020-01-26,Alabama,0
4,2020-01-27,Alabama,0


In [296]:
fig = px.line(total_fatalities, x='date', y='value', color='states', title = 'Total Covid fatalities cases till 2020-06-10 in USA')
fig.show()

### zoom into different countries: Australia

In [241]:
Australia = train[train['Country_Region']=='Australia']

In [242]:
Australia.isnull().sum()

Id                   0
County            2520
Province_State     280
Country_Region       0
Population           0
Weight               0
Date                 0
Target               0
TargetValue          0
dtype: int64

In [243]:
Aus_df = Australia[['Date', 'Province_State', 'Target', 'TargetValue']]

In [244]:
Aus_df1 = Aus_df.groupby(['Date','Province_State','Target']).sum().unstack('Target').reset_index()

In [245]:
Aus_df1.columns=['date','states', 'confirmed', 'fatalities']

In [293]:
fig = px.line(Aus_df1, x='date', y='confirmed', color='states', title='Daily confirmed cases in Australia till 2020-06-10')
fig.show()

In [246]:
total_confirmed_Aus = Aus_df1.groupby(['date', 'states']).sum()['confirmed'].unstack('states').reset_index().set_index('date').cumsum().reset_index()

In [247]:
total_confirmed_Aus.head()

states,date,Australian Capital Territory,New South Wales,Northern Territory,Queensland,South Australia,Tasmania,Victoria,Western Australia
0,2020-01-23,0,0,0,0,0,0,0,0
1,2020-01-24,0,0,0,0,0,0,0,0
2,2020-01-25,0,0,0,0,0,0,0,0
3,2020-01-26,0,3,0,0,0,0,1,0
4,2020-01-27,0,4,0,0,0,0,1,0


In [248]:
total_confirmed_Aus = pd.melt(total_confirmed_Aus, id_vars=['date'], value_vars=total_confirmed_Aus.columns)

In [252]:
total_confirmed_Aus.head()

Unnamed: 0,date,states,value
0,2020-01-23,Australian Capital Territory,0
1,2020-01-24,Australian Capital Territory,0
2,2020-01-25,Australian Capital Territory,0
3,2020-01-26,Australian Capital Territory,0
4,2020-01-27,Australian Capital Territory,0


In [295]:
fig = px.line(total_confirmed_Aus, x='date', y='value', color='states', title='Total Covid cofirmed cases till 2020-06-10 in Australia')
fig.show()

### zoom into different area: Europe

In [306]:
european_countries = ['Austria','Belgium','Bulgaria','Croatia','Cyprus','Czechia','Denmark','Estonia','Finland','France','Germany','Greece','Hungary','Ireland',
    'Italy', 'Latvia','Luxembourg','Lithuania','Malta','Norway','Netherlands','Poland','Portugal','Romania','Slovakia','Slovenia',
    'Spain', 'Sweden', 'United Kingdom', 'Iceland', 'Russia', 'Switzerland', 'Serbia', 'Ukraine', 'Belarus',
    'Albania', 'Bosnia and Herzegovina', 'Kosovo', 'Moldova', 'Montenegro', 'North Macedonia']

In [307]:
europe = train[train['Country_Region'].isin(european_countries)]

In [315]:
europe_covid = europe.groupby(['Country_Region', 'Target']).sum()['TargetValue'].unstack('Target').reset_index()

In [316]:
europe_covid.columns = ['european_countries', 'confirmed', 'fatalities' ]

In [324]:
fig = px.choropleth(europe_covid, locations='european_countries', locationmode = 'country names', 
                color='confirmed', scope='europe', hover_data=['confirmed'],
                hover_name='european_countries', color_continuous_scale='peach',
                title = 'European countries total covid confirmed cases till 2020-06-10')
fig.show()

In [333]:
europe_daily_covid = europe.groupby(['Date', 'Country_Region', 'Target']).sum()['TargetValue'].unstack('Target').reset_index()

In [334]:
europe_daily_covid.columns = ['date', 'countries', 'confirmed', 'fatalities']

In [340]:
fig = px.line(europe_daily_covid, x='date', y='confirmed', color='countries', title = 'Daily confirmed cases for European countries till 2020-06-10')
fig.show()