In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

import os

# Load every CSV found under the Kaggle input directory.
# dfs[i] is the frame read from the file named in row i of list_name.
# NOTE: later cells index dfs by position, which relies on the order in which
# os.walk yields the files; sorting here would change those positions.
dfs=[]
csv_names=[]
for dirname, _, filenames in os.walk('/kaggle/input'):
    for filename in filenames:
        # Test the extension of the file name itself; a '.csv' substring
        # elsewhere in the path (e.g. a directory name) must not match.
        if not filename.endswith('.csv'):
            continue
        dfs.append(pd.read_csv(os.path.join(dirname, filename)))
        csv_names.append(filename)
list_name=pd.DataFrame(csv_names,columns=['name'])
# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [2]:
# Show the loaded CSV file names; row i names the file that was read into dfs[i].
list_name

Unnamed: 0,name
0,reference.csv
1,countries-aggregated.csv
2,us_simplified.csv
3,key-countries-pivoted.csv
4,us_deaths.csv
5,time-series-19-covid-combined.csv
6,worldwide-aggregate.csv
7,us_confirmed.csv


In [3]:
# Peek at the first loaded table (reference.csv in the listing above; its
# position in dfs depends on the order os.walk returned the files).
dfs[0].head(3)

Unnamed: 0,UID,iso2,iso3,code3,FIPS,Admin2,Province_State,Country_Region,Lat,Long_,Combined_Key,Population
0,4,AF,AFG,4.0,,,,Afghanistan,33.93911,67.709953,Afghanistan,38928341.0
1,8,AL,ALB,8.0,,,,Albania,41.1533,20.1683,Albania,2877800.0
2,10,AQ,ATA,10.0,,,,Antarctica,-71.9499,23.347,Antarctica,


In [4]:
# Count how often each Combined_Key occurs, to see whether the key is unique per row.
dfs[0]['Combined_Key'].value_counts()

Afghanistan                1
Lincoln, Nebraska, US      1
Harlan, Nebraska, US       1
Hayes, Nebraska, US        1
Hitchcock, Nebraska, US    1
                          ..
Grand, Colorado, US        1
Gunnison, Colorado, US     1
Hinsdale, Colorado, US     1
Huerfano, Colorado, US     1
Weston, Wyoming, US        1
Name: Combined_Key, Length: 4316, dtype: int64

In [5]:
# Build a per-country population lookup from the reference table.
reference=dfs[0]

# Total over every row that shares a Country_Region (used as the fallback below).
summed_pop=reference.groupby('Country_Region')['Population'].sum()

# The reference table mixes country-level rows with state/county-level rows
# (e.g. "Afghanistan" next to "Lincoln, Nebraska, US"). Where a country has its
# own row AND sub-national rows, summing all of them counts people more than once.
# NOTE(review): assumes country-level rows are the ones with empty Province_State
# and Admin2, as in the sample rows shown for this table -- confirm against the data.
country_rows=reference[reference['Province_State'].isna() & reference['Admin2'].isna()]
# min_count=1 keeps NaN when the country row has no population, so the fallback applies.
country_pop=country_rows.groupby('Country_Region')['Population'].sum(min_count=1)

# Prefer the country-level figure; fall back to the summed total where none exists.
pops=country_pop.reindex(summed_pop.index).fillna(summed_pop).reset_index()

# Drop entries whose name contains '2022'.
pops=pops[~pops['Country_Region'].str.contains('2022',regex=False)]
pops=pops.rename(columns={'Country_Region':'Country'})
pops

Unnamed: 0,Country,Population
0,Afghanistan,38928341.0
1,Albania,2877800.0
2,Algeria,43851043.0
3,Andorra,77265.0
4,Angola,32866268.0
...,...,...
193,West Bank and Gaza,5101416.0
194,Western Sahara,597330.0
196,Yemen,29825968.0
197,Zambia,18383956.0


In [6]:
# List the entries whose population came out as exactly zero.
pops[pops['Population']==0]

Unnamed: 0,Country,Population
5,Antarctica,0.0
49,Diamond Princess,0.0
106,MS Zaandam,0.0
168,Summer Olympics 2020,0.0


In [7]:
# Peek at the second loaded table (countries-aggregated.csv in the listing above;
# its position in dfs depends on the order os.walk returned the files).
dfs[1].head(3)

Unnamed: 0,Date,Country,Confirmed,Recovered,Deaths
0,2020-01-22,Afghanistan,0,0,0
1,2020-01-23,Afghanistan,0,0,0
2,2020-01-24,Afghanistan,0,0,0


In [8]:
# Number of rows recorded for each country.
dfs[1]['Country'].value_counts()

Afghanistan    816
Namibia        816
Netherlands    816
New Zealand    816
Nicaragua      816
              ... 
Ghana          816
Greece         816
Grenada        816
Guatemala      816
Zimbabwe       816
Name: Country, Length: 198, dtype: int64

In [9]:
# Rows per date, ordered chronologically (the dates are ISO strings, so a plain sort works).
date_counts = dfs[1]['Date'].value_counts().reset_index()
date_counts.columns = ['Date', 'count']
tmp = date_counts.sort_values(by='Date')
tmp

Unnamed: 0,Date,count
0,2020-01-22,198
26,2020-01-23,198
539,2020-01-24,198
540,2020-01-25,198
541,2020-01-26,198
...,...,...
276,2022-04-12,198
277,2022-04-13,198
278,2022-04-14,198
279,2022-04-15,198


In [10]:
import plotly.express as px

# Start from the second loaded table (one row per country per date).
# The statements further down rebind `data` to new frames before adding columns.
data=dfs[1]
def log_cal(x):
    """Return the natural log of a count, clamped so the result is never negative.

    Parameters
    ----------
    x : number
        A count (e.g. confirmed cases).

    Returns
    -------
    number
        ``np.log(x)`` when ``x >= 1``; ``x`` itself when it is NaN; ``0`` for
        every other value (zero, fractions below 1, negatives).
    """
    if x>=1:
        return np.log(x)
    # NaN compares unequal to itself; pass it through instead of masking it as 0.
    if x!=x:
        return x
    # Everything below 1 maps to 0 rather than falling through and returning None.
    return 0
def rate_cal(row):
    """Add derived ratio fields to one record and return it.

    Reads ``Confirmed``, ``Recovered``, ``Deaths`` and ``Population`` from
    ``row`` and writes, in this order:

    * ``rate_Recov`` / ``rate_Death`` -- recovered and deaths divided by
      confirmed, or 0 when confirmed is 0.
    * ``D/R`` -- deaths divided by recovered, capped at 10; when recovered is 0
      the value is 10 if there are any deaths, otherwise 0.
    * ``percent_Conf`` / ``percent_Recov`` / ``percent_Death`` -- each count
      divided by population, or 0 when population is 0.

    The row is modified in place and also returned.
    """
    confirmed = row['Confirmed']
    recovered = row['Recovered']
    deaths = row['Deaths']

    # Shares of confirmed cases.
    has_cases = confirmed != 0
    row['rate_Recov'] = recovered / confirmed if has_cases else 0
    row['rate_Death'] = deaths / confirmed if has_cases else 0

    # Deaths-to-recoveries ratio, capped at 10.
    if recovered != 0:
        ratio = deaths / recovered
        row['D/R'] = 10 if ratio >= 10 else ratio
    else:
        # No recoveries: the ratio is undefined, so use the cap when there are deaths.
        row['D/R'] = 10 if deaths != 0 else 0

    # Shares of the population.
    population = row['Population']
    has_population = population != 0
    row['percent_Conf'] = confirmed / population if has_population else 0
    row['percent_Recov'] = recovered / population if has_population else 0
    row['percent_Death'] = deaths / population if has_population else 0
    return row
# Attach each country's population; the inner join keeps only countries found in both tables.
data=pd.merge(data,pops,on='Country',how='inner')
data['Month']=pd.to_datetime(data['Date']).dt.to_period('M')

# Collapse the daily rows to one row per country and month.
# NOTE(review): Confirmed/Recovered/Deaths are treated as running totals (as in the
# JHU-derived countries-aggregated.csv) -- confirm. Summing running totals over a
# month would inflate every figure by roughly the number of days, so the
# end-of-month value is taken instead. Sorting first makes 'last' the latest date
# (Date is an ISO 'YYYY-MM-DD' string, so it sorts chronologically).
data=data.sort_values(['Country','Date'])
data=data.groupby(['Country','Month','Population']).agg({'Confirmed':'last','Recovered':'last','Deaths':'last'}).reset_index()

# Derived metrics: log of confirmed cases, then the per-row ratios from rate_cal.
data['Conf_log']=data['Confirmed'].apply(log_cal)
data=data.apply(rate_cal,axis=1)

# Columns to visualise, in plotting order.
cols=['Conf_log','rate_Recov','rate_Death','D/R','percent_Conf','percent_Recov','percent_Death']
def create_fig(data,col_name,map_type):
    """Show an animated world choropleth of one column.

    Parameters
    ----------
    data : DataFrame
        Must contain ``Country`` (matched as country names), ``Month`` (one
        animation frame per value) and the column named by ``col_name``.
    col_name : str
        Column that drives the colour of each country.
    map_type : str
        Map projection passed to plotly (e.g. ``'equirectangular'``).

    The colour axis runs from 0 to the column's maximum over all of ``data``.
    The figure is displayed; nothing is returned.
    """
    fig = px.choropleth(
        data_frame=data,
        locations='Country',
        locationmode='country names',
        color=col_name,
        color_continuous_scale='Rainbow',
        animation_frame='Month',
        projection=map_type,
    )

    fig.update_geos(resolution=110)

    # All layout settings in one call: no margins, small fixed canvas,
    # and a colour bar centred vertically.
    colorbar_placement = dict(len=1.2, yanchor='middle', y=0.5)
    fig.update_layout(
        margin=dict(l=0, r=0, t=0, b=0),
        transition_duration=10,
        coloraxis_colorbar=colorbar_placement,
        width=500,
        height=450,
    )

    # Colour range from 0 to the column maximum; thin colour bar.
    fig.update_coloraxes(
        cmin=0,
        cmax=data[col_name].max(),
        colorbar=dict(thicknessmode='fraction', thickness=0.02),
    )

    fig.show()

In [11]:
# Draw every metric twice: once on a flat map and once on a globe.
projections = ('equirectangular', 'orthographic')
for col in cols:
    print(f'Visualization of {col}')
    for map_type in projections:
        create_fig(data, col, map_type)

Visualization of Conf_log


Visualization of rate_Recov


Visualization of rate_Death


Visualization of D/R


Visualization of percent_Conf


Visualization of percent_Recov


Visualization of percent_Death
