In [1]:
import pandas as pd
import matplotlib as plt
import plotly.graph_objects as go
import plotly.express as px
import plotly.io as pio
from plotly.subplots import make_subplots


In [2]:
# Column labels of the daily table. Other cells address columns by position
# in this list, so the order of the entries must not change.
header_by_day = [
    "#",
    "Country",
    "Date",
    "TotalCases",
    "NewCases",
    "TotalDeaths",
    "NewDeaths",
    "TotalRecovered",
    "NewRecovered",
    "ActiveCases",
    "Serious,Critical",
    "Tot Cases/1M pop",
    "Deaths/1M pop",
    "TotalTests",
    "Tests/1M pop",
    "Population",
    "Continent",
]

# Column labels of the weekly table. Other cells address columns by position
# in this list, so the order of the entries must not change.
header_by_week = [
    "#",
    "Country",
    "Cases in the last 7 days",
    "Cases in the preceding 7 days",
    "Weekly case /%/ change",
    "Cases in the last 7 days/1M pop",
    "Deaths in the last 7 days",
    "Deaths in the preceding 7 days",
    "Weekly Death /%/ change",
    "Deaths in the last 7 days/1M pop",
    "Population",
    "Continent",
]

In [3]:
def RepresentsInt(s):
    """Report whether ``int(s)`` succeeds.

    Parameters
    ----------
    s : object
        Value to probe, e.g. a string such as ``"42"`` or ``"4.2"``.

    Returns
    -------
    bool
        True when ``int(s)`` parses without error, False otherwise.
    """
    try:
        int(s)
    except (TypeError, ValueError, OverflowError):
        # ValueError: text that is not an integer literal (e.g. "4.2").
        # TypeError: None or another type int() cannot accept.
        # OverflowError: float infinity.
        # None of these represents an integer, so answer False rather than crash.
        return False
    return True

def convert_numeric(num):
    """Parse a formatted numeric string into an ``int`` or ``float``.

    Surrounding whitespace is ignored. Thousands separators (``,``), plus
    signs (``+``) and percent signs (``%``) are removed before parsing.
    Missing-value markers (empty/blank text, ``"nan"``, ``"N/A"``) map to 0.

    Parameters
    ----------
    num : str
        Raw cell text, e.g. ``"+34,828"``, ``"-6%"`` or ``" N/A"``.

    Returns
    -------
    int or float
        ``int`` when the cleaned text is an integer literal, otherwise ``float``.
        Missing-value markers return ``0``.

    Raises
    ------
    ValueError
        If the cleaned text is neither an integer nor a float literal.
    """
    text = num.strip()
    # Compare after stripping so "N/A", " N/A", "" and any run of blanks are
    # all recognised, not only the exact spellings seen so far in the data.
    if text in ("", "nan", "N/A"):
        return 0

    for symbol in (",", "+", "%"):
        text = text.replace(symbol, "")

    # Try int first so whole numbers keep an integer type; fall back to float.
    try:
        return int(text)
    except ValueError:
        return float(text)

In [4]:
# Load the daily table from a tab-separated file (path is relative to the
# working directory). read_table uses a tab delimiter by default.
df_covid_by_day = pd.read_table("Corona_by_day.tsv")

In [5]:
# Entries 3-15 of header_by_day name the numeric columns: turn each cell's
# text form into an int/float via convert_numeric.
for column in header_by_day[3:16]:
    raw_cells = df_covid_by_day[column]
    df_covid_by_day[column] = raw_cells.apply(lambda cell: convert_numeric(str(cell)))

# Entry 16 is the continent label: remove every space from it.
continent_col = header_by_day[16]
df_covid_by_day[continent_col] = df_covid_by_day[continent_col].apply(
    lambda cell: str(cell).replace(" ", "")
)

# Dates are parsed with an explicit day-month-year format.
df_covid_by_day["Date"] = pd.to_datetime(df_covid_by_day["Date"], format="%d-%m-%Y")

In [6]:
# Load the weekly table from a tab-separated file (path is relative to the
# working directory). read_table uses a tab delimiter by default.
df_covid_by_week = pd.read_table("Corona_by_week.tsv")

In [7]:
# Entries 2-10 of header_by_week name the numeric columns: turn each cell's
# text form into an int/float via convert_numeric.
for column in header_by_week[2:11]:
    raw_cells = df_covid_by_week[column]
    df_covid_by_week[column] = raw_cells.apply(lambda cell: convert_numeric(str(cell)))

# The last entry is the continent label: remove every space from it.
continent_col = header_by_week[-1]
df_covid_by_week[continent_col] = df_covid_by_week[continent_col].apply(
    lambda cell: str(cell).replace(" ", "")
)

In [8]:
# Display the daily frame; the rendered output shows only the first and last rows.
df_covid_by_day

Unnamed: 0,#,Country,Date,TotalCases,NewCases,TotalDeaths,NewDeaths,TotalRecovered,NewRecovered,ActiveCases,"Serious,Critical",Tot Cases/1M pop,Deaths/1M pop,TotalTests,Tests/1M pop,Population,Continent
0,1,USA,2022-02-28,80656025,34828,975951,843,53500262,278579,26179812,7770,241322,2920,950614834,2844228,334225917,NorthAmerica
1,2,India,2022-02-28,42931045,6915,514054,211,42324550,16864,92441,8944,30610,367,767481346,547211,1402532051,Asia
2,3,Brazil,2022-02-28,28787620,19516,649443,248,26336373,152750,1801804,8318,133853,3020,63776166,296539,215068475,SouthAmerica
3,4,France,2022-02-28,22702815,13483,138367,232,20901504,235191,1662944,2484,346537,2112,246629975,3764574,65513384,Europe
4,5,UK,2022-02-28,18886701,27312,161361,46,17449650,98709,1275690,289,275806,2356,480341745,7014519,68478212,Europe
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1125,222,Western Sahara,2022-03-04,10,0,1,0,8,0,1,0,16,2,0,0,621823,Africa
1126,223,MS Zaandam,2022-03-04,9,0,2,0,7,0,0,0,0,0,0,0,0,
1127,224,Marshall Islands,2022-03-04,7,0,0,0,7,0,0,0,117,0,0,0,59856,Australia/Oceania
1128,225,Saint Helena,2022-03-04,2,0,0,0,2,0,0,0,327,0,0,0,6107,Africa


In [9]:
# Inspect the dtype of every column in the daily frame.
df_covid_by_day.dtypes

#                            int64
Country                     object
Date                datetime64[ns]
TotalCases                   int64
NewCases                     int64
TotalDeaths                  int64
NewDeaths                    int64
TotalRecovered               int64
NewRecovered                 int64
ActiveCases                  int64
Serious,Critical             int64
Tot Cases/1M pop             int64
Deaths/1M pop                int64
TotalTests                   int64
Tests/1M pop                 int64
Population                   int64
Continent                   object
dtype: object

In [10]:
# Display the weekly frame; the rendered output shows only the first and last rows.
df_covid_by_week

Unnamed: 0,#,Country,Cases in the last 7 days,Cases in the preceding 7 days,Weekly case /%/ change,Cases in the last 7 days/1M pop,Deaths in the last 7 days,Deaths in the preceding 7 days,Weekly Death /%/ change,Deaths in the last 7 days/1M pop,Population,Continent
0,1,S. Korea,1293249,909271,42.0,25189.0,797,500,59,16.0,51342824,Asia
1,2,Germany,1075329,1143573,-6.0,12766.0,1387,1437,-3,16.0,84231046,Europe
2,3,Vietnam,734744,427980,72.0,7437.0,682,604,13,7.0,98799997,Asia
3,4,Russia,722996,1031455,-30.0,4951.0,5422,5408,0,37.0,146038899,Europe
4,5,Japan,459767,508696,-10.0,3654.0,1594,1557,2,13.0,125830265,Asia
...,...,...,...,...,...,...,...,...,...,...,...,...
212,213,Sierra Leone,0,4,-100.0,0.0,0,0,0,0.0,8247500,Africa
213,214,Tajikistan,0,3,-100.0,0.0,0,0,0,0.0,9894835,Asia
214,215,Tanzania,0,71,-100.0,0.0,0,2,-100,0.0,62570078,Africa
215,216,Ukraine,0,137426,-100.0,0.0,0,1399,-100,0.0,43295254,Europe


In [11]:
# Inspect the dtype of every column in the weekly frame.
df_covid_by_week.dtypes

#                                     int64
Country                              object
Cases in the last 7 days              int64
Cases in the preceding 7 days         int64
Weekly case /%/ change              float64
Cases in the last 7 days/1M pop     float64
Deaths in the last 7 days             int64
Deaths in the preceding 7 days        int64
Weekly Death /%/ change               int64
Deaths in the last 7 days/1M pop    float64
Population                            int64
Continent                            object
dtype: object

### Câu 1,4,5 + World Chart (Toàn)

### Câu 2,3,6 (Trí)