In [2]:
import datetime
import io
import json
import time
import pandas as pd
import requests
from bs4 import BeautifulSoup


def getDailyConfirmed():
    # URL of the data file (time_series_covid19_confirmed_global.csv) from CSSE at Johns Hopkins University GitHub
    url = "https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/" \
          "csse_covid_19_time_series/time_series_covid19_confirmed_global.csv"

    # Fetch data from the GitHub
    global_confirmed_data = requests.get(url).content

    # Read the file and turn it in the dataframe type and set the first row as the column name
    df = pd.read_csv(io.StringIO(global_confirmed_data.decode('utf-8')), header=0, names=None)

    # Get the Hong Kong time series data according to the Province/State value and
    # Get all the columns except the first 4 columns and
    # Rename its row name as confirmed
    totalConfirmed_data = ((df.loc[df['Province/State'] == 'Hong Kong']).iloc[:, 4:]).rename(index={71: 'confirmed'})

    # Calculate the daily confirmed cases
    dailyConfirmed_data = totalConfirmed_data.copy(deep=True)
    for i in range(len(dailyConfirmed_data.columns) - 1, 0, -1):
        dailyConfirmed_data.iloc[0, i] -= dailyConfirmed_data.iloc[0, i - 1]

    # Print the total confirmed cases on each day and daily confirmed cases
    print(totalConfirmed_data)
    print(dailyConfirmed_data)
    return dailyConfirmed_data


def getVaccinations():
    # URL of the data file (vaccinations-by-age-group.csv) from Our World in Data on GitHub
    url = "https://raw.githubusercontent.com/owid/covid-19-data/master/public/data/" \
          "vaccinations/vaccinations-by-age-group.csv"

    # Fetch data from the GitHub
    vaccinations_data = requests.get(url).content

    # Read the file and turn it in the dataframe type and set the first row as the column name
    df = pd.read_csv(io.StringIO(vaccinations_data.decode('utf-8')), header=0, names=None)

    # Get the data about Hong Kong and get the transpose of the dataset
    vaccinations_data = ((df.loc[df['location'] == 'Hong Kong']).iloc[:, 1:6]).T.iloc[:, 5:]

    # Reset the columns' names
    vaccinations_data.columns = list(vaccinations_data.iloc[0])

    # Drop the original column name "date"
    vaccinations_data = vaccinations_data.drop("date", axis=0)

    # Get the first day of vaccination which has 8 columns (8 age groups)
    each_day = vaccinations_data.iloc[:, :8]

    # Get and set the date to the same format as the confirmed cases
    date = list(map(int, each_day.columns[0].split("-")))
    date = "/".join(list(map(str, [date[1] % 100, date[2] % 100, date[0] % 100])))

    # Reset the rows' name
    each_day.columns = list(each_day.iloc[0])

    # Drop the age_group row and reset the index as default value
    each_day = each_day.drop("age_group", axis=0).reset_index()

    # Conduct the dataframe from 3*8 to 24*2
    each_day = each_day.set_index(['index']).stack().reset_index()
    each_day.columns = ['type', 'age', date]

    # Merge the type and age group together
    each_day.insert(0, "type_age", each_day['type'] + "_" + each_day['age'])

    # Drop the two rows and initialize the dataframe named "vaccinations_set" to store the processed vaccinations data
    vaccinations_set = each_day.drop(["type", "age"], axis=1)

    # As same as the previous steps and process all the data
    for i in range(16, vaccinations_data.shape[1], 8):
        each_day = vaccinations_data.iloc[:, (i - 8):i]
        date = list(map(int, each_day.columns[0].split("-")))
        date = "/".join(list(map(str, [date[1] % 100, date[2] % 100, date[0] % 100])))
        each_day.columns = list(each_day.iloc[0])
        each_day = each_day.drop("age_group", axis=0).reset_index()
        each_day = each_day.set_index(['index']).stack().reset_index()
        each_day.columns = ['type', 'age', date]
        vaccinations_set.insert(vaccinations_set.shape[1], date, each_day[date])

    # Print the processed data, the format of data is [row index: different age groups and types][column names: date]
    print(vaccinations_set)
    return vaccinations_set


def getLeavingArrivals():
    # Create an empty dataframe
    leavingArrivals_data = pd.DataFrame({"1/23/20": {"residentsArrival": 0, "mainlandArrival": 0, "otherArrival": 0,
                                                     "totalArrival": 0, "residentsDeparture": 0, "mainlandDeparture": 0,
                                                     "otherDeparture": 0, "totalDeparture": 0}})
    # Set the beginning and end time date
    begin = datetime.date(2020, 1, 24)
    today = time.localtime()
    end = datetime.date(today.tm_year, today.tm_mon, today.tm_mday)

    # Get all the data from government website
    for i in range((end - begin).days):
        # Set the date time
        date = begin + datetime.timedelta(days=i)

        # Get the web info from government web with request
        source = requests.get("https://www.immd.gov.hk/hkt/stat_" + str(date).replace("-", "") + ".html")

        # Decoding the info so that the chinese shows normally
        source.encoding = source.apparent_encoding

        # Create a parser with BeautifulSoup
        parser = BeautifulSoup(source.text, 'html.parser')

        # Find all the "<tr>" stored the needed data with class name "p tr-boldText"
        parser_tr = parser.findAll(name="tr", attrs={"class": "p tr-boldText"})

        # Some of them named "q tr-boldText"
        if len(parser_tr) == 0:
            parser_tr = parser.findAll(name="tr", attrs={"class": "q tr-boldText"})

        # Find the needed data one by one with parser get_value() method and also delete the "," inside the number
        residentsArrival = parser_tr[0].find(name='td', attrs={
            "headers": "Hong_Kong_Residents_Arrival"}).get_text().replace(",", "")
        mainlandArrival = parser_tr[0].find(name='td', attrs={
            "headers": "Mainland_Visitors_Arrival"}).get_text().replace(",", "")
        otherArrival = parser_tr[0].find(name='td', attrs={
            "headers": "Other_Visitors_Arrival"}).get_text().replace(",", "")
        totalArrival = parser_tr[0].find(name='td', attrs={
            "headers": "Total_Arrival"}).get_text().replace(",", "")
        residentsDeparture = parser_tr[0].find(name='td', attrs={
            "headers": "Hong_Kong_Residents_Departure"}).get_text().replace(",", "")
        mainlandDeparture = parser_tr[0].find(name='td', attrs={
            "headers": "Mainland_Visitors_Departure"}).get_text().replace(",", "")
        otherDeparture = parser_tr[0].find(name='td', attrs={
            "headers": "Other_Visitors_Departure"}).get_text().replace(",", "")
        totalDeparture = parser_tr[0].find(name='td', attrs={
            "headers": "Total_Departure"}).get_text().replace(",", "")

        # Store them into the dataframe with the same column name as dailyConfirmed_data
        # and convert the type of data to int
        leavingArrivals_data.insert(leavingArrivals_data.shape[1],
                                    str(date.month) + "/" + str(date.day) + "/" + str(date.year),
                                    [int(residentsArrival), int(mainlandArrival), int(otherArrival), int(totalArrival),
                                     int(residentsDeparture),
                                     int(mainlandDeparture), int(otherDeparture), int(totalDeparture)])

    # Delete the first column which is meaningless
    leavingArrivals_data = leavingArrivals_data.drop(columns="1/23/20")

    # Print the leaving and arrivals on each day
    print(leavingArrivals_data)
    return leavingArrivals_data


# Return the data in json format

def dailyConfirmed():
    return json.loads(getDailyConfirmed().to_json(orient='columns'))




In [4]:
DailyConfirmed = getDailyConfirmed()

Vaccinations = getVaccinations()



           1/22/20  1/23/20  1/24/20  1/25/20  1/26/20  1/27/20  1/28/20  \
confirmed        0        2        2        5        8        8        8   

           1/29/20  1/30/20  1/31/20  ...  10/12/22  10/13/22  10/14/22  \
confirmed       10       10       12  ...   1816132   1821754   1826860   

           10/15/22  10/16/22  10/17/22  10/18/22  10/19/22  10/20/22  \
confirmed   1832284   1837848   1843209   1848163   1853287   1858720   

           10/21/22  
confirmed   1864113  

[1 rows x 1004 columns]
           1/22/20  1/23/20  1/24/20  1/25/20  1/26/20  1/27/20  1/28/20  \
confirmed        0        2        0        3        3        0        0   

           1/29/20  1/30/20  1/31/20  ...  10/12/22  10/13/22  10/14/22  \
confirmed        2        0        2  ...      4788      5622      5106   

           10/15/22  10/16/22  10/17/22  10/18/22  10/19/22  10/20/22  \
confirmed      5424      5564      5361      4954      5124      5433   

           10/21/22  
confirm

In [7]:
LeavingArrivals = getLeavingArrivals()

                    1/24/2020  1/25/2020  1/26/2020  1/27/2020  1/28/2020  \
mainlandArrival        102663      86417     163668     177470     188788   
mainlandDeparture       36705      29891      36690      28780      24156   
otherArrival            22872      19177      16707      12816      10248   
otherDeparture         162240     135485     217065     219066     223192   
residentsArrival       294785     186168     121396      90477      81904   
residentsDeparture      31340      28788      38300      34703      29546   
totalArrival            13018      14633      14778      18930      16517   
totalDeparture         339143     229589     174474     144110     127967   

                    1/29/2020  1/30/2020  1/31/2020  2/1/2020  2/2/2020  ...  \
mainlandArrival        197572     132506     116544    115122    122399  ...   
mainlandDeparture       27780      19555      16058     13382     11715  ...   
otherArrival            10998       8862       8257      9760     

In [5]:
DailyConfirmed

Unnamed: 0,1/22/20,1/23/20,1/24/20,1/25/20,1/26/20,1/27/20,1/28/20,1/29/20,1/30/20,1/31/20,...,10/12/22,10/13/22,10/14/22,10/15/22,10/16/22,10/17/22,10/18/22,10/19/22,10/20/22,10/21/22
confirmed,0,2,0,3,3,0,0,2,0,2,...,4788,5622,5106,5424,5564,5361,4954,5124,5433,5393


In [6]:
Vaccinations

Unnamed: 0,type_age,2/23/21,2/26/21,2/27/21,2/28/21,3/1/21,3/2/21,3/3/21,3/4/21,3/5/21,...,10/6/22,10/7/22,10/8/22,10/9/22,10/10/22,10/11/22,10/12/22,10/13/22,10/14/22,10/15/22
0,people_vaccinated_per_hundred_0-19,0.0,0.0,0.0,0.01,0.01,0.01,0.01,0.01,0.01,...,99.01,99.02,99.02,99.03,99.03,99.04,99.04,99.05,99.05,99.05
1,people_vaccinated_per_hundred_20-29,0.0,0.01,0.04,0.06,0.07,0.08,0.1,0.12,0.14,...,103.16,103.16,103.16,103.16,103.17,103.17,103.17,103.18,103.18,103.18
2,people_vaccinated_per_hundred_30-39,0.0,0.03,0.07,0.11,0.14,0.18,0.22,0.27,0.31,...,97.53,97.54,97.54,97.54,97.55,97.55,97.55,97.55,97.56,97.56
3,people_vaccinated_per_hundred_40-49,0.01,0.08,0.19,0.31,0.39,0.5,0.61,0.72,0.83,...,92.11,92.11,92.12,92.12,92.13,92.13,92.14,92.14,92.15,92.15
4,people_vaccinated_per_hundred_50-59,0.01,0.08,0.2,0.3,0.38,0.51,0.64,0.76,0.89,...,87.16,87.17,87.17,87.17,87.18,87.19,87.2,87.21,87.22,87.22
5,people_vaccinated_per_hundred_60-69,0.02,0.28,0.49,0.71,0.99,1.56,2.03,2.43,2.82,...,74.08,74.1,74.11,74.11,74.13,74.14,74.16,74.18,74.19,74.2
6,people_vaccinated_per_hundred_70-79,0.01,0.17,0.36,0.54,0.7,1.18,1.6,1.96,2.32,...,72.33,72.38,72.4,72.42,72.43,72.45,72.46,72.49,72.52,72.54
7,people_vaccinated_per_hundred_80+,0.0,0.08,0.18,0.26,0.33,0.55,0.74,0.89,1.04,...,91.93,91.93,91.94,91.94,91.95,91.95,91.95,91.96,91.96,91.96
8,people_fully_vaccinated_per_hundred_0-19,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,97.5,97.52,97.53,97.54,97.55,97.56,97.56,97.57,97.58,97.59
9,people_fully_vaccinated_per_hundred_20-29,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,102.33,102.34,102.35,102.35,102.36,102.37,102.37,102.38,102.38,102.39
