# COVID-19 Real-Time Disease Map: Data Producing

#### By Eason Shao

#### Start Date: 2021/09/22

This is a part of the project COVID-19 Real-Time Disease Map Pro

In [1]:
import csv
import requests
import pandas as pd
import numpy as np

---

## Part 1: Data

On **Github**, there is a lot of good data.

I'll just give out the URL here: [URL](https://raw.githubusercontent.com/canghailan/Wuhan-2019-nCoV/master/Wuhan-2019-nCoV.csv). Copy the data into a txt and change it into a csv.

In [2]:
data = pd.read_csv("https://raw.githubusercontent.com/canghailan/Wuhan-2019-nCoV/master/Wuhan-2019-nCoV.csv")

In [5]:
data.to_csv("Wuhan-2019-nCoV.csv")

In [6]:
data

Unnamed: 0,date,country,countryCode,province,provinceCode,city,cityCode,confirmed,suspected,cured,dead
0,2019-12-01,中国,CN,,,,,1,0,0,0
1,2019-12-01,中国,CN,湖北省,420000.0,,,1,0,0,0
2,2019-12-01,中国,CN,湖北省,420000.0,武汉市,420100,1,0,0,0
3,2019-12-02,中国,CN,,,,,1,0,0,0
4,2019-12-02,中国,CN,湖北省,420000.0,,,1,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...
229079,2020-12-08,马约特,YT,,,,,5181,0,2964,49
229080,2020-12-08,南非,ZA,,,,,817878,0,745750,22249
229081,2020-12-08,赞比亚,ZM,,,,,17931,0,17211,364
229082,2020-12-08,津巴布韦,ZW,,,,,10839,0,8972,294


In [4]:
data.to_csv("data.csv")

In [5]:
data = data.drop(columns = ["suspected", "provinceCode", "cityCode"])

In [6]:
data

Unnamed: 0,date,country,countryCode,province,city,confirmed,cured,dead
0,2019-12-01,中国,CN,,,1,0,0
1,2019-12-01,中国,CN,湖北省,,1,0,0
2,2019-12-01,中国,CN,湖北省,武汉市,1,0,0
3,2019-12-02,中国,CN,,,1,0,0
4,2019-12-02,中国,CN,湖北省,,1,0,0
...,...,...,...,...,...,...,...,...
229079,2020-12-08,马约特,YT,,,5181,2964,49
229080,2020-12-08,南非,ZA,,,817878,745750,22249
229081,2020-12-08,赞比亚,ZM,,,17931,17211,364
229082,2020-12-08,津巴布韦,ZW,,,10839,8972,294


Drop the columns that we'll not use.

In [7]:
data["nowc"] = data["confirmed"] - data["cured"] - data["dead"]

Calculate the **Now Confirmed** Data.

In [8]:
data

Unnamed: 0,date,country,countryCode,province,city,confirmed,cured,dead,nowc
0,2019-12-01,中国,CN,,,1,0,0,1
1,2019-12-01,中国,CN,湖北省,,1,0,0,1
2,2019-12-01,中国,CN,湖北省,武汉市,1,0,0,1
3,2019-12-02,中国,CN,,,1,0,0,1
4,2019-12-02,中国,CN,湖北省,,1,0,0,1
...,...,...,...,...,...,...,...,...,...
229079,2020-12-08,马约特,YT,,,5181,2964,49,2168
229080,2020-12-08,南非,ZA,,,817878,745750,22249,49879
229081,2020-12-08,赞比亚,ZM,,,17931,17211,364,356
229082,2020-12-08,津巴布韦,ZW,,,10839,8972,294,1573


In [9]:
data.dtypes

date           object
country        object
countryCode    object
province       object
city           object
confirmed       int64
cured           int64
dead            int64
nowc            int64
dtype: object

In [10]:
world_data = pd.DataFrame(columns = ["date", "country", "countryCode", "confirmed", "cured", "dead", "nowc"])
province_data = pd.DataFrame(columns = ["date", "province", "confirmed", "cured", "dead", "nowc"])
city_data = pd.DataFrame(columns = ["date", "province", "city", "confirmed", "cured", "dead", "nowc"])
for index, row in data.iterrows():
    if isinstance(row["province"], str):
        if isinstance(row["city"], str):
            temp =  pd.Series({"date": row["date"],
                               "province": row["province"],
                               "city": row["city"],
                               "confirmed": row["confirmed"],
                               "cured": row["cured"],
                               "dead": row["dead"],
                               "nowc": row["nowc"]})
            city_data = city_data.append(temp, ignore_index = True)
        else:
            temp =  pd.Series({"date": row["date"],
                               "province": row["province"],
                               "confirmed": row["confirmed"],
                               "cured": row["cured"],
                               "dead": row["dead"],
                               "nowc": row["nowc"]})
            province_data = province_data.append(temp, ignore_index = True)
    else:
        temp = pd.Series({"date": row["date"],
                          "country": row["country"],
                          "countryCode": row["countryCode"],
                          "confirmed": row["confirmed"],
                          "cured": row["cured"],
                          "dead": row["dead"],
                          "nowc": row["nowc"]})
        world_data = world_data.append(temp, ignore_index = True)

In [11]:
city_data

Unnamed: 0,date,province,city,confirmed,cured,dead,nowc
0,2019-12-01,湖北省,武汉市,1,0,0,1
1,2019-12-02,湖北省,武汉市,1,0,0,1
2,2019-12-03,湖北省,武汉市,1,0,0,1
3,2019-12-04,湖北省,武汉市,1,0,0,1
4,2019-12-05,湖北省,武汉市,1,0,0,1
...,...,...,...,...,...,...,...
159075,2020-12-08,新疆维吾尔自治区,可克达拉市,0,0,0,0
159076,2020-12-08,新疆维吾尔自治区,昆玉市,0,0,0,0
159077,2020-12-08,台湾省,台湾省,718,582,7,129
159078,2020-12-08,香港特别行政区,香港特别行政区,7075,5696,112,1267


In [12]:
province_data

Unnamed: 0,date,province,confirmed,cured,dead,nowc
0,2019-12-01,湖北省,1,0,0,1
1,2019-12-02,湖北省,1,0,0,1
2,2019-12-03,湖北省,1,0,0,1
3,2019-12-04,湖北省,1,0,0,1
4,2019-12-05,湖北省,1,0,0,1
...,...,...,...,...,...,...
11018,2020-12-08,宁夏回族自治区,75,75,0,0
11019,2020-12-08,新疆维吾尔自治区,980,977,3,0
11020,2020-12-08,台湾省,718,582,7,129
11021,2020-12-08,香港特别行政区,7075,5696,112,1267


In [13]:
world_data

Unnamed: 0,date,country,countryCode,confirmed,cured,dead,nowc
0,2019-12-01,中国,CN,1,0,0,1
1,2019-12-02,中国,CN,1,0,0,1
2,2019-12-03,中国,CN,1,0,0,1
3,2019-12-04,中国,CN,1,0,0,1
4,2019-12-05,中国,CN,1,0,0,1
...,...,...,...,...,...,...,...
58976,2020-12-08,马约特,YT,5181,2964,49,2168
58977,2020-12-08,南非,ZA,817878,745750,22249,49879
58978,2020-12-08,赞比亚,ZM,17931,17211,364,356
58979,2020-12-08,津巴布韦,ZW,10839,8972,294,1573


In [14]:
city_data.to_csv("city_data.csv")
province_data.to_csv("province_data.csv")
world_data.to_csv("world_data.csv")

## Part 2: Version China - Provinces

In [15]:
province_data

Unnamed: 0,date,province,confirmed,cured,dead,nowc
0,2019-12-01,湖北省,1,0,0,1
1,2019-12-02,湖北省,1,0,0,1
2,2019-12-03,湖北省,1,0,0,1
3,2019-12-04,湖北省,1,0,0,1
4,2019-12-05,湖北省,1,0,0,1
...,...,...,...,...,...,...
11018,2020-12-08,宁夏回族自治区,75,75,0,0
11019,2020-12-08,新疆维吾尔自治区,980,977,3,0
11020,2020-12-08,台湾省,718,582,7,129
11021,2020-12-08,香港特别行政区,7075,5696,112,1267


We use provnce-data here.

However, we need to re-organize the data to the pyecharts format.

### Part 2.1 Total-confirmed

In [16]:
province_conf_data = province_data

In [17]:
province_conf_data = province_conf_data.drop(columns = ["cured", "dead", "nowc"])

In [18]:
province_conf_data

Unnamed: 0,date,province,confirmed
0,2019-12-01,湖北省,1
1,2019-12-02,湖北省,1
2,2019-12-03,湖北省,1
3,2019-12-04,湖北省,1
4,2019-12-05,湖北省,1
...,...,...,...
11018,2020-12-08,宁夏回族自治区,75
11019,2020-12-08,新疆维吾尔自治区,980
11020,2020-12-08,台湾省,718
11021,2020-12-08,香港特别行政区,7075


The names are wrong, so we also need to change it.

In [19]:
province_conf_data.loc[province_conf_data["province"] == "湖北省", ("province")] = "湖北"
province_conf_data.loc[province_conf_data["province"] == "湖南省", ("province")] = "湖南"
province_conf_data.loc[province_conf_data["province"] == "河北省", ("province")] = "河北"
province_conf_data.loc[province_conf_data["province"] == "河南省", ("province")] = "河南"
province_conf_data.loc[province_conf_data["province"] == "江西省", ("province")] = "江西"
province_conf_data.loc[province_conf_data["province"] == "江苏省", ("province")] = "江苏"
province_conf_data.loc[province_conf_data["province"] == "广东省", ("province")] = "广东"
province_conf_data.loc[province_conf_data["province"] == "吉林省", ("province")] = "吉林"
province_conf_data.loc[province_conf_data["province"] == "浙江省", ("province")] = "浙江"
province_conf_data.loc[province_conf_data["province"] == "安徽省", ("province")] = "安徽"
province_conf_data.loc[province_conf_data["province"] == "山东省", ("province")] = "山东"
province_conf_data.loc[province_conf_data["province"] == "海南省", ("province")] = "海南"
province_conf_data.loc[province_conf_data["province"] == "四川省", ("province")] = "四川"
province_conf_data.loc[province_conf_data["province"] == "贵州省", ("province")] = "贵州"
province_conf_data.loc[province_conf_data["province"] == "云南省", ("province")] = "云南"
province_conf_data.loc[province_conf_data["province"] == "辽宁省", ("province")] = "辽宁"
province_conf_data.loc[province_conf_data["province"] == "福建省", ("province")] = "福建"
province_conf_data.loc[province_conf_data["province"] == "山西省", ("province")] = "山西"
province_conf_data.loc[province_conf_data["province"] == "陕西省", ("province")] = "陕西"
province_conf_data.loc[province_conf_data["province"] == "甘肃省", ("province")] = "甘肃"
province_conf_data.loc[province_conf_data["province"] == "青海省", ("province")] = "青海"
province_conf_data.loc[province_conf_data["province"] == "台湾省", ("province")] = "台湾"
province_conf_data.loc[province_conf_data["province"] == "重庆市", ("province")] = "重庆"
province_conf_data.loc[province_conf_data["province"] == "上海市", ("province")] = "上海"
province_conf_data.loc[province_conf_data["province"] == "天津市", ("province")] = "天津"
province_conf_data.loc[province_conf_data["province"] == "北京市", ("province")] = "北京"
province_conf_data.loc[province_conf_data["province"] == "黑龙江省", ("province")] = "黑龙江"
province_conf_data.loc[province_conf_data["province"] == "西藏自治区", ("province")] = "西藏"
province_conf_data.loc[province_conf_data["province"] == "内蒙古自治区", ("province")] = "内蒙古"
province_conf_data.loc[province_conf_data["province"] == "宁夏回族自治区", ("province")] = "宁夏"
province_conf_data.loc[province_conf_data["province"] == "广西壮族自治区", ("province")] = "广西"
province_conf_data.loc[province_conf_data["province"] == "香港特别行政区", ("province")] = "香港"
province_conf_data.loc[province_conf_data["province"] == "澳门特别行政区", ("province")] = "澳门"
province_conf_data.loc[province_conf_data["province"] == "新疆维吾尔自治区", ("province")] = "新疆"

In [20]:
province_conf_data

Unnamed: 0,date,province,confirmed
0,2019-12-01,湖北,1
1,2019-12-02,湖北,1
2,2019-12-03,湖北,1
3,2019-12-04,湖北,1
4,2019-12-05,湖北,1
...,...,...,...
11018,2020-12-08,宁夏,75
11019,2020-12-08,新疆,980
11020,2020-12-08,台湾,718
11021,2020-12-08,香港,7075


In [21]:
province_conf_data["province"].value_counts()

湖北     374
广东     325
贵州     324
海南     324
四川     324
安徽     324
山东     324
上海     324
江西     324
宁夏     324
广西     324
北京     324
浙江     324
吉林     324
山西     323
重庆     323
河南     323
黑龙江    323
湖南     323
云南     323
天津     323
香港     322
河北     322
福建     322
台湾     322
江苏     322
辽宁     322
陕西     322
澳门     322
新疆     321
内蒙古    321
甘肃     321
青海     320
西藏     316
Name: province, dtype: int64

In [22]:
pconfd = pd.DataFrame(columns = ["Date",
                                 "北京",
                                 "天津",
                                 "上海",
                                 "重庆",
                                 "内蒙古",
                                 "新疆",
                                 "西藏",
                                 "宁夏",
                                 "广西",
                                 "香港",
                                 "澳门",
                                 "黑龙江",
                                 "吉林",
                                 "辽宁",
                                 "河北",
                                 "山西",
                                 "青海",
                                 "山东",
                                 "河南",
                                 "江苏",
                                 "安徽",
                                 "浙江",
                                 "福建",
                                 "江西",
                                 "湖南",
                                 "湖北",
                                 "广东",
                                 "台湾",
                                 "海南",
                                 "甘肃",
                                 "陕西",
                                 "四川",
                                 "贵州",
                                 "云南"])

In [23]:
pconfd

Unnamed: 0,Date,北京,天津,上海,重庆,内蒙古,新疆,西藏,宁夏,广西,...,湖南,湖北,广东,台湾,海南,甘肃,陕西,四川,贵州,云南


In [24]:
for index, row in province_conf_data.iterrows():
    flag = 0
    for index1, row1 in pconfd.iterrows():
        if row1["Date"] ==  row["date"]:
            flag = 1
            pconfd.loc[index1, row["province"]] = row["confirmed"]
            break
    if flag:
        continue
    temp = pd.Series({"Date": row["date"], row["province"]: row["confirmed"]})
    pconfd = pconfd.append(temp, ignore_index = True)

In [25]:
pconfd

Unnamed: 0,Date,北京,天津,上海,重庆,内蒙古,新疆,西藏,宁夏,广西,...,湖南,湖北,广东,台湾,海南,甘肃,陕西,四川,贵州,云南
0,2019-12-01,,,,,,,,,,...,,1,,,,,,,,
1,2019-12-02,,,,,,,,,,...,,1,,,,,,,,
2,2019-12-03,,,,,,,,,,...,,1,,,,,,,,
3,2019-12-04,,,,,,,,,,...,,1,,,,,,,,
4,2019-12-05,,,,,,,,,,...,,1,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
369,2020-12-04,951,300,1350,590,333,980,1,75,263,...,1020,68149,1997,686,171,182,501,808,147,219
370,2020-12-05,952,300,1353,590,333,980,1,75,263,...,1020,68149,2000,693,171,182,501,811,147,219
371,2020-12-06,952,301,1359,590,333,980,1,75,263,...,1020,68149,2002,716,171,182,502,812,147,220
372,2020-12-07,952,301,1366,590,336,980,1,75,263,...,1020,68149,2004,716,171,182,502,814,147,221


In [26]:
pconfd = pconfd.fillna(0)

In [27]:
pconfd

Unnamed: 0,Date,北京,天津,上海,重庆,内蒙古,新疆,西藏,宁夏,广西,...,湖南,湖北,广东,台湾,海南,甘肃,陕西,四川,贵州,云南
0,2019-12-01,0,0,0,0,0,0,0,0,0,...,0,1,0,0,0,0,0,0,0,0
1,2019-12-02,0,0,0,0,0,0,0,0,0,...,0,1,0,0,0,0,0,0,0,0
2,2019-12-03,0,0,0,0,0,0,0,0,0,...,0,1,0,0,0,0,0,0,0,0
3,2019-12-04,0,0,0,0,0,0,0,0,0,...,0,1,0,0,0,0,0,0,0,0
4,2019-12-05,0,0,0,0,0,0,0,0,0,...,0,1,0,0,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
369,2020-12-04,951,300,1350,590,333,980,1,75,263,...,1020,68149,1997,686,171,182,501,808,147,219
370,2020-12-05,952,300,1353,590,333,980,1,75,263,...,1020,68149,2000,693,171,182,501,811,147,219
371,2020-12-06,952,301,1359,590,333,980,1,75,263,...,1020,68149,2002,716,171,182,502,812,147,220
372,2020-12-07,952,301,1366,590,336,980,1,75,263,...,1020,68149,2004,716,171,182,502,814,147,221


In [28]:
pconfd["Total"] = pconfd["北京"] + pconfd["天津"] + pconfd["上海"] + pconfd["重庆"] + pconfd["内蒙古"] + pconfd["新疆"] + pconfd["西藏"]
pconfd["Total"] +=  pconfd["宁夏"] + pconfd["广西"]  + pconfd["香港"] + pconfd["澳门"] + pconfd["黑龙江"] + pconfd["吉林"] + pconfd["辽宁"]
pconfd["Total"] += pconfd["河北"] + pconfd["山西"] + pconfd["青海"] + pconfd["山东"] + pconfd["河南"] + pconfd["江苏"] + pconfd["安徽"]
pconfd["Total"] += pconfd["浙江"] + pconfd["福建"] + pconfd["江西"] + pconfd["湖南"] + pconfd["湖北"] + pconfd["广东"] + pconfd["台湾"]
pconfd["Total"] += pconfd["海南"] + pconfd["甘肃"] + pconfd["陕西"] + pconfd["四川"] + pconfd["贵州"] + pconfd["云南"]

In [29]:
pconfd

Unnamed: 0,Date,北京,天津,上海,重庆,内蒙古,新疆,西藏,宁夏,广西,...,湖北,广东,台湾,海南,甘肃,陕西,四川,贵州,云南,Total
0,2019-12-01,0,0,0,0,0,0,0,0,0,...,1,0,0,0,0,0,0,0,0,1
1,2019-12-02,0,0,0,0,0,0,0,0,0,...,1,0,0,0,0,0,0,0,0,1
2,2019-12-03,0,0,0,0,0,0,0,0,0,...,1,0,0,0,0,0,0,0,0,1
3,2019-12-04,0,0,0,0,0,0,0,0,0,...,1,0,0,0,0,0,0,0,0,1
4,2019-12-05,0,0,0,0,0,0,0,0,0,...,1,0,0,0,0,0,0,0,0,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
369,2020-12-04,951,300,1350,590,333,980,1,75,263,...,68149,1997,686,171,182,501,808,147,219,94019
370,2020-12-05,952,300,1353,590,333,980,1,75,263,...,68149,2000,693,171,182,501,811,147,219,94142
371,2020-12-06,952,301,1359,590,333,980,1,75,263,...,68149,2002,716,171,182,502,812,147,220,94278
372,2020-12-07,952,301,1366,590,336,980,1,75,263,...,68149,2004,716,171,182,502,814,147,221,94373


In [30]:
pconfd.to_csv("pconfd.csv")

### Part 2.2: Now Confirmed

In [31]:
province_ncon_data = province_data
province_ncon_data = province_ncon_data.drop(columns = ["confirmed", "cured", "dead"])
province_ncon_data.loc[province_ncon_data["province"] == "湖北省", ("province")] = "湖北"
province_ncon_data.loc[province_ncon_data["province"] == "湖南省", ("province")] = "湖南"
province_ncon_data.loc[province_ncon_data["province"] == "河北省", ("province")] = "河北"
province_ncon_data.loc[province_ncon_data["province"] == "河南省", ("province")] = "河南"
province_ncon_data.loc[province_ncon_data["province"] == "江西省", ("province")] = "江西"
province_ncon_data.loc[province_ncon_data["province"] == "江苏省", ("province")] = "江苏"
province_ncon_data.loc[province_ncon_data["province"] == "广东省", ("province")] = "广东"
province_ncon_data.loc[province_ncon_data["province"] == "吉林省", ("province")] = "吉林"
province_ncon_data.loc[province_ncon_data["province"] == "浙江省", ("province")] = "浙江"
province_ncon_data.loc[province_ncon_data["province"] == "安徽省", ("province")] = "安徽"
province_ncon_data.loc[province_ncon_data["province"] == "山东省", ("province")] = "山东"
province_ncon_data.loc[province_ncon_data["province"] == "海南省", ("province")] = "海南"
province_ncon_data.loc[province_ncon_data["province"] == "四川省", ("province")] = "四川"
province_ncon_data.loc[province_ncon_data["province"] == "贵州省", ("province")] = "贵州"
province_ncon_data.loc[province_ncon_data["province"] == "云南省", ("province")] = "云南"
province_ncon_data.loc[province_ncon_data["province"] == "辽宁省", ("province")] = "辽宁"
province_ncon_data.loc[province_ncon_data["province"] == "福建省", ("province")] = "福建"
province_ncon_data.loc[province_ncon_data["province"] == "山西省", ("province")] = "山西"
province_ncon_data.loc[province_ncon_data["province"] == "陕西省", ("province")] = "陕西"
province_ncon_data.loc[province_ncon_data["province"] == "甘肃省", ("province")] = "甘肃"
province_ncon_data.loc[province_ncon_data["province"] == "青海省", ("province")] = "青海"
province_ncon_data.loc[province_ncon_data["province"] == "台湾省", ("province")] = "台湾"
province_ncon_data.loc[province_ncon_data["province"] == "重庆市", ("province")] = "重庆"
province_ncon_data.loc[province_ncon_data["province"] == "上海市", ("province")] = "上海"
province_ncon_data.loc[province_ncon_data["province"] == "天津市", ("province")] = "天津"
province_ncon_data.loc[province_ncon_data["province"] == "北京市", ("province")] = "北京"
province_ncon_data.loc[province_ncon_data["province"] == "黑龙江省", ("province")] = "黑龙江"
province_ncon_data.loc[province_ncon_data["province"] == "西藏自治区", ("province")] = "西藏"
province_ncon_data.loc[province_ncon_data["province"] == "内蒙古自治区", ("province")] = "内蒙古"
province_ncon_data.loc[province_ncon_data["province"] == "宁夏回族自治区", ("province")] = "宁夏"
province_ncon_data.loc[province_ncon_data["province"] == "广西壮族自治区", ("province")] = "广西"
province_ncon_data.loc[province_ncon_data["province"] == "香港特别行政区", ("province")] = "香港"
province_ncon_data.loc[province_ncon_data["province"] == "澳门特别行政区", ("province")] = "澳门"
province_ncon_data.loc[province_ncon_data["province"] == "新疆维吾尔自治区", ("province")] = "新疆"
pncond = pd.DataFrame(columns = ["Date",
                                 "北京",
                                 "天津",
                                 "上海",
                                 "重庆",
                                 "内蒙古",
                                 "新疆",
                                 "西藏",
                                 "宁夏",
                                 "广西",
                                 "香港",
                                 "澳门",
                                 "黑龙江",
                                 "吉林",
                                 "辽宁",
                                 "河北",
                                 "山西",
                                 "青海",
                                 "山东",
                                 "河南",
                                 "江苏",
                                 "安徽",
                                 "浙江",
                                 "福建",
                                 "江西",
                                 "湖南",
                                 "湖北",
                                 "广东",
                                 "台湾",
                                 "海南",
                                 "甘肃",
                                 "陕西",
                                 "四川",
                                 "贵州",
                                 "云南"])
for index, row in province_ncon_data.iterrows():
    flag = 0
    for index1, row1 in pncond.iterrows():
        if row1["Date"] ==  row["date"]:
            flag = 1
            pncond.loc[index1, row["province"]] = row["nowc"]
            break
    if flag:
        continue
    temp = pd.Series({"Date": row["date"], row["province"]: row["nowc"]})
    pncond = pncond.append(temp, ignore_index = True)
pncond = pncond.fillna(0)
pncond["Total"] = pncond["北京"] + pncond["天津"] + pncond["上海"] + pncond["重庆"] + pncond["内蒙古"] + pncond["新疆"] + pncond["西藏"]
pncond["Total"] += pncond["宁夏"] + pncond["广西"] + pncond["香港"] + pncond["澳门"] + pncond["黑龙江"] + pncond["吉林"] + pncond["辽宁"]
pncond["Total"] += pncond["河北"] + pncond["山西"] + pncond["青海"] + pncond["山东"] + pncond["河南"] + pncond["江苏"] + pncond["安徽"]
pncond["Total"] += pncond["浙江"] + pncond["福建"] + pncond["江西"] + pncond["湖南"] + pncond["湖北"] + pncond["广东"] + pncond["台湾"]
pncond["Total"] += pncond["海南"] + pncond["甘肃"] + pncond["陕西"] + pncond["四川"] + pncond["贵州"] + pncond["云南"]
pncond

Unnamed: 0,Date,北京,天津,上海,重庆,内蒙古,新疆,西藏,宁夏,广西,...,湖北,广东,台湾,海南,甘肃,陕西,四川,贵州,云南,Total
0,2019-12-01,0,0,0,0,0,0,0,0,0,...,1,0,0,0,0,0,0,0,0,1
1,2019-12-02,0,0,0,0,0,0,0,0,0,...,1,0,0,0,0,0,0,0,0,1
2,2019-12-03,0,0,0,0,0,0,0,0,0,...,1,0,0,0,0,0,0,0,0,1
3,2019-12-04,0,0,0,0,0,0,0,0,0,...,1,0,0,0,0,0,0,0,0,1
4,2019-12-05,0,0,0,0,0,0,0,0,0,...,1,0,0,0,0,0,0,0,0,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
369,2020-12-04,4,5,55,1,26,0,0,0,1,...,4,36,107,0,1,27,31,0,7,1501
370,2020-12-05,4,5,55,1,26,0,0,0,1,...,4,36,112,0,1,24,32,0,7,1562
371,2020-12-06,4,6,58,1,26,0,0,0,1,...,2,36,135,0,1,24,32,0,8,1630
372,2020-12-07,4,6,60,1,27,0,0,0,1,...,2,36,135,0,0,21,32,0,9,1649


In [32]:
pncond.to_csv("pncond.csv")

### Part 2.3: Cured

In [33]:
province_cure_data = province_data
province_cure_data = province_cure_data.drop(columns = ["confirmed","dead", "nowc"])
province_cure_data.loc[province_cure_data["province"] == "湖北省", ("province")] = "湖北"
province_cure_data.loc[province_cure_data["province"] == "湖南省", ("province")] = "湖南"
province_cure_data.loc[province_cure_data["province"] == "河北省", ("province")] = "河北"
province_cure_data.loc[province_cure_data["province"] == "河南省", ("province")] = "河南"
province_cure_data.loc[province_cure_data["province"] == "江西省", ("province")] = "江西"
province_cure_data.loc[province_cure_data["province"] == "江苏省", ("province")] = "江苏"
province_cure_data.loc[province_cure_data["province"] == "广东省", ("province")] = "广东"
province_cure_data.loc[province_cure_data["province"] == "吉林省", ("province")] = "吉林"
province_cure_data.loc[province_cure_data["province"] == "浙江省", ("province")] = "浙江"
province_cure_data.loc[province_cure_data["province"] == "安徽省", ("province")] = "安徽"
province_cure_data.loc[province_cure_data["province"] == "山东省", ("province")] = "山东"
province_cure_data.loc[province_cure_data["province"] == "海南省", ("province")] = "海南"
province_cure_data.loc[province_cure_data["province"] == "四川省", ("province")] = "四川"
province_cure_data.loc[province_cure_data["province"] == "贵州省", ("province")] = "贵州"
province_cure_data.loc[province_cure_data["province"] == "云南省", ("province")] = "云南"
province_cure_data.loc[province_cure_data["province"] == "辽宁省", ("province")] = "辽宁"
province_cure_data.loc[province_cure_data["province"] == "福建省", ("province")] = "福建"
province_cure_data.loc[province_cure_data["province"] == "山西省", ("province")] = "山西"
province_cure_data.loc[province_cure_data["province"] == "陕西省", ("province")] = "陕西"
province_cure_data.loc[province_cure_data["province"] == "甘肃省", ("province")] = "甘肃"
province_cure_data.loc[province_cure_data["province"] == "青海省", ("province")] = "青海"
province_cure_data.loc[province_cure_data["province"] == "台湾省", ("province")] = "台湾"
province_cure_data.loc[province_cure_data["province"] == "重庆市", ("province")] = "重庆"
province_cure_data.loc[province_cure_data["province"] == "上海市", ("province")] = "上海"
province_cure_data.loc[province_cure_data["province"] == "天津市", ("province")] = "天津"
province_cure_data.loc[province_cure_data["province"] == "北京市", ("province")] = "北京"
province_cure_data.loc[province_cure_data["province"] == "黑龙江省", ("province")] = "黑龙江"
province_cure_data.loc[province_cure_data["province"] == "西藏自治区", ("province")] = "西藏"
province_cure_data.loc[province_cure_data["province"] == "内蒙古自治区", ("province")] = "内蒙古"
province_cure_data.loc[province_cure_data["province"] == "宁夏回族自治区", ("province")] = "宁夏"
province_cure_data.loc[province_cure_data["province"] == "广西壮族自治区", ("province")] = "广西"
province_cure_data.loc[province_cure_data["province"] == "香港特别行政区", ("province")] = "香港"
province_cure_data.loc[province_cure_data["province"] == "澳门特别行政区", ("province")] = "澳门"
province_cure_data.loc[province_cure_data["province"] == "新疆维吾尔自治区", ("province")] = "新疆"
pcured = pd.DataFrame(columns = ["Date",
                                 "北京",
                                 "天津",
                                 "上海",
                                 "重庆",
                                 "内蒙古",
                                 "新疆",
                                 "西藏",
                                 "宁夏",
                                 "广西",
                                 "香港",
                                 "澳门",
                                 "黑龙江",
                                 "吉林",
                                 "辽宁",
                                 "河北",
                                 "山西",
                                 "青海",
                                 "山东",
                                 "河南",
                                 "江苏",
                                 "安徽",
                                 "浙江",
                                 "福建",
                                 "江西",
                                 "湖南",
                                 "湖北",
                                 "广东",
                                 "台湾",
                                 "海南",
                                 "甘肃",
                                 "陕西",
                                 "四川",
                                 "贵州",
                                 "云南"])
for index, row in province_cure_data.iterrows():
    flag = 0
    for index1, row1 in pcured.iterrows():
        if row1["Date"] ==  row["date"]:
            flag = 1
            pcured.loc[index1, row["province"]] = row["cured"]
            break
    if flag:
        continue
    temp = pd.Series({"Date": row["date"], row["province"]: row["cured"]})
    pcured = pcured.append(temp, ignore_index = True)
pcured = pcured.fillna(0)
pcured["Total"] = pcured["北京"] + pcured["天津"] + pcured["上海"] + pcured["重庆"] + pcured["内蒙古"] + pcured["新疆"] + pcured["西藏"]
pcured["Total"] += pcured["宁夏"] + pcured["广西"] + pcured["香港"] + pcured["澳门"] + pcured["黑龙江"] + pcured["吉林"] + pcured["辽宁"]
pcured["Total"] += pcured["河北"] + pcured["山西"] + pcured["青海"] + pcured["山东"] + pcured["河南"] + pcured["江苏"] + pcured["安徽"]
pcured["Total"] += pcured["浙江"] + pcured["福建"] + pcured["江西"] + pcured["湖南"] + pcured["湖北"] + pcured["广东"] + pcured["台湾"]
pcured["Total"] += pcured["海南"] + pcured["甘肃"] + pcured["陕西"] + pcured["四川"] + pcured["贵州"] + pcured["云南"]
pcured

Unnamed: 0,Date,北京,天津,上海,重庆,内蒙古,新疆,西藏,宁夏,广西,...,湖北,广东,台湾,海南,甘肃,陕西,四川,贵州,云南,Total
0,2019-12-01,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,2019-12-02,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,2019-12-03,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,2019-12-04,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,2019-12-05,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
369,2020-12-04,938,292,1288,583,306,977,1,75,260,...,63633,1953,572,165,179,471,774,145,210,87765
370,2020-12-05,939,292,1291,583,306,977,1,75,260,...,63633,1956,574,165,179,474,776,145,210,87827
371,2020-12-06,939,292,1294,583,306,977,1,75,260,...,63635,1958,574,165,179,475,777,145,210,87895
372,2020-12-07,939,292,1299,583,308,977,1,75,260,...,63635,1960,574,165,180,478,779,145,210,87971


In [34]:
for i in range(40):
    pcured = pcured.drop(i)

In [35]:
pcured

Unnamed: 0,Date,北京,天津,上海,重庆,内蒙古,新疆,西藏,宁夏,广西,...,湖北,广东,台湾,海南,甘肃,陕西,四川,贵州,云南,Total
40,2020-01-10,0,0,0,0,0,0,0,0,0,...,2,0,0,0,0,0,0,0,0,2
41,2020-01-11,0,0,0,0,0,0,0,0,0,...,6,0,0,0,0,0,0,0,0,6
42,2020-01-12,0,0,0,0,0,0,0,0,0,...,7,0,0,0,0,0,0,0,0,7
43,2020-01-13,0,0,0,0,0,0,0,0,0,...,7,0,0,0,0,0,0,0,0,7
44,2020-01-14,0,0,0,0,0,0,0,0,0,...,7,0,0,0,0,0,0,0,0,7
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
369,2020-12-04,938,292,1288,583,306,977,1,75,260,...,63633,1953,572,165,179,471,774,145,210,87765
370,2020-12-05,939,292,1291,583,306,977,1,75,260,...,63633,1956,574,165,179,474,776,145,210,87827
371,2020-12-06,939,292,1294,583,306,977,1,75,260,...,63635,1958,574,165,179,475,777,145,210,87895
372,2020-12-07,939,292,1299,583,308,977,1,75,260,...,63635,1960,574,165,180,478,779,145,210,87971


In [36]:
pcured.to_csv("pcured.csv")

### Part 2.3: Dead

In [37]:
province_dead_data = province_data
province_dead_data = province_dead_data.drop(columns = ["confirmed", "nowc", "cured"])
province_dead_data.loc[province_dead_data["province"] == "湖北省", ("province")] = "湖北"
province_dead_data.loc[province_dead_data["province"] == "湖南省", ("province")] = "湖南"
province_dead_data.loc[province_dead_data["province"] == "河北省", ("province")] = "河北"
province_dead_data.loc[province_dead_data["province"] == "河南省", ("province")] = "河南"
province_dead_data.loc[province_dead_data["province"] == "江西省", ("province")] = "江西"
province_dead_data.loc[province_dead_data["province"] == "江苏省", ("province")] = "江苏"
province_dead_data.loc[province_dead_data["province"] == "广东省", ("province")] = "广东"
province_dead_data.loc[province_dead_data["province"] == "吉林省", ("province")] = "吉林"
province_dead_data.loc[province_dead_data["province"] == "浙江省", ("province")] = "浙江"
province_dead_data.loc[province_dead_data["province"] == "安徽省", ("province")] = "安徽"
province_dead_data.loc[province_dead_data["province"] == "山东省", ("province")] = "山东"
province_dead_data.loc[province_dead_data["province"] == "海南省", ("province")] = "海南"
province_dead_data.loc[province_dead_data["province"] == "四川省", ("province")] = "四川"
province_dead_data.loc[province_dead_data["province"] == "贵州省", ("province")] = "贵州"
province_dead_data.loc[province_dead_data["province"] == "云南省", ("province")] = "云南"
province_dead_data.loc[province_dead_data["province"] == "辽宁省", ("province")] = "辽宁"
province_dead_data.loc[province_dead_data["province"] == "福建省", ("province")] = "福建"
province_dead_data.loc[province_dead_data["province"] == "山西省", ("province")] = "山西"
province_dead_data.loc[province_dead_data["province"] == "陕西省", ("province")] = "陕西"
province_dead_data.loc[province_dead_data["province"] == "甘肃省", ("province")] = "甘肃"
province_dead_data.loc[province_dead_data["province"] == "青海省", ("province")] = "青海"
province_dead_data.loc[province_dead_data["province"] == "台湾省", ("province")] = "台湾"
province_dead_data.loc[province_dead_data["province"] == "重庆市", ("province")] = "重庆"
province_dead_data.loc[province_dead_data["province"] == "上海市", ("province")] = "上海"
province_dead_data.loc[province_dead_data["province"] == "天津市", ("province")] = "天津"
province_dead_data.loc[province_dead_data["province"] == "北京市", ("province")] = "北京"
province_dead_data.loc[province_dead_data["province"] == "黑龙江省", ("province")] = "黑龙江"
province_dead_data.loc[province_dead_data["province"] == "西藏自治区", ("province")] = "西藏"
province_dead_data.loc[province_dead_data["province"] == "内蒙古自治区", ("province")] = "内蒙古"
province_dead_data.loc[province_dead_data["province"] == "宁夏回族自治区", ("province")] = "宁夏"
province_dead_data.loc[province_dead_data["province"] == "广西壮族自治区", ("province")] = "广西"
province_dead_data.loc[province_dead_data["province"] == "香港特别行政区", ("province")] = "香港"
province_dead_data.loc[province_dead_data["province"] == "澳门特别行政区", ("province")] = "澳门"
province_dead_data.loc[province_dead_data["province"] == "新疆维吾尔自治区", ("province")] = "新疆"
pdeadd = pd.DataFrame(columns = ["Date",
                                 "北京",
                                 "天津",
                                 "上海",
                                 "重庆",
                                 "内蒙古",
                                 "新疆",
                                 "西藏",
                                 "宁夏",
                                 "广西",
                                 "香港",
                                 "澳门",
                                 "黑龙江",
                                 "吉林",
                                 "辽宁",
                                 "河北",
                                 "山西",
                                 "青海",
                                 "山东",
                                 "河南",
                                 "江苏",
                                 "安徽",
                                 "浙江",
                                 "福建",
                                 "江西",
                                 "湖南",
                                 "湖北",
                                 "广东",
                                 "台湾",
                                 "海南",
                                 "甘肃",
                                 "陕西",
                                 "四川",
                                 "贵州",
                                 "云南"])
for index, row in province_dead_data.iterrows():
    flag = 0
    for index1, row1 in pdeadd.iterrows():
        if row1["Date"] ==  row["date"]:
            flag = 1
            pdeadd.loc[index1, row["province"]] = row["dead"]
            break
    if flag:
        continue
    temp = pd.Series({"Date": row["date"], row["province"]: row["dead"]})
    pdeadd = pdeadd.append(temp, ignore_index = True)
pdeadd = pdeadd.fillna(0)
pdeadd["Total"] = pdeadd["北京"] + pdeadd["天津"] + pdeadd["上海"] + pdeadd["重庆"] + pdeadd["内蒙古"] + pdeadd["新疆"] + pdeadd["西藏"]
pdeadd["Total"] += pdeadd["宁夏"] + pdeadd["广西"] + pdeadd["香港"] + pdeadd["澳门"] + pdeadd["黑龙江"] + pdeadd["吉林"] + pdeadd["辽宁"]
pdeadd["Total"] += pdeadd["河北"] + pdeadd["山西"] + pdeadd["青海"] + pdeadd["山东"] + pdeadd["河南"] + pdeadd["江苏"] + pdeadd["安徽"]
pdeadd["Total"] += pdeadd["浙江"] + pdeadd["福建"] + pdeadd["江西"] + pdeadd["湖南"] + pdeadd["湖北"] + pdeadd["广东"] + pdeadd["台湾"]
pdeadd["Total"] += pdeadd["海南"] + pdeadd["甘肃"] + pdeadd["陕西"] + pdeadd["四川"] + pdeadd["贵州"] + pdeadd["云南"]
for i in range(40):
    pdeadd = pdeadd.drop(i)
pdeadd

Unnamed: 0,Date,北京,天津,上海,重庆,内蒙古,新疆,西藏,宁夏,广西,...,湖北,广东,台湾,海南,甘肃,陕西,四川,贵州,云南,Total
40,2020-01-10,0,0,0,0,0,0,0,0,0,...,1,0,0,0,0,0,0,0,0,1
41,2020-01-11,0,0,0,0,0,0,0,0,0,...,1,0,0,0,0,0,0,0,0,1
42,2020-01-12,0,0,0,0,0,0,0,0,0,...,1,0,0,0,0,0,0,0,0,1
43,2020-01-13,0,0,0,0,0,0,0,0,0,...,1,0,0,0,0,0,0,0,0,1
44,2020-01-14,0,0,0,0,0,0,0,0,0,...,1,0,0,0,0,0,0,0,0,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
369,2020-12-04,9,3,7,6,1,3,0,0,2,...,4512,8,7,6,2,3,3,2,2,4753
370,2020-12-05,9,3,7,6,1,3,0,0,2,...,4512,8,7,6,2,3,3,2,2,4753
371,2020-12-06,9,3,7,6,1,3,0,0,2,...,4512,8,7,6,2,3,3,2,2,4753
372,2020-12-07,9,3,7,6,1,3,0,0,2,...,4512,8,7,6,2,3,3,2,2,4753


In [38]:
pdeadd.to_csv("pdeadd.csv")

## Part 3: World

### Part 3.1: Total Confirmed

In [39]:
world_conf_data = world_data
world_conf_data = world_conf_data.drop(columns = ["cured", "dead", "nowc"])

In [40]:
world_conf_data

Unnamed: 0,date,country,countryCode,confirmed
0,2019-12-01,中国,CN,1
1,2019-12-02,中国,CN,1
2,2019-12-03,中国,CN,1
3,2019-12-04,中国,CN,1
4,2019-12-05,中国,CN,1
...,...,...,...,...
58976,2020-12-08,马约特,YT,5181
58977,2020-12-08,南非,ZA,817878
58978,2020-12-08,赞比亚,ZM,17931
58979,2020-12-08,津巴布韦,ZW,10839


In [41]:
world_conf_data["country"].value_counts()

中国       374
日本       323
泰国       323
韩国       322
美国       320
        ... 
南苏丹      246
也门       243
科摩罗      223
塔吉克斯坦    222
莱索托      210
Name: country, Length: 213, dtype: int64

In [42]:
wconfd = pd.DataFrame()

In [43]:
wconfd

In [44]:
for index, row in world_conf_data.iterrows():
    flag = 0
    for index1, row1 in wconfd.iterrows():
        if row1["Date"] ==  row["date"]:
            flag = 1
            wconfd.loc[index1, row["country"]] = int(row["confirmed"])
            break
    if flag:
        continue
    temp = pd.Series({"Date": row["date"], row["country"]: int(row["confirmed"])})
    wconfd = wconfd.append(temp, ignore_index = True)

In [45]:
wconfd

Unnamed: 0,Date,中国,日本,泰国,韩国,美国,新加坡,越南,法国,尼泊尔,...,布隆迪,马拉维,圣皮埃尔和密克隆,福克兰群岛（马尔维纳斯）,圣多美和普林西比,南苏丹,也门,科摩罗,塔吉克斯坦,莱索托
0,2019-12-01,1.0,,,,,,,,,...,,,,,,,,,,
1,2019-12-02,1.0,,,,,,,,,...,,,,,,,,,,
2,2019-12-03,1.0,,,,,,,,,...,,,,,,,,,,
3,2019-12-04,1.0,,,,,,,,,...,,,,,,,,,,
4,2019-12-05,1.0,,,,,,,,,...,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
369,2020-12-04,94023.0,158402.0,4053.0,36332.0,14542044.0,58242.0,1361.0,2257331.0,238861.0,...,692.0,6043.0,16.0,17.0,997.0,3154.0,2239.0,615.0,12308.0,2145.0
370,2020-12-05,94142.0,160906.0,4072.0,36915.0,14775308.0,58255.0,1365.0,2268552.0,239885.0,...,692.0,6047.0,16.0,17.0,999.0,3154.0,2267.0,615.0,12349.0,2150.0
371,2020-12-06,94278.0,162941.0,4086.0,37546.0,14985142.0,58260.0,1366.0,2281475.0,240981.0,...,694.0,6049.0,16.0,17.0,999.0,3166.0,2304.0,616.0,12428.0,2150.0
372,2020-12-07,94373.0,164446.0,4107.0,38161.0,15169648.0,58273.0,1367.0,2292497.0,241995.0,...,694.0,6051.0,16.0,17.0,999.0,3181.0,2383.0,616.0,12469.0,2150.0


In [46]:
wconfd = wconfd.fillna(0)

In [47]:
wconfd

Unnamed: 0,Date,中国,日本,泰国,韩国,美国,新加坡,越南,法国,尼泊尔,...,布隆迪,马拉维,圣皮埃尔和密克隆,福克兰群岛（马尔维纳斯）,圣多美和普林西比,南苏丹,也门,科摩罗,塔吉克斯坦,莱索托
0,2019-12-01,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,2019-12-02,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,2019-12-03,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,2019-12-04,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,2019-12-05,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
369,2020-12-04,94023.0,158402.0,4053.0,36332.0,14542044.0,58242.0,1361.0,2257331.0,238861.0,...,692.0,6043.0,16.0,17.0,997.0,3154.0,2239.0,615.0,12308.0,2145.0
370,2020-12-05,94142.0,160906.0,4072.0,36915.0,14775308.0,58255.0,1365.0,2268552.0,239885.0,...,692.0,6047.0,16.0,17.0,999.0,3154.0,2267.0,615.0,12349.0,2150.0
371,2020-12-06,94278.0,162941.0,4086.0,37546.0,14985142.0,58260.0,1366.0,2281475.0,240981.0,...,694.0,6049.0,16.0,17.0,999.0,3166.0,2304.0,616.0,12428.0,2150.0
372,2020-12-07,94373.0,164446.0,4107.0,38161.0,15169648.0,58273.0,1367.0,2292497.0,241995.0,...,694.0,6051.0,16.0,17.0,999.0,3181.0,2383.0,616.0,12469.0,2150.0


In [48]:
wconfd.dtypes

Date      object
中国       float64
日本       float64
泰国       float64
韩国       float64
          ...   
南苏丹      float64
也门       float64
科摩罗      float64
塔吉克斯坦    float64
莱索托      float64
Length: 214, dtype: object

In [49]:
for i in wconfd.columns:
    if i != "Date":
        wconfd[[i]] = wconfd[[i]].astype("int")

In [50]:
wconfd

Unnamed: 0,Date,中国,日本,泰国,韩国,美国,新加坡,越南,法国,尼泊尔,...,布隆迪,马拉维,圣皮埃尔和密克隆,福克兰群岛（马尔维纳斯）,圣多美和普林西比,南苏丹,也门,科摩罗,塔吉克斯坦,莱索托
0,2019-12-01,1,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,2019-12-02,1,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,2019-12-03,1,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,2019-12-04,1,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,2019-12-05,1,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
369,2020-12-04,94023,158402,4053,36332,14542044,58242,1361,2257331,238861,...,692,6043,16,17,997,3154,2239,615,12308,2145
370,2020-12-05,94142,160906,4072,36915,14775308,58255,1365,2268552,239885,...,692,6047,16,17,999,3154,2267,615,12349,2150
371,2020-12-06,94278,162941,4086,37546,14985142,58260,1366,2281475,240981,...,694,6049,16,17,999,3166,2304,616,12428,2150
372,2020-12-07,94373,164446,4107,38161,15169648,58273,1367,2292497,241995,...,694,6051,16,17,999,3181,2383,616,12469,2150


In [51]:
wconfd["Total"] = 0
for i in wconfd.columns:
    if i != "Date" and i != "Total":
        wconfd["Total"] += wconfd[i]

In [52]:
wconfd

Unnamed: 0,Date,中国,日本,泰国,韩国,美国,新加坡,越南,法国,尼泊尔,...,马拉维,圣皮埃尔和密克隆,福克兰群岛（马尔维纳斯）,圣多美和普林西比,南苏丹,也门,科摩罗,塔吉克斯坦,莱索托,Total
0,2019-12-01,1,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,1
1,2019-12-02,1,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,1
2,2019-12-03,1,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,1
3,2019-12-04,1,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,1
4,2019-12-05,1,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
369,2020-12-04,94023,158402,4053,36332,14542044,58242,1361,2257331,238861,...,6043,16,17,997,3154,2239,615,12308,2145,65813537
370,2020-12-05,94142,160906,4072,36915,14775308,58255,1365,2268552,239885,...,6047,16,17,999,3154,2267,615,12349,2150,66486862
371,2020-12-06,94278,162941,4086,37546,14985142,58260,1366,2281475,240981,...,6049,16,17,999,3166,2304,616,12428,2150,67107528
372,2020-12-07,94373,164446,4107,38161,15169648,58273,1367,2292497,241995,...,6051,16,17,999,3181,2383,616,12469,2150,67644753


In [53]:
wconfd.to_csv("wconfd.csv")

In [54]:
wconfmp = wconfd

In [55]:
wconfmp

Unnamed: 0,Date,中国,日本,泰国,韩国,美国,新加坡,越南,法国,尼泊尔,...,马拉维,圣皮埃尔和密克隆,福克兰群岛（马尔维纳斯）,圣多美和普林西比,南苏丹,也门,科摩罗,塔吉克斯坦,莱索托,Total
0,2019-12-01,1,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,1
1,2019-12-02,1,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,1
2,2019-12-03,1,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,1
3,2019-12-04,1,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,1
4,2019-12-05,1,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
369,2020-12-04,94023,158402,4053,36332,14542044,58242,1361,2257331,238861,...,6043,16,17,997,3154,2239,615,12308,2145,65813537
370,2020-12-05,94142,160906,4072,36915,14775308,58255,1365,2268552,239885,...,6047,16,17,999,3154,2267,615,12349,2150,66486862
371,2020-12-06,94278,162941,4086,37546,14985142,58260,1366,2281475,240981,...,6049,16,17,999,3166,2304,616,12428,2150,67107528
372,2020-12-07,94373,164446,4107,38161,15169648,58273,1367,2292497,241995,...,6051,16,17,999,3181,2383,616,12469,2150,67644753


In [56]:
NameList = {
  "Somalia": "索马里",
  "Liechtenstein": "列支敦士登",
  "Morocco": "摩洛哥",
  "W. Sahara": "西撒哈拉",
  "Serbia": "塞尔维亚",
  "Afghanistan": "阿富汗",
  "Angola": "安哥拉",
  "Albania": "阿尔巴尼亚",
  "Andorra": "安道尔",
  "United Arab Emirates": "阿联酋",
  "Argentina": "阿根廷",
  "Armenia": "亚美尼亚",
  "Australia": "澳大利亚",
  "Austria": "奥地利",
  "Azerbaijan": "阿塞拜疆",
  "Burundi": "布隆迪",
  "Belgium": "比利时",
  "Benin": "贝宁",
  "Burkina Faso": "布基纳法索",
  "Bangladesh": "孟加拉国",
  "Bulgaria": "保加利亚",
  "Bahrain": "巴林",
  "Bahamas": "巴哈马",
  "Bosnia and Herz.": "波黑",
  "Belarus": "白俄罗斯",
  "Belize": "伯利兹",
  "Bermuda": "百慕大",
  "Bolivia": "玻利维亚",
  "Brazil": "巴西",
  "Barbados": "巴巴多斯",
  "Brunei": "文莱",
  "Bhutan": "不丹",
  "Botswana": "博茨瓦纳",
  "Central African Rep.": "中非",
  "Canada": "加拿大",
  "Switzerland": "瑞士",
  "Chile": "智利",
  "China": "中国",
  "Côte d'Ivoire": "科特迪瓦",
  "Cameroon": "喀麦隆",
  "Dem. Rep. Congo": "刚果（金）",
  "Congo": "刚果（布）",
  "Colombia": "哥伦比亚",
  "Cape Verde": "佛得角",
  "Costa Rica": "哥斯达黎加",
  "Cuba": "古巴",
  "N. Cyprus": "北塞浦路斯",
  "Cyprus": "塞浦路斯",
  "Czech Rep.": "捷克",
  "Germany": "德国",
  "Djibouti": "吉布提",
  "Denmark": "丹麦",
  "Dominican Rep.": "多米尼克",
  "Algeria": "阿尔及利亚",
  "Ecuador": "厄瓜多尔",
  "Egypt": "埃及",
  "Eritrea": "厄立特里亚",
  "Spain": "西班牙",
  "Estonia": "爱沙尼亚",
  "Ethiopia": "埃塞俄比亚",
  "Finland": "芬兰",
  "Fiji": "斐济",
  "France": "法国",
  "Gabon": "加蓬",
  "United Kingdom": "英国",
  "Georgia": "格鲁吉亚",
  "Ghana": "加纳",
  "Guinea": "几内亚",
  "Gambia": "冈比亚",
  "Guinea-Bissau": "几内亚比绍",
  "Eq. Guinea": "赤道几内亚",
  "Greece": "希腊",
  "Grenada": "格林纳达",
  "Greenland": "格陵兰",
  "Guatemala": "危地马拉",
  "Guam": "关岛",
  "Guyana": "圭亚那",
  "Honduras": "洪都拉斯",
  "Croatia": "克罗地亚",
  "Haiti": "海地",
  "Hungary": "匈牙利",
  "Indonesia": "印度尼西亚",
  "India": "印度",
  "Br. Indian Ocean Ter.": "英属印度洋领土",
  "Ireland": "爱尔兰",
  "Iran": "伊朗",
  "Iraq": "伊拉克",
  "Iceland": "冰岛",
  "Israel": "以色列",
  "Italy": "意大利",
  "Jamaica": "牙买加",
  "Jordan": "约旦",
  "Japan": "日本",
  "Siachen Glacier": "锡亚琴冰川",
  "Kazakhstan": "哈萨克斯坦",
  "Kenya": "肯尼亚",
  "Kyrgyzstan": "吉尔吉斯斯坦",
  "Cambodia": "柬埔寨",
  "Korea": "韩国",
  "Kuwait": "科威特",
  "Lao PDR": "老挝",
  "Lebanon": "黎巴嫩",
  "Liberia": "利比里亚",
  "Libya": "利比亚",
  "Sri Lanka": "斯里兰卡",
  "Lesotho": "莱索托",
  "Lithuania": "立陶宛",
  "Luxembourg": "卢森堡",
  "Latvia": "拉脱维亚",
  "Moldova": "摩尔多瓦",
  "Madagascar": "马达加斯加",
  "Mexico": "墨西哥",
  "Macedonia": "北马其顿",
  "Mali": "马里",
  "Malta": "马耳他",
  "Myanmar": "缅甸",
  "Montenegro": "黑山",
  "Mongolia": "蒙古",
  "Mozambique": "莫桑比克",
  "Mauritania": "毛利塔尼亚",
  "Mauritius": "毛里求斯",
  "Malawi": "马拉维",
  "Malaysia": "马来西亚",
  "Namibia": "纳米比亚",
  "New Caledonia": "新喀里多尼亚",
  "Niger": "尼日尔",
  "Nigeria": "尼日利亚",
  "Nicaragua": "尼加拉瓜",
  "Netherlands": "荷兰",
  "Norway": "挪威",
  "Nepal": "尼泊尔",
  "New Zealand": "新西兰",
  "Oman": "阿曼",
  "Pakistan": "巴基斯坦",
  "Panama": "巴拿马",
  "Peru": "秘鲁",
  "Philippines": "菲律宾",
  "Papua New Guinea": "巴布亚新几内亚",
  "Poland": "波兰",
  "Puerto Rico": "波多黎各",
  "Dem. Rep. Korea": "朝鲜",
  "Portugal": "葡萄牙",
  "Paraguay": "巴拉圭",
  "Palestine": "巴勒斯坦",
  "Qatar": "卡塔尔",
  "Romania": "罗马尼亚",
  "Russia": "俄罗斯",
  "Rwanda": "卢旺达",
  "Saudi Arabia": "沙特阿拉伯",
  "Sudan": "苏丹",
  "S. Sudan": "南苏丹",
  "Senegal": "塞内加尔",
  "Singapore": "新加坡",
  "Solomon Is.": "所罗门群岛",
  "Sierra Leone": "塞拉利昂",
  "El Salvador": "萨尔瓦多",
  "Suriname": "苏里南",
  "Slovakia": "斯洛伐克",
  "Slovenia": "斯洛文尼亚",
  "Sweden": "瑞典",
  "Swaziland": "斯威士兰",
  "Seychelles": "塞舌尔",
  "Syria": "叙利亚",
  "Chad": "乍得",
  "Togo": "多哥",
  "Thailand": "泰国",
  "Tajikistan": "塔吉克斯坦",
  "Turkmenistan": "土库曼斯坦",
  "Timor-Leste": "东帝汶",
  "Tonga": "汤加",
  "Trinidad and Tobago": "特立尼达和多巴哥",
  "Tunisia": "突尼斯",
  "Turkey": "土耳其",
  "Tanzania": "坦桑尼亚",
  "Uganda": "乌干达",
  "Ukraine": "乌克兰",
  "Uruguay": "乌拉圭",
  "United States": "美国",
  "Uzbekistan": "乌兹别克斯坦",
  "Venezuela": "委内瑞拉",
  "Vietnam": "越南",
  "Vanuatu": "瓦努阿图",
  "Yemen": "也门",
  "South Africa": "南非",
  "Zambia": "赞比亚",
  "Zimbabwe": "津巴布韦",
  "Aland": "奥兰群岛",
  "American Samoa": "美属萨摩亚",
  "Fr. S. Antarctic Lands": "南极洲",
  "Antigua and Barb.": "安提瓜和巴布达",
  "Comoros": "科摩罗",
  "Curaçao": "库拉索岛",
  "Cayman Is.": "开曼群岛",
  "Dominica": "多米尼加",
  "Falkland Is.": "福克兰群岛（马尔维纳斯）",
  "Faeroe Is.": "法罗群岛",
  "Micronesia": "密克罗尼西亚",
  "Heard I. and McDonald Is.": "赫德岛和麦克唐纳群岛",
  "Isle of Man": "英国属地曼岛",
  "Jersey": "泽西岛",
  "Kiribati": "基里巴斯",
  "Saint Lucia": "圣卢西亚",
  "N. Mariana Is.": "北马里亚纳",
  "Montserrat": "蒙特塞拉特",
  "Niue": "纽埃",
  "Palau": "帕劳",
  "Fr. Polynesia": "法属波利尼西亚",
  "S. Geo. and S. Sandw. Is.": "南乔治亚岛和南桑威奇群岛",
  "Saint Helena": "圣赫勒拿",
  "St. Pierre and Miquelon": "圣皮埃尔和密克隆",
  "São Tomé and Principe": "圣多美和普林西比",
  "Turks and Caicos Is.": "特克斯和凯科斯群岛",
  "St. Vin. and Gren.": "圣文森特和格林纳丁斯",
  "U.S. Virgin Is.": "美属维尔京群岛",
  "Samoa": "萨摩亚"
}
namelist = {value:key for key,value in NameList.items()}
namelist

{'索马里': 'Somalia',
 '列支敦士登': 'Liechtenstein',
 '摩洛哥': 'Morocco',
 '西撒哈拉': 'W. Sahara',
 '塞尔维亚': 'Serbia',
 '阿富汗': 'Afghanistan',
 '安哥拉': 'Angola',
 '阿尔巴尼亚': 'Albania',
 '安道尔': 'Andorra',
 '阿联酋': 'United Arab Emirates',
 '阿根廷': 'Argentina',
 '亚美尼亚': 'Armenia',
 '澳大利亚': 'Australia',
 '奥地利': 'Austria',
 '阿塞拜疆': 'Azerbaijan',
 '布隆迪': 'Burundi',
 '比利时': 'Belgium',
 '贝宁': 'Benin',
 '布基纳法索': 'Burkina Faso',
 '孟加拉国': 'Bangladesh',
 '保加利亚': 'Bulgaria',
 '巴林': 'Bahrain',
 '巴哈马': 'Bahamas',
 '波黑': 'Bosnia and Herz.',
 '白俄罗斯': 'Belarus',
 '伯利兹': 'Belize',
 '百慕大': 'Bermuda',
 '玻利维亚': 'Bolivia',
 '巴西': 'Brazil',
 '巴巴多斯': 'Barbados',
 '文莱': 'Brunei',
 '不丹': 'Bhutan',
 '博茨瓦纳': 'Botswana',
 '中非': 'Central African Rep.',
 '加拿大': 'Canada',
 '瑞士': 'Switzerland',
 '智利': 'Chile',
 '中国': 'China',
 '科特迪瓦': "Côte d'Ivoire",
 '喀麦隆': 'Cameroon',
 '刚果（金）': 'Dem. Rep. Congo',
 '刚果（布）': 'Congo',
 '哥伦比亚': 'Colombia',
 '佛得角': 'Cape Verde',
 '哥斯达黎加': 'Costa Rica',
 '古巴': 'Cuba',
 '北塞浦路斯': 'N. Cyprus',
 '塞浦路斯': 'Cyprus

In [57]:
wconfmp = wconfmp.rename(columns = namelist)
wconfmp = wconfmp.drop(columns = ["摩纳哥", #小国
                                  "圣马力诺",
                                  "直布罗陀",
                                  "梵蒂冈",
                                  "马尔代夫",
                                  "阿鲁巴", 
                                  "圣基茨和尼维斯",
                                  
                                  "钻石公主号邮轮", #非国
                                  
                                  "圣马丁", #法国
                                  "圣巴泰勒米",
                                  "法属圭亚那",
                                  "马提尼克",
                                  "留尼汪",
                                  "瓜德罗普",
                                  "马约特",
                                  
                                  "格恩西岛", #英国
                                  "安圭拉",
                                  "英属维尔京群岛",
                                  
                                  "荷属安的列斯", #荷兰
                                 ])

In [58]:
wconfmp

Unnamed: 0,Date,China,Japan,Thailand,Korea,United States,Singapore,Vietnam,France,Nepal,...,Malawi,St. Pierre and Miquelon,Falkland Is.,São Tomé and Principe,S. Sudan,Yemen,Comoros,Tajikistan,Lesotho,Total
0,2019-12-01,1,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,1
1,2019-12-02,1,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,1
2,2019-12-03,1,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,1
3,2019-12-04,1,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,1
4,2019-12-05,1,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
369,2020-12-04,94023,158402,4053,36332,14542044,58242,1361,2257331,238861,...,6043,16,17,997,3154,2239,615,12308,2145,65813537
370,2020-12-05,94142,160906,4072,36915,14775308,58255,1365,2268552,239885,...,6047,16,17,999,3154,2267,615,12349,2150,66486862
371,2020-12-06,94278,162941,4086,37546,14985142,58260,1366,2281475,240981,...,6049,16,17,999,3166,2304,616,12428,2150,67107528
372,2020-12-07,94373,164446,4107,38161,15169648,58273,1367,2292497,241995,...,6051,16,17,999,3181,2383,616,12469,2150,67644753


In [59]:
colu = wconfmp.columns.to_list()
colu.remove("Date")
colu.remove("Total")
colu

['China',
 'Japan',
 'Thailand',
 'Korea',
 'United States',
 'Singapore',
 'Vietnam',
 'France',
 'Nepal',
 'Australia',
 'Malaysia',
 'Canada',
 'Pakistan',
 'Germany',
 'Sri Lanka',
 'United Arab Emirates',
 'Finland',
 'India',
 'Philippines',
 'Belgium',
 'Spain',
 'United Kingdom',
 'Italy',
 'Cambodia',
 'Russia',
 'Sweden',
 'Egypt',
 'Iran',
 'Lebanon',
 'Israel',
 'Iraq',
 'Afghanistan',
 'Bahrain',
 'Kuwait',
 'Austria',
 'Switzerland',
 'Croatia',
 'Oman',
 'Brazil',
 'Algeria',
 'Greece',
 'Denmark',
 'Estonia',
 'Georgia',
 'Macedonia',
 'Norway',
 'Romania',
 'Azerbaijan',
 'Belarus',
 'Lithuania',
 'Mexico',
 'Nigeria',
 'Netherlands',
 'New Zealand',
 'Iceland',
 'Qatar',
 'Armenia',
 'Ecuador',
 'Ireland',
 'Luxembourg',
 'Andorra',
 'Czech Rep.',
 'Dominica',
 'Indonesia',
 'Jordan',
 'Portugal',
 'Latvia',
 'Morocco',
 'Saudi Arabia',
 'Senegal',
 'Tunisia',
 'Ukraine',
 'Argentina',
 'Chile',
 'Poland',
 'Bosnia and Herz.',
 'Hungary',
 'Slovenia',
 'South Africa',

In [60]:
wconfmp.to_csv("wconfmp.csv")

### Part 3.2: Now-Confirmed

In [61]:
world_ncon_data = world_data
world_ncon_data = world_ncon_data.drop(columns = ["cured", "dead", "confirmed"])
wncond = pd.DataFrame()
for index, row in world_ncon_data.iterrows():
    flag = 0
    for index1, row1 in wncond.iterrows():
        if row1["Date"] ==  row["date"]:
            flag = 1
            wncond.loc[index1, row["country"]] = int(row["nowc"])
            break
    if flag:
        continue
    temp = pd.Series({"Date": row["date"], row["country"]: int(row["nowc"])})
    wncond = wncond.append(temp, ignore_index = True)
wncond = wncond.fillna(0)
for i in wncond.columns:
    if i != "Date":
        wncond[[i]] = wncond[[i]].astype("int")
wncond["Total"] = 0
for i in wncond.columns:
    if i != "Date" and i != "Total":
        wncond["Total"] += wncond[i]
wncond

Unnamed: 0,Date,中国,日本,泰国,韩国,美国,新加坡,越南,法国,尼泊尔,...,马拉维,圣皮埃尔和密克隆,福克兰群岛（马尔维纳斯）,圣多美和普林西比,南苏丹,也门,科摩罗,塔吉克斯坦,莱索托,Total
0,2019-12-01,1,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,1
1,2019-12-02,1,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,1
2,2019-12-03,1,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,1
3,2019-12-04,1,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,1
4,2019-12-05,1,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
369,2020-12-04,1505,24925,154,7185,5690451,68,106,2036251,15447,...,386,2,2,43,115,90,18,503,823,18978091
370,2020-12-05,1562,25144,164,7458,5826037,74,110,2045433,14255,...,390,2,2,45,115,106,18,504,828,19228119
371,2020-12-06,1630,27139,173,7873,5907594,73,111,2057136,13582,...,388,2,2,45,127,124,9,508,828,19416052
372,2020-12-07,1649,25145,179,8311,6021199,84,108,2067756,12948,...,390,2,2,45,142,180,9,501,828,19617063


In [62]:
wncond.to_csv("wncond.csv")

In [63]:
wnconmp = wncond
wnconmp = wnconmp.rename(columns = namelist)
wnconmp = wnconmp.drop(columns = ["摩纳哥", #小国
                                  "圣马力诺",
                                  "直布罗陀",
                                  "梵蒂冈",
                                  "马尔代夫",
                                  "阿鲁巴", 
                                  "圣基茨和尼维斯",
                                  
                                  "钻石公主号邮轮", #非国
                                  
                                  "圣马丁", #法国
                                  "圣巴泰勒米",
                                  "法属圭亚那",
                                  "马提尼克",
                                  "留尼汪",
                                  "瓜德罗普",
                                  "马约特",
                                  
                                  "格恩西岛", #英国
                                  "安圭拉",
                                  "英属维尔京群岛",
                                  
                                  "荷属安的列斯", #荷兰
                                 ])

In [64]:
wnconmp.to_csv("wnconmp.csv")

### Part 3.3: Cured

In [65]:
world_cure_data = world_data
world_cure_data = world_cure_data.drop(columns = ["nowc", "dead", "confirmed"])
wcured = pd.DataFrame()
for index, row in world_cure_data.iterrows():
    flag = 0
    for index1, row1 in wcured.iterrows():
        if row1["Date"] ==  row["date"]:
            flag = 1
            wcured.loc[index1, row["country"]] = int(row["cured"])
            break
    if flag:
        continue
    temp = pd.Series({"Date": row["date"], row["country"]: int(row["cured"])})
    wcured = wcured.append(temp, ignore_index = True)
wcured = wcured.fillna(0)
for i in wcured.columns:
    if i != "Date":
        wcured[[i]] = wcured[[i]].astype("int")
wcured["Total"] = 0
for i in wcured.columns:
    if i != "Date" and i != "Total":
        wcured["Total"] += wcured[i]
wcured

Unnamed: 0,Date,中国,日本,泰国,韩国,美国,新加坡,越南,法国,尼泊尔,...,马拉维,圣皮埃尔和密克隆,福克兰群岛（马尔维纳斯）,圣多美和普林西比,南苏丹,也门,科摩罗,塔吉克斯坦,莱索托,Total
0,2019-12-01,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,2019-12-02,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,2019-12-03,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,2019-12-04,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,2019-12-05,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
369,2020-12-04,87765,131176,3839,28611,8568618,58145,1220,166940,221847,...,5472,14,15,937,2977,1525,590,11718,1278,45318936
370,2020-12-05,87827,133443,3848,28917,8663603,58152,1220,168352,224053,...,5472,14,15,937,2977,1534,590,11758,1278,45729914
371,2020-12-06,87895,133443,3853,29128,8789706,58158,1220,169358,225805,...,5476,14,15,937,2977,1547,600,11833,1278,46151521
372,2020-12-07,87971,136903,3868,29301,8859465,58160,1224,169586,227433,...,5476,14,15,937,2977,1554,600,11881,1278,46482175


In [66]:
for i in range(40):
    wcured = wcured.drop(i)

In [67]:
wcured

Unnamed: 0,Date,中国,日本,泰国,韩国,美国,新加坡,越南,法国,尼泊尔,...,马拉维,圣皮埃尔和密克隆,福克兰群岛（马尔维纳斯）,圣多美和普林西比,南苏丹,也门,科摩罗,塔吉克斯坦,莱索托,Total
40,2020-01-10,2,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,2
41,2020-01-11,6,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,6
42,2020-01-12,7,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,7
43,2020-01-13,7,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,7
44,2020-01-14,7,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,7
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
369,2020-12-04,87765,131176,3839,28611,8568618,58145,1220,166940,221847,...,5472,14,15,937,2977,1525,590,11718,1278,45318936
370,2020-12-05,87827,133443,3848,28917,8663603,58152,1220,168352,224053,...,5472,14,15,937,2977,1534,590,11758,1278,45729914
371,2020-12-06,87895,133443,3853,29128,8789706,58158,1220,169358,225805,...,5476,14,15,937,2977,1547,600,11833,1278,46151521
372,2020-12-07,87971,136903,3868,29301,8859465,58160,1224,169586,227433,...,5476,14,15,937,2977,1554,600,11881,1278,46482175


In [68]:
wcured.to_csv("wcured.csv")

In [69]:
wcuremp = wcured
wcuremp = wcuremp.rename(columns = namelist)
wcuremp = wcuremp.drop(columns = ["摩纳哥", #小国
                                  "圣马力诺",
                                  "直布罗陀",
                                  "梵蒂冈",
                                  "马尔代夫",
                                  "阿鲁巴", 
                                  "圣基茨和尼维斯",
                                  
                                  "钻石公主号邮轮", #非国
                                  
                                  "圣马丁", #法国
                                  "圣巴泰勒米",
                                  "法属圭亚那",
                                  "马提尼克",
                                  "留尼汪",
                                  "瓜德罗普",
                                  "马约特",
                                  
                                  "格恩西岛", #英国
                                  "安圭拉",
                                  "英属维尔京群岛",
                                  
                                  "荷属安的列斯", #荷兰
                                 ])

In [70]:
wcuremp

Unnamed: 0,Date,China,Japan,Thailand,Korea,United States,Singapore,Vietnam,France,Nepal,...,Malawi,St. Pierre and Miquelon,Falkland Is.,São Tomé and Principe,S. Sudan,Yemen,Comoros,Tajikistan,Lesotho,Total
40,2020-01-10,2,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,2
41,2020-01-11,6,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,6
42,2020-01-12,7,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,7
43,2020-01-13,7,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,7
44,2020-01-14,7,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,7
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
369,2020-12-04,87765,131176,3839,28611,8568618,58145,1220,166940,221847,...,5472,14,15,937,2977,1525,590,11718,1278,45318936
370,2020-12-05,87827,133443,3848,28917,8663603,58152,1220,168352,224053,...,5472,14,15,937,2977,1534,590,11758,1278,45729914
371,2020-12-06,87895,133443,3853,29128,8789706,58158,1220,169358,225805,...,5476,14,15,937,2977,1547,600,11833,1278,46151521
372,2020-12-07,87971,136903,3868,29301,8859465,58160,1224,169586,227433,...,5476,14,15,937,2977,1554,600,11881,1278,46482175


In [71]:
wcuremp.to_csv("wcuremp.csv")

### Part 3.4: Dead

In [72]:
world_dead_data = world_data
world_dead_data = world_dead_data.drop(columns = ["nowc", "cured", "confirmed"])
wdeadd = pd.DataFrame()
for index, row in world_dead_data.iterrows():
    flag = 0
    for index1, row1 in wdeadd.iterrows():
        if row1["Date"] ==  row["date"]:
            flag = 1
            wdeadd.loc[index1, row["country"]] = int(row["dead"])
            break
    if flag:
        continue
    temp = pd.Series({"Date": row["date"], row["country"]: int(row["dead"])})
    wdeadd = wdeadd.append(temp, ignore_index = True)
wdeadd = wdeadd.fillna(0)
for i in wdeadd.columns:
    if i != "Date":
        wdeadd[[i]] = wdeadd[[i]].astype("int")
wdeadd["Total"] = 0
for i in wdeadd.columns:
    if i != "Date" and i != "Total":
        wdeadd["Total"] += wdeadd[i]
wdeadd

Unnamed: 0,Date,中国,日本,泰国,韩国,美国,新加坡,越南,法国,尼泊尔,...,马拉维,圣皮埃尔和密克隆,福克兰群岛（马尔维纳斯）,圣多美和普林西比,南苏丹,也门,科摩罗,塔吉克斯坦,莱索托,Total
0,2019-12-01,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,2019-12-02,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,2019-12-03,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,2019-12-04,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,2019-12-05,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
369,2020-12-04,4753,2301,60,536,282975,29,35,54140,1567,...,185,0,0,17,62,624,7,87,44,1516510
370,2020-12-05,4753,2319,60,540,285668,29,35,54767,1577,...,185,0,0,17,62,627,7,87,44,1528829
371,2020-12-06,4753,2359,60,545,287842,29,35,54981,1594,...,185,0,0,17,62,633,7,87,44,1539955
372,2020-12-07,4753,2398,60,549,288984,29,35,55155,1614,...,185,0,0,17,62,649,7,87,44,1545515


In [73]:
for i in range(40):
    wdeadd = wdeadd.drop(i)
wdeadd

Unnamed: 0,Date,中国,日本,泰国,韩国,美国,新加坡,越南,法国,尼泊尔,...,马拉维,圣皮埃尔和密克隆,福克兰群岛（马尔维纳斯）,圣多美和普林西比,南苏丹,也门,科摩罗,塔吉克斯坦,莱索托,Total
40,2020-01-10,1,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,1
41,2020-01-11,1,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,1
42,2020-01-12,1,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,1
43,2020-01-13,1,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,1
44,2020-01-14,1,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
369,2020-12-04,4753,2301,60,536,282975,29,35,54140,1567,...,185,0,0,17,62,624,7,87,44,1516510
370,2020-12-05,4753,2319,60,540,285668,29,35,54767,1577,...,185,0,0,17,62,627,7,87,44,1528829
371,2020-12-06,4753,2359,60,545,287842,29,35,54981,1594,...,185,0,0,17,62,633,7,87,44,1539955
372,2020-12-07,4753,2398,60,549,288984,29,35,55155,1614,...,185,0,0,17,62,649,7,87,44,1545515


In [74]:
wdeadd.to_csv("wdeadd.csv")

In [75]:
wdeadmp = wdeadd
wdeadmp = wdeadmp.rename(columns = namelist)
wdeadmp = wdeadmp.drop(columns = ["摩纳哥", #小国
                                  "圣马力诺",
                                  "直布罗陀",
                                  "梵蒂冈",
                                  "马尔代夫",
                                  "阿鲁巴", 
                                  "圣基茨和尼维斯",
                                  
                                  "钻石公主号邮轮", #非国
                                  
                                  "圣马丁", #法国
                                  "圣巴泰勒米",
                                  "法属圭亚那",
                                  "马提尼克",
                                  "留尼汪",
                                  "瓜德罗普",
                                  "马约特",
                                  
                                  "格恩西岛", #英国
                                  "安圭拉",
                                  "英属维尔京群岛",
                                  
                                  "荷属安的列斯", #荷兰
                                 ])

In [76]:
wdeadmp.to_csv("wdeadmp.csv")