-
Notifications
You must be signed in to change notification settings - Fork 0
/
getfiles.py
57 lines (44 loc) · 2.17 KB
/
getfiles.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
# This script loads global COVID-19 data into pandas DataFrames.
import pandas as pd
import sys
import os
def getsums(df, regionname):
    """Aggregate the case counts of *df* into one row per date.

    Args:
        df: DataFrame with a ``date`` column and the count columns
            ``confirmed``, ``suspected``, ``cured`` and ``dead``.
        regionname: Label written into the ``countryCode`` column of every
            output row.

    Returns:
        A new DataFrame with columns ``date``, ``countryCode``,
        ``confirmed``, ``suspected``, ``cured`` and ``dead``, indexed
        ``0..n-1``, holding the per-date sum of each count column. Dates
        appear in the order they are first seen in *df*. Rows whose date is
        missing are ignored (``groupby`` drops NaN keys by default).

    Raises:
        KeyError: If *df* lacks ``date`` or one of the count columns.
    """
    count_cols = ['confirmed', 'suspected', 'cured', 'dead']

    # Take the dates and the sums from the SAME groupby so each date is
    # always paired with its own totals. ``sort=False`` keeps the dates in
    # first-appearance order (what ``df['date'].unique()`` would give)
    # instead of re-sorting them, so the row order does not depend on
    # whether the input happened to be sorted by date.
    totals = df.groupby('date', sort=False)[count_cols].sum().reset_index()

    # Place the region label right after the date column.
    totals.insert(1, 'countryCode', regionname)
    return totals
def main():
    """Load ``Wuhan-2019-nCov.csv`` and split it into per-region DataFrames.

    The CSV is looked up relative to the current working directory: first as
    ``Wuhan-2019-nCov.csv`` and then as
    ``data-changhailan/Wuhan-2019-nCov.csv``, so the script works both from
    inside the data directory and from its parent.

    Returns:
        A 10-tuple ``(df_hubei, df_cn2, df_hubei2, df_wuhan, df_ca, df_it,
        df_sk, df_sg, df_uk, df)``:

        * ``df_hubei`` -- rows with ``provinceCode == 420000``.
        * ``df_cn2`` / ``df_hubei2`` -- per-date sums from ``getsums`` for
          the ``CN`` rows and the Hubei rows respectively.
        * ``df_wuhan`` -- rows with ``cityCode`` 420100.
        * ``df_ca``, ``df_it``, ``df_sk``, ``df_sg``, ``df_uk`` -- rows with
          ``countryCode`` ``CA``, ``IT``, ``KR``, ``SG`` and ``GB``.
        * ``df`` -- the full, unfiltered CSV contents.

        Every filtered frame is restricted to the columns listed in ``cols``.

    Raises:
        FileNotFoundError: If the CSV is found in neither location.
    """
    cols = ['date', 'countryCode', 'provinceCode', 'cityCode', 'confirmed',
            'suspected', 'cured', 'dead']

    filename = 'Wuhan-2019-nCov.csv'
    candidates = [filename, os.path.join('data-changhailan', filename)]
    csv_path = next((p for p in candidates if os.path.isfile(p)), None)
    if csv_path is None:
        # Fail with a clear message instead of an unbound-variable error.
        raise FileNotFoundError(
            'Could not find {} in {!r}; looked for: {}'.format(
                filename, os.getcwd(), ', '.join(candidates)))
    df = pd.read_csv(csv_path)

    def rows_for_country(code):
        # Rows of one country, restricted to the columns of interest.
        return df.loc[df['countryCode'] == code, cols]

    df_cn = rows_for_country('CN')
    df_hubei = df.loc[df['provinceCode'] == 420000.0, cols]
    # NOTE: province-level-only rows (those without a city code) must be
    # selected with ``df['cityCode'].isna()``; comparing against NaN with
    # ``==`` never matches.

    # Compare the city code numerically so the filter matches whether pandas
    # parsed the column as numbers (e.g. 420100.0) or as strings ('420100').
    city_code = pd.to_numeric(df['cityCode'], errors='coerce')
    df_wuhan = df.loc[city_code == 420100, cols]

    df_ca = rows_for_country('CA')
    df_it = rows_for_country('IT')
    df_sk = rows_for_country('KR')
    df_sg = rows_for_country('SG')
    df_uk = rows_for_country('GB')

    df_cn2 = getsums(df_cn, 'CN')
    df_hubei2 = getsums(df_hubei, 'Hubei')
    return df_hubei, df_cn2, df_hubei2, df_wuhan, df_ca, df_it, df_sk, df_sg, df_uk, df
if __name__ == "__main__":
    # When run as a script, expose every frame returned by main() as a
    # module-level name (handy for interactive inspection afterwards).
    (
        df_hubei,
        df_cn2,
        df_hubei2,
        df_wuhan,
        df_ca,
        df_it,
        df_sk,
        df_sg,
        df_uk,
        df,
    ) = main()