Skip to content
This repository has been archived by the owner on Dec 22, 2022. It is now read-only.

Commit

Permalink
Data collection - Peru automated #52 #71
Browse files Browse the repository at this point in the history
  • Loading branch information
pablodz committed May 17, 2020
1 parent 6a63538 commit 20cff47
Show file tree
Hide file tree
Showing 85 changed files with 2,299 additions and 31 deletions.
38 changes: 7 additions & 31 deletions utils/scripts/data_collection/data/brazil_data.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,29 +2,18 @@
import sys
import os

if 1 < len(sys.argv) < 3:
init_date = sys.argv[1]
try:
init_date = pd.to_datetime(init_date, format="%Y/%m/%d")
except ValueError:
print("incorrect date format")
sys.exit(0)
else:
print("Usage brazil_data.py <initial_date>\n date format: %Y/%m/%d example: 2020/03/25")
sys.exit(0)

init_date = pd.to_datetime("2020/03/25", format="%Y/%m/%d")

confirmed_url = "https://raw.githubusercontent.com/elhenrico/covid19-Brazil-timeseries/master/confirmed-cases.csv"
deaths_url = "https://raw.githubusercontent.com/elhenrico/covid19-Brazil-timeseries/master/deaths.csv"
dsrp_github="https://raw.githubusercontent.com/DataScienceResearchPeru/covid-19_latinoamerica/master/latam_covid_19_data/daily_reports/2020-03-08.csv"


confirmed = pd.read_csv(confirmed_url)
deaths = pd.read_csv(deaths_url)
compare = pd.read_csv(dsrp_github)

###
compare = pd.read_csv("https://raw.githubusercontent.com/DataScienceResearchPeru/covid-19_latinoamerica/master/latam_covid_19_data/daily_reports/2020-03-08.csv")
brazil_compare = compare[compare.Country=="Brazil"]
###

brazil_compare = compare[compare['Country']=="Brazil"]

string_a = "áéíóúäëïöüâêîôûã" # character to be replaced
string_b = "aeiouaeiouaeioua" # character to replace with
Expand All @@ -40,18 +29,11 @@ def remove_tildes(string):

confirmed = confirmed.rename(columns={"Unnamed: 0": "Subdivision", "Unnamed: 1": "Code"})
deaths = deaths.rename(columns={"Unnamed: 0": "Subdivision", "Unnamed: 1": "Code"})
#print(confirmed)

sub_brazil = sorted(brazil_compare.Subdivision.unique())
sub_repo = sorted(confirmed.iloc[:,0].unique())

#print(sub_brazil)

#print("Subdivision brazil:", len(sub_brazil))
#print("Subdivisions:", len(sub_repo))

other_subdivisions = list(set(sub_repo) - (set(sub_brazil))) # Subdivisiones other than those listed in the main repo
#print(other_subdivisions)

confirmed = confirmed[~confirmed.Subdivision.isin(other_subdivisions)].sort_values("Subdivision")
deaths = deaths[~deaths.Subdivision.isin(other_subdivisions)].sort_values("Subdivision")
Expand All @@ -62,9 +44,6 @@ def remove_tildes(string):
confirmed_columns = confirmed.columns[1:]
deaths_columns = deaths.columns[1:]

#print(" ".join(confirmed_columns))
#print(" ".join(deaths_columns))

columns_order = ["ISO 3166-2 Code", "Country", "Subdivision", "Last Update", "Confirmed", "Deaths", "Recovered"]

for column in confirmed_columns:
Expand Down Expand Up @@ -106,9 +85,6 @@ def remove_tildes(string):
daily_report.Confirmed = daily_report.Confirmed.astype("int64")
daily_report.Recovered = daily_report.Recovered.astype("int64")


path_data=f"utils/scripts/data_collection/data/brazil_temporal/{date}.csv"
print(daily_report[daily_report.Country=="Brazil"])
# os.system("git pull")
daily_report.to_csv(f"utils/scripts/data_collection/data/brazil_temporal/{date}.csv", index=False)
# os.system(f"git add {daily_report_path}")
# os.system("git commit -m 'Update Brazil'")
daily_report.to_csv(path_data, index=False)
96 changes: 96 additions & 0 deletions utils/scripts/data_collection/data/peru_data.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,96 @@
import pandas as pd
import numpy as np
import datetime
import sys
import os


# One row per Peruvian region: (ISO 3166-2 code, name as spelled in the remote
# dataset, name as spelled in the local daily reports).  The two spellings
# differ only in accents and capitalisation (e.g. 'Huánuco' vs 'Huanuco',
# 'Madre de Dios' vs 'Madre de dios').
_PERU_REGIONS = (
    ('PE-AMA', 'Amazonas', 'Amazonas'),
    ('PE-ANC', 'Ancash', 'Ancash'),
    ('PE-APU', 'Apurimac', 'Apurimac'),
    ('PE-ARE', 'Arequipa', 'Arequipa'),
    ('PE-AYA', 'Ayacucho', 'Ayacucho'),
    ('PE-CAJ', 'Cajamarca', 'Cajamarca'),
    ('PE-CAL', 'Callao', 'Callao'),
    ('PE-CUS', 'Cusco', 'Cusco'),
    ('PE-HUV', 'Huancavelica', 'Huancavelica'),
    ('PE-HUC', 'Huánuco', 'Huanuco'),
    ('PE-ICA', 'Ica', 'Ica'),
    ('PE-JUN', 'Junín', 'Junin'),
    ('PE-LAL', 'La Libertad', 'La Libertad'),
    ('PE-LAM', 'Lambayeque', 'Lambayeque'),
    ('PE-LIM', 'Lima', 'Lima'),
    ('PE-LOR', 'Loreto', 'Loreto'),
    ('PE-MDD', 'Madre de Dios', 'Madre de dios'),
    ('PE-MOQ', 'Moquegua', 'Moquegua'),
    ('PE-PAS', 'Pasco', 'Pasco'),
    ('PE-PIU', 'Piura', 'Piura'),
    ('PE-PUN', 'Puno', 'Puno'),
    ('PE-SAM', 'San Martín', 'San Martin'),
    ('PE-TAC', 'Tacna', 'Tacna'),
    ('PE-TUM', 'Tumbes', 'Tumbes'),
    ('PE-UCA', 'Ucayali', 'Ucayali'),
)

# Lookup tables built once at import time instead of on every call.
_ISO_BY_NAME = {
    'remote': {remote: iso for iso, remote, _ in _PERU_REGIONS},
    'local': {local: iso for iso, _, local in _PERU_REGIONS},
}


def get_iso_by_country_name(country_name, mode):
    """Return the ISO 3166-2 code of a Peruvian region.

    Despite the parameter name, ``country_name`` is a region (subdivision)
    name, not a country.

    Args:
        country_name: Region name to look up.  The match is exact
            (case- and accent-sensitive).
        mode: ``'remote'`` to match the spelling used by the remote dataset
            (accented, e.g. ``'Junín'``) or ``'local'`` to match the
            spelling used by the local reports (e.g. ``'Junin'``).

    Returns:
        The ISO code such as ``'PE-LIM'``, or an empty string when ``mode``
        is neither ``'remote'`` nor ``'local'``.

    Raises:
        IndexError: If ``country_name`` is not a known region for the given
            mode.  The exception type is kept for backward compatibility
            with the previous DataFrame-based lookup.
    """
    lookup = _ISO_BY_NAME.get(mode)
    if lookup is None:
        # Unknown mode: historical behaviour is to return an empty code.
        return ''

    try:
        return lookup[country_name]
    except (KeyError, TypeError):
        raise IndexError(
            'Unknown Peruvian region {!r} for mode {!r}'.format(
                country_name, mode)) from None


def generate_list_dates(path):
    """Build a list of recent dates sized by the number of files in *path*.

    The directory is only used to count its files (one of them is assumed
    to be a README, hence the ``- 1``); the file names themselves are not
    read.  Dates start at today and go backwards one day at a time.

    Args:
        path: Directory whose top-level files are counted.

    Returns:
        A tuple ``(date_list_csv, date_list)`` where ``date_list`` holds
        ``YYYY-MM-DD`` strings (newest first) and ``date_list_csv`` holds
        the same strings with a ``.csv`` suffix.
    """
    # Only the top level of the directory is inspected.
    _, _, filenames = next(os.walk(path))
    file_count = len(filenames)
    print('There is {} files on the path and one is README. We iterate {} times...'.format(
        file_count, file_count - 1))

    today = datetime.datetime.today().date()
    date_list_csv = []
    date_list = []
    for offset in range(file_count - 1):
        day = str(today - datetime.timedelta(days=offset))
        date_list_csv.append(day + str('.csv'))
        date_list.append(day)

    print('Adding {} dates in a list...'.format(len(date_list_csv)))
    print("List of dates:", date_list)
    return date_list_csv, date_list


def load_and_generatecsv():
    """Generate one Peru daily-report CSV per date from the remote dataset.

    Downloads the remote Peru time series and a reference daily report,
    keeps the Peruvian rows (ISO code containing ``PE-``) of the reference
    report as a template, and for every date returned by
    ``generate_list_dates`` writes ``<date>.csv`` into the ``peru_temporal``
    directory.

    Only ``Confirmed`` and ``Last Update`` are filled from the remote
    dataset; ``Deaths`` and ``Recovered`` are written as 0.

    All paths are relative, so the function must be run from the repository
    root.  It performs network reads and file writes and returns ``None``.
    """
    today = datetime.datetime.now().strftime('%Y-%m-%d')

    path_dsrp_daily_reports = 'latam_covid_19_data/daily_reports/'
    path_peru_csv = "https://raw.githubusercontent.com/jmcastagnetto/covid-19-peru-data/master/datos/covid-19-peru-data.csv"
    path_dsrp = "https://raw.githubusercontent.com/DataScienceResearchPeru/covid-19_latinoamerica/master/latam_covid_19_data/daily_reports/2020-03-08.csv"
    path_csv = "utils/scripts/data_collection/data/peru_temporal/"

    data_peru = pd.read_csv(path_peru_csv)
    data_dsrp = pd.read_csv(path_dsrp)

    _, array_dates = generate_list_dates(path_dsrp_daily_reports)

    # The Peru template rows do not depend on the date, so select them once.
    # na=False keeps rows with a missing ISO code from breaking the mask.
    peru_template = data_dsrp[
        data_dsrp['ISO 3166-2 Code'].str.contains('PE-', na=False)]

    for d in array_dates:
        # Work on an independent copy so the assignments below neither touch
        # the template nor trigger pandas' chained-assignment warning.
        temp_dsrp = peru_template.copy()
        temp_dsrp['Confirmed'] = 0
        temp_dsrp['Deaths'] = 0
        temp_dsrp['Recovered'] = 0

        # Filter into a separate frame: overwriting data_peru here would
        # leave only the first date's rows for every later iteration.
        day_rows = data_peru[data_peru['date'] == d]
        day_rows = day_rows.fillna('')
        # Rows without a region are skipped.
        day_rows = day_rows[day_rows['region'] != '']

        for _, record in day_rows.iterrows():
            string_iso = get_iso_by_country_name(record['region'], 'remote')
            matches = temp_dsrp[temp_dsrp['ISO 3166-2 Code'] == string_iso]
            target = matches.index.values[0]
            temp_dsrp.loc[target, ['Confirmed']] = record['confirmed']
            temp_dsrp.loc[target, ['Last Update']] = today

        temp_dsrp = temp_dsrp.fillna('')

        print(d, end=' - ')

        temp_dsrp.to_csv(path_csv + d + '.csv', index=False)


if __name__ == "__main__":
    load_and_generatecsv()
26 changes: 26 additions & 0 deletions utils/scripts/data_collection/data/peru_temporal/2020-02-25.csv
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
ISO 3166-2 Code,Country,Subdivision,Last Update,Confirmed,Deaths,Recovered
PE-AMA,Peru,Amazonas,,0,0,0
PE-ANC,Peru,Ancash,,0,0,0
PE-APU,Peru,Apurimac,,0,0,0
PE-ARE,Peru,Arequipa,,0,0,0
PE-AYA,Peru,Ayacucho,,0,0,0
PE-CAJ,Peru,Cajamarca,,0,0,0
PE-CAL,Peru,Callao,,0,0,0
PE-CUS,Peru,Cusco,,0,0,0
PE-HUV,Peru,Huancavelica,,0,0,0
PE-HUC,Peru,Huanuco,,0,0,0
PE-ICA,Peru,Ica,,0,0,0
PE-JUN,Peru,Junin,,0,0,0
PE-LAL,Peru,La Libertad,,0,0,0
PE-LAM,Peru,Lambayeque,,0,0,0
PE-LIM,Peru,Lima,,0,0,0
PE-LOR,Peru,Loreto,,0,0,0
PE-MDD,Peru,Madre de dios,,0,0,0
PE-MOQ,Peru,Moquegua,,0,0,0
PE-PAS,Peru,Pasco,,0,0,0
PE-PIU,Peru,Piura,,0,0,0
PE-PUN,Peru,Puno,,0,0,0
PE-SAM,Peru,San Martin,,0,0,0
PE-TAC,Peru,Tacna,,0,0,0
PE-TUM,Peru,Tumbes,,0,0,0
PE-UCA,Peru,Ucayali,,0,0,0
26 changes: 26 additions & 0 deletions utils/scripts/data_collection/data/peru_temporal/2020-02-26.csv
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
ISO 3166-2 Code,Country,Subdivision,Last Update,Confirmed,Deaths,Recovered
PE-AMA,Peru,Amazonas,,0,0,0
PE-ANC,Peru,Ancash,,0,0,0
PE-APU,Peru,Apurimac,,0,0,0
PE-ARE,Peru,Arequipa,,0,0,0
PE-AYA,Peru,Ayacucho,,0,0,0
PE-CAJ,Peru,Cajamarca,,0,0,0
PE-CAL,Peru,Callao,,0,0,0
PE-CUS,Peru,Cusco,,0,0,0
PE-HUV,Peru,Huancavelica,,0,0,0
PE-HUC,Peru,Huanuco,,0,0,0
PE-ICA,Peru,Ica,,0,0,0
PE-JUN,Peru,Junin,,0,0,0
PE-LAL,Peru,La Libertad,,0,0,0
PE-LAM,Peru,Lambayeque,,0,0,0
PE-LIM,Peru,Lima,,0,0,0
PE-LOR,Peru,Loreto,,0,0,0
PE-MDD,Peru,Madre de dios,,0,0,0
PE-MOQ,Peru,Moquegua,,0,0,0
PE-PAS,Peru,Pasco,,0,0,0
PE-PIU,Peru,Piura,,0,0,0
PE-PUN,Peru,Puno,,0,0,0
PE-SAM,Peru,San Martin,,0,0,0
PE-TAC,Peru,Tacna,,0,0,0
PE-TUM,Peru,Tumbes,,0,0,0
PE-UCA,Peru,Ucayali,,0,0,0
26 changes: 26 additions & 0 deletions utils/scripts/data_collection/data/peru_temporal/2020-02-27.csv
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
ISO 3166-2 Code,Country,Subdivision,Last Update,Confirmed,Deaths,Recovered
PE-AMA,Peru,Amazonas,,0,0,0
PE-ANC,Peru,Ancash,,0,0,0
PE-APU,Peru,Apurimac,,0,0,0
PE-ARE,Peru,Arequipa,,0,0,0
PE-AYA,Peru,Ayacucho,,0,0,0
PE-CAJ,Peru,Cajamarca,,0,0,0
PE-CAL,Peru,Callao,,0,0,0
PE-CUS,Peru,Cusco,,0,0,0
PE-HUV,Peru,Huancavelica,,0,0,0
PE-HUC,Peru,Huanuco,,0,0,0
PE-ICA,Peru,Ica,,0,0,0
PE-JUN,Peru,Junin,,0,0,0
PE-LAL,Peru,La Libertad,,0,0,0
PE-LAM,Peru,Lambayeque,,0,0,0
PE-LIM,Peru,Lima,,0,0,0
PE-LOR,Peru,Loreto,,0,0,0
PE-MDD,Peru,Madre de dios,,0,0,0
PE-MOQ,Peru,Moquegua,,0,0,0
PE-PAS,Peru,Pasco,,0,0,0
PE-PIU,Peru,Piura,,0,0,0
PE-PUN,Peru,Puno,,0,0,0
PE-SAM,Peru,San Martin,,0,0,0
PE-TAC,Peru,Tacna,,0,0,0
PE-TUM,Peru,Tumbes,,0,0,0
PE-UCA,Peru,Ucayali,,0,0,0
26 changes: 26 additions & 0 deletions utils/scripts/data_collection/data/peru_temporal/2020-02-28.csv
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
ISO 3166-2 Code,Country,Subdivision,Last Update,Confirmed,Deaths,Recovered
PE-AMA,Peru,Amazonas,,0,0,0
PE-ANC,Peru,Ancash,,0,0,0
PE-APU,Peru,Apurimac,,0,0,0
PE-ARE,Peru,Arequipa,,0,0,0
PE-AYA,Peru,Ayacucho,,0,0,0
PE-CAJ,Peru,Cajamarca,,0,0,0
PE-CAL,Peru,Callao,,0,0,0
PE-CUS,Peru,Cusco,,0,0,0
PE-HUV,Peru,Huancavelica,,0,0,0
PE-HUC,Peru,Huanuco,,0,0,0
PE-ICA,Peru,Ica,,0,0,0
PE-JUN,Peru,Junin,,0,0,0
PE-LAL,Peru,La Libertad,,0,0,0
PE-LAM,Peru,Lambayeque,,0,0,0
PE-LIM,Peru,Lima,,0,0,0
PE-LOR,Peru,Loreto,,0,0,0
PE-MDD,Peru,Madre de dios,,0,0,0
PE-MOQ,Peru,Moquegua,,0,0,0
PE-PAS,Peru,Pasco,,0,0,0
PE-PIU,Peru,Piura,,0,0,0
PE-PUN,Peru,Puno,,0,0,0
PE-SAM,Peru,San Martin,,0,0,0
PE-TAC,Peru,Tacna,,0,0,0
PE-TUM,Peru,Tumbes,,0,0,0
PE-UCA,Peru,Ucayali,,0,0,0
26 changes: 26 additions & 0 deletions utils/scripts/data_collection/data/peru_temporal/2020-02-29.csv
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
ISO 3166-2 Code,Country,Subdivision,Last Update,Confirmed,Deaths,Recovered
PE-AMA,Peru,Amazonas,,0,0,0
PE-ANC,Peru,Ancash,,0,0,0
PE-APU,Peru,Apurimac,,0,0,0
PE-ARE,Peru,Arequipa,,0,0,0
PE-AYA,Peru,Ayacucho,,0,0,0
PE-CAJ,Peru,Cajamarca,,0,0,0
PE-CAL,Peru,Callao,,0,0,0
PE-CUS,Peru,Cusco,,0,0,0
PE-HUV,Peru,Huancavelica,,0,0,0
PE-HUC,Peru,Huanuco,,0,0,0
PE-ICA,Peru,Ica,,0,0,0
PE-JUN,Peru,Junin,,0,0,0
PE-LAL,Peru,La Libertad,,0,0,0
PE-LAM,Peru,Lambayeque,,0,0,0
PE-LIM,Peru,Lima,,0,0,0
PE-LOR,Peru,Loreto,,0,0,0
PE-MDD,Peru,Madre de dios,,0,0,0
PE-MOQ,Peru,Moquegua,,0,0,0
PE-PAS,Peru,Pasco,,0,0,0
PE-PIU,Peru,Piura,,0,0,0
PE-PUN,Peru,Puno,,0,0,0
PE-SAM,Peru,San Martin,,0,0,0
PE-TAC,Peru,Tacna,,0,0,0
PE-TUM,Peru,Tumbes,,0,0,0
PE-UCA,Peru,Ucayali,,0,0,0
26 changes: 26 additions & 0 deletions utils/scripts/data_collection/data/peru_temporal/2020-03-01.csv
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
ISO 3166-2 Code,Country,Subdivision,Last Update,Confirmed,Deaths,Recovered
PE-AMA,Peru,Amazonas,,0,0,0
PE-ANC,Peru,Ancash,,0,0,0
PE-APU,Peru,Apurimac,,0,0,0
PE-ARE,Peru,Arequipa,,0,0,0
PE-AYA,Peru,Ayacucho,,0,0,0
PE-CAJ,Peru,Cajamarca,,0,0,0
PE-CAL,Peru,Callao,,0,0,0
PE-CUS,Peru,Cusco,,0,0,0
PE-HUV,Peru,Huancavelica,,0,0,0
PE-HUC,Peru,Huanuco,,0,0,0
PE-ICA,Peru,Ica,,0,0,0
PE-JUN,Peru,Junin,,0,0,0
PE-LAL,Peru,La Libertad,,0,0,0
PE-LAM,Peru,Lambayeque,,0,0,0
PE-LIM,Peru,Lima,,0,0,0
PE-LOR,Peru,Loreto,,0,0,0
PE-MDD,Peru,Madre de dios,,0,0,0
PE-MOQ,Peru,Moquegua,,0,0,0
PE-PAS,Peru,Pasco,,0,0,0
PE-PIU,Peru,Piura,,0,0,0
PE-PUN,Peru,Puno,,0,0,0
PE-SAM,Peru,San Martin,,0,0,0
PE-TAC,Peru,Tacna,,0,0,0
PE-TUM,Peru,Tumbes,,0,0,0
PE-UCA,Peru,Ucayali,,0,0,0
26 changes: 26 additions & 0 deletions utils/scripts/data_collection/data/peru_temporal/2020-03-02.csv
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
ISO 3166-2 Code,Country,Subdivision,Last Update,Confirmed,Deaths,Recovered
PE-AMA,Peru,Amazonas,,0,0,0
PE-ANC,Peru,Ancash,,0,0,0
PE-APU,Peru,Apurimac,,0,0,0
PE-ARE,Peru,Arequipa,,0,0,0
PE-AYA,Peru,Ayacucho,,0,0,0
PE-CAJ,Peru,Cajamarca,,0,0,0
PE-CAL,Peru,Callao,,0,0,0
PE-CUS,Peru,Cusco,,0,0,0
PE-HUV,Peru,Huancavelica,,0,0,0
PE-HUC,Peru,Huanuco,,0,0,0
PE-ICA,Peru,Ica,,0,0,0
PE-JUN,Peru,Junin,,0,0,0
PE-LAL,Peru,La Libertad,,0,0,0
PE-LAM,Peru,Lambayeque,,0,0,0
PE-LIM,Peru,Lima,,0,0,0
PE-LOR,Peru,Loreto,,0,0,0
PE-MDD,Peru,Madre de dios,,0,0,0
PE-MOQ,Peru,Moquegua,,0,0,0
PE-PAS,Peru,Pasco,,0,0,0
PE-PIU,Peru,Piura,,0,0,0
PE-PUN,Peru,Puno,,0,0,0
PE-SAM,Peru,San Martin,,0,0,0
PE-TAC,Peru,Tacna,,0,0,0
PE-TUM,Peru,Tumbes,,0,0,0
PE-UCA,Peru,Ucayali,,0,0,0
26 changes: 26 additions & 0 deletions utils/scripts/data_collection/data/peru_temporal/2020-03-03.csv
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
ISO 3166-2 Code,Country,Subdivision,Last Update,Confirmed,Deaths,Recovered
PE-AMA,Peru,Amazonas,,0,0,0
PE-ANC,Peru,Ancash,,0,0,0
PE-APU,Peru,Apurimac,,0,0,0
PE-ARE,Peru,Arequipa,,0,0,0
PE-AYA,Peru,Ayacucho,,0,0,0
PE-CAJ,Peru,Cajamarca,,0,0,0
PE-CAL,Peru,Callao,,0,0,0
PE-CUS,Peru,Cusco,,0,0,0
PE-HUV,Peru,Huancavelica,,0,0,0
PE-HUC,Peru,Huanuco,,0,0,0
PE-ICA,Peru,Ica,,0,0,0
PE-JUN,Peru,Junin,,0,0,0
PE-LAL,Peru,La Libertad,,0,0,0
PE-LAM,Peru,Lambayeque,,0,0,0
PE-LIM,Peru,Lima,,0,0,0
PE-LOR,Peru,Loreto,,0,0,0
PE-MDD,Peru,Madre de dios,,0,0,0
PE-MOQ,Peru,Moquegua,,0,0,0
PE-PAS,Peru,Pasco,,0,0,0
PE-PIU,Peru,Piura,,0,0,0
PE-PUN,Peru,Puno,,0,0,0
PE-SAM,Peru,San Martin,,0,0,0
PE-TAC,Peru,Tacna,,0,0,0
PE-TUM,Peru,Tumbes,,0,0,0
PE-UCA,Peru,Ucayali,,0,0,0
Loading

0 comments on commit 20cff47

Please sign in to comment.