"""Semi-automatic download of the Honduras per-subdivision case figures.

The script fetches the JSON document served at ``webpage``, labels its
columns, tags every row with the country name and the run date, reorders the
columns and writes the result to a CSV file named after the run date.
"""
import json
import os
import ssl
import urllib.request as urllib
from datetime import date

import numpy as np
import pandas as pd

try:
    # The payload is JSON, so this script no longer needs an HTML parser.
    # The name is still imported when the package is available so that the
    # module namespace stays as it was.
    from bs4 import BeautifulSoup
except ImportError:  # bs4 is optional now
    BeautifulSoup = None

# ---------------------------------------------------------------------------
# Settings to adjust
# ---------------------------------------------------------------------------
# Scraping
webpage = 'http://covid19honduras.org/dll/ODEPTO.php'

# Column names: `default_columns` labels the raw payload (in payload order),
# `reorder_columns` is the column order written to the CSV file.
default_columns = ['Number', 'Subdivision',
                   'ISO 3166-2 Code', 'Confirmed', 'Death', 'Recovered']
reorder_columns = ['ISO 3166-2 Code', 'Subdivision',
                   'Country', 'Last Update', 'Confirmed', 'Death', 'Recovered']

# Temporary export location, relative to the current working directory.
# `{}` is replaced by the run date.
OUTPUT_TEMPLATE = "utils/scripts/honduras_temporal/{}.csv"


def parse_payload(raw, charset="utf-8"):
    """Decode the response body and parse it as JSON.

    Args:
        raw: Response body, as ``bytes`` or already-decoded ``str``.
        charset: Encoding used to decode ``raw`` when it is ``bytes``.

    Returns:
        The decoded JSON value.

    Raises:
        ValueError: If the body is not valid JSON (``json.JSONDecodeError``)
            or cannot be decoded with ``charset`` (``UnicodeDecodeError``).
    """
    text = raw.decode(charset) if isinstance(raw, bytes) else raw
    # A leading byte-order mark is not valid JSON; drop it if present.
    return json.loads(text.lstrip("\ufeff"))


def fetch_records(url, timeout=30):
    """Download ``url`` and return its JSON content.

    Args:
        url: Address of the JSON endpoint.
        timeout: Seconds to wait on the network before giving up.

    Returns:
        The decoded JSON value.
    """
    # NOTE(security): certificate verification is disabled, exactly as the
    # script did before. This only matters if the request ends up on HTTPS;
    # review whether it is still required before trusting this data.
    context = ssl._create_unverified_context()
    # The `with` block guarantees the connection is closed even on errors.
    with urllib.urlopen(url, context=context, timeout=timeout) as response:
        raw = response.read()
        charset = response.headers.get_content_charset() or "utf-8"
    return parse_payload(raw, charset)


def build_dataset(records, report_date=None):
    """Turn the raw records into a labelled DataFrame.

    Args:
        records: Row data accepted by ``pandas.DataFrame``; it must yield
            exactly ``len(default_columns)`` columns.
        report_date: Value stored in the ``Last Update`` column. Defaults to
            today's date.

    Returns:
        A DataFrame with ``default_columns`` followed by ``Country`` and
        ``Last Update``.

    Raises:
        ValueError: If ``records`` is empty or its column count does not
            match ``default_columns``.
    """
    if report_date is None:
        report_date = date.today()
    dataset = pd.DataFrame(records)
    if dataset.empty:
        raise ValueError("The source returned no records")
    # Naming columns
    dataset.columns = default_columns
    dataset['Country'] = 'Honduras'
    dataset['Last Update'] = report_date
    return dataset


def main():
    """Download the data, print it and write the dated CSV file."""
    print("We're going to scrap Honduras reports to create a pandas dataframe")
    # Read the date once so the column value and the file name always agree.
    today = date.today()
    dataset = build_dataset(fetch_records(webpage), today)
    print(dataset)

    # Reorder columns
    dataset = dataset.reindex(columns=reorder_columns)
    print(dataset)

    # WARNING: this CSV export is temporary (original note: "CHANGE
    # REPOSITORY FAILS" -- presumably writing to the final repository
    # location did not work yet; confirm with the author).
    output_path = OUTPUT_TEMPLATE.format(today)
    os.makedirs(os.path.dirname(output_path), exist_ok=True)
    dataset.to_csv(output_path, sep=',', index=False)


if __name__ == "__main__":
    main()
a/utils/scripts/honduras_temporal/2020-04-07.csv b/utils/scripts/honduras_temporal/2020-04-07.csv new file mode 100644 index 000000000..73dfcc433 --- /dev/null +++ b/utils/scripts/honduras_temporal/2020-04-07.csv @@ -0,0 +1,12 @@ +,ISO 3166-2 Code,Subdivision,Country,Last Update,Confirmed,Death,Recovered +0,HN-AT,Atlántida,Honduras,2020-04-07,16,1,1 +1,HN-CH,Choluteca,Honduras,2020-04-07,2,0,0 +2,HN-CL,Colón,Honduras,2020-04-07,20,1,0 +3,HN-CM,Comayagua,Honduras,2020-04-07,2,0,0 +4,HN-CP,Copán,Honduras,2020-04-07,1,0,0 +5,HN-CR,Cortés,Honduras,2020-04-07,195,15,0 +6,HN-FM,Francisco Morazán,Honduras,2020-04-07,54,2,5 +7,HN-LP,La Paz,Honduras,2020-04-07,1,0,0 +8,HN-LM,Lempira,Honduras,2020-04-07,4,1,0 +9,HN-SB,Santa Bárbara,Honduras,2020-04-07,8,0,0 +10,HN-YO,Yoro,Honduras,2020-04-07,9,2,0