Skip to content
This repository has been archived by the owner on Jan 28, 2022. It is now read-only.

Commit

Permalink
Usa urllib.parse para lidar melhor com URLs
Browse files Browse the repository at this point in the history
  • Loading branch information
AtilioA committed Apr 1, 2020
1 parent dc61e11 commit d6362f7
Showing 1 changed file with 7 additions and 2 deletions.
9 changes: 7 additions & 2 deletions COVID19_ES_Py/boletim.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
"""

import re
from urllib.parse import urljoin
import logging
import unicodedata
import requests
Expand Down Expand Up @@ -135,7 +136,7 @@ def url_ultimo_boletim(self, html=None):
"article", class_="noticia list-content-item content-item"
)
ultimaNoticia = articleNoticia.find("a")["href"]
return ultimaNoticia
return urljoin(DOMINIO_BOLETINS, ultimaNoticia)
except AttributeError:
return None

Expand Down Expand Up @@ -182,7 +183,7 @@ def pesquisa_boletim_data(self, data):
dataBoletim = arrow.get(
matchData.group(0), "DD/MM/YYYY")
if dataBoletim == dataArrow:
return Boletim(f"{DOMINIO_BOLETINS}{linkBoletim}")
return Boletim(urljoin(DOMINIO_BOLETINS, linkBoletim))
return None


Expand Down Expand Up @@ -434,3 +435,7 @@ def filtra_municipios_com_casos(self):
}

return municipiosComCasos


# Ad-hoc manual check of the scraper. It lives behind the entry-point guard so
# that importing this module only defines names and does not build a scraper
# or run the lookup as a side effect.
# NOTE(review): url_ultimo_boletim() presumably fetches the bulletin listing
# when no html is passed -- confirm against its definition.
if __name__ == "__main__":
    a = ScraperBoletim()
    b = a.url_ultimo_boletim()

0 comments on commit d6362f7

Please sign in to comment.