Skip to content
This repository has been archived by the owner on Oct 2, 2023. It is now read-only.

Commit

Permalink
Adiciona item e validação para pagamentos (#26)
Browse files Browse the repository at this point in the history
  • Loading branch information
anapaulagomes committed Dec 19, 2019
1 parent 2076838 commit 9bf846e
Show file tree
Hide file tree
Showing 4 changed files with 49 additions and 10 deletions.
18 changes: 18 additions & 0 deletions scraper/scraper/items.py
Original file line number Diff line number Diff line change
Expand Up @@ -55,3 +55,21 @@ class CityHallBidItem(BaseItem):
date = scrapy.Field()
file_urls = scrapy.Field()
file_content = scrapy.Field()


class CityHallPaymentsItem(BaseItem):
    """Scraped payment record published by the city hall.

    Filled by the city hall payments spider: ``published_at``, ``phase``,
    ``company_or_person`` and ``value`` come from the listing headline row,
    while the remaining fields are populated dynamically from a key→field
    mapping over the detail rows (``item[mapping[key]] = value``), so any
    of them may be absent on a given item.
    """

    # Headline fields (always set by the spider).
    published_at = scrapy.Field()       # publication date string as scraped
    phase = scrapy.Field()              # payment phase label — presumably empenho/liquidação/pagamento; confirm with source site
    company_or_person = scrapy.Field()  # payee (company or individual)
    value = scrapy.Field()              # monetary value, kept as scraped string

    # Detail fields (set only when present in the detail table).
    number = scrapy.Field()
    document = scrapy.Field()           # payee document — presumably CNPJ/CPF; TODO confirm
    date = scrapy.Field()
    process_number = scrapy.Field()
    summary = scrapy.Field()
    group = scrapy.Field()
    action = scrapy.Field()
    function = scrapy.Field()
    subfunction = scrapy.Field()
    type_of_process = scrapy.Field()
    resource = scrapy.Field()           # funding source — NOTE(review): meaning inferred from name; verify
2 changes: 2 additions & 0 deletions scraper/scraper/settings.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
CityCouncilAgendaItem,
CityHallBidItem,
CityHallContractItem,
CityHallPaymentsItem,
GazetteEventItem,
LegacyGazetteItem,
)
Expand Down Expand Up @@ -38,4 +39,5 @@
CityCouncilAgendaItem: "scraper.validators.CityCouncilAgendaItem",
CityHallContractItem: "scraper.validators.CityHallContractItem",
CityHallBidItem: "scraper.validators.CityHallBidItem",
CityHallPaymentsItem: "scraper.validators.CityHallPaymentsItem",
}
21 changes: 11 additions & 10 deletions scraper/scraper/spiders/cityhall.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
from datetime import datetime, date, timedelta
import re

from scraper.items import CityHallBidItem, CityHallContractItem
from scraper.items import CityHallBidItem, CityHallContractItem, CityHallPaymentsItem
import scrapy
from .utils import identify_contract_id

Expand Down Expand Up @@ -263,13 +263,14 @@ def parse_page(self, response):

for headline, raw_details in zip(headlines, details):
headline = [text.strip() for text in headline.css("td ::text").extract()]
data = {
"published_at": headline[0],
"phase": headline[1],
"company_or_person": headline[2],
"value": headline[3],
"crawled_at": response.url,
}
item = CityHallPaymentsItem(
published_at=headline[0],
phase=headline[1],
company_or_person=headline[2],
value=headline[3],
crawled_at=datetime.now(),
crawled_from=response.url,
)
details = [
detail.strip() for detail in raw_details.css("td ::text").extract()
]
Expand All @@ -290,6 +291,6 @@ def parse_page(self, response):
while details_copy:
key = details_copy.pop(0)
value = details_copy.pop(0)
data[mapping[key]] = value
item[mapping[key]] = value

yield data
yield item
18 changes: 18 additions & 0 deletions scraper/scraper/validators.py
Original file line number Diff line number Diff line change
Expand Up @@ -65,3 +65,21 @@ class CityHallBidItem(BaseModel):
date = DateTimeType(formats=("%d/%m/%Y %Hh%M"))
file_urls = ListType(StringType)
file_content = StringType()


class CityHallPaymentsItem(BaseModel):
    """Validation schema for city hall payment items.

    Mirrors the scrapy ``CityHallPaymentsItem`` field-for-field and is
    registered against it in the settings' item→validator mapping.
    Only ``company_or_person``, ``value`` and ``document`` are required;
    every other field may be missing because the spider fills them
    dynamically from the detail table.
    """

    # Dates accept both 4- and 2-digit years (dd/mm/YYYY and dd/mm/yy).
    published_at = DateType(formats=("%d/%m/%Y", "%d/%m/%y"))
    phase = StringType()
    company_or_person = StringType(required=True)  # payee is mandatory
    # Value is validated as a plain string, not a number — it is kept
    # exactly as scraped (locale-formatted currency text).
    value = StringType(required=True)
    number = StringType()
    document = StringType(required=True)  # payee document is mandatory
    date = DateType(formats=("%d/%m/%Y", "%d/%m/%y"))
    process_number = StringType()
    summary = StringType()
    group = StringType()
    action = StringType()
    function = StringType()
    subfunction = StringType()
    type_of_process = StringType()
    resource = StringType()

0 comments on commit 9bf846e

Please sign in to comment.