From 81c30d09670fef9543a8bcc94fbd55662239974e Mon Sep 17 00:00:00 2001 From: Daniel Simmons-Ritchie <37225902+SimmonsRitchie@users.noreply.github.com> Date: Tue, 30 Apr 2024 15:50:42 -0500 Subject: [PATCH] Deprecate spider il_capital_development --- .../spiders/il_capital_development.py | 79 --- tests/files/il_capital_development.html | 647 ------------------ tests/test_il_capital_development.py | 93 --- 3 files changed, 819 deletions(-) delete mode 100644 city_scrapers/spiders/il_capital_development.py delete mode 100644 tests/files/il_capital_development.html delete mode 100644 tests/test_il_capital_development.py diff --git a/city_scrapers/spiders/il_capital_development.py b/city_scrapers/spiders/il_capital_development.py deleted file mode 100644 index 3193f120a..000000000 --- a/city_scrapers/spiders/il_capital_development.py +++ /dev/null @@ -1,79 +0,0 @@ -from datetime import datetime, timedelta - -from city_scrapers_core.constants import BOARD -from city_scrapers_core.items import Meeting -from city_scrapers_core.spiders import CityScrapersSpider - - -class IlCapitalDevelopmentSpider(CityScrapersSpider): - name = "il_capital_development" - agency = "Illinois Capital Development Board" - timezone = "America/Chicago" - start_urls = ["https://www2.illinois.gov/cdb/about/boardmeetings"] - location = { - "name": "James R. Thompson Center", - "address": "100 West Randolph Street, 14th Floor, Chicago, IL 60601", - } - - def parse(self, response): - for item in response.xpath("//tbody/tr"): - self._validate_location(item) - meeting = Meeting( - title="Capital Development Board", - description="", - classification=BOARD, - start=self._parse_start(item), - end=self._parse_end(item), - all_day=self._parse_all_day(item), - time_notes="", - location=self.location, - links=self._parse_links(item), - source=self._parse_source(response), - ) - - meeting["status"] = self._get_status( - meeting, text=" ".join(item.css("td *::text").extract()) - ) - meeting["id"] = self._get_id(meeting) - - yield meeting - - def _parse_start(self, item): - """Parse start datetime as a naive datetime object.""" - start_date = item.xpath(".//th/text()").get() - start_time = item.xpath(".//th/text()[preceding-sibling::br]").get() - start = datetime.strptime(start_date + start_time, "%B %d, %Y %H:%M %p") - return start - - def _parse_end(self, item): - """Parse end datetime as a naive datetime object. Added by pipeline if None""" - end_date = item.xpath(".//th/text()").get() - end_time = "12:00 AM" - end = datetime.strptime(end_date + end_time, "%B %d, %Y %H:%M %p") - end += timedelta(days=1) - return end - - def _parse_all_day(self, item): - """Parse or generate all-day status. Defaults to False.""" - return False - - def _validate_location(self, item): - """Validate if location has changed""" - if "Chicago" not in " ".join(item.css("td *::text").extract()): - raise ValueError("Meeting location has changed") - - def _parse_links(self, item): - """Parse or generate links.""" - links = [] - for href in item.xpath(".//ul/li/a"): - links.append( - { - "title": href.xpath("text()").get().strip(), - "href": href.xpath("@href").get().strip(), - } - ) - return links - - def _parse_source(self, response): - """Parse or generate source.""" - return response.url diff --git a/tests/files/il_capital_development.html b/tests/files/il_capital_development.html deleted file mode 100644 index f08ba22f7..000000000 --- a/tests/files/il_capital_development.html +++ /dev/null @@ -1,647 +0,0 @@ - - - - - - 2019/2020 Meetings - - Board Meetings - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
-
- - - - - - - - - - - - - - - - - - - - - - - - -
- - - - - - - - - - - - - -
- - -
- - -
-
- - - -
- - -
- -
-
-
- -
-
- - - -
- -
-
-
- -
-
- - -
-

Breadcrumb

- -
- - - -
-
-

- 2019/2020 Meetings -

-
-
- - -
- -
- - -
-
-
- - - -
-
-
- -
-
-
- -
- - -
-
- -
-
- -
-
- -
-
- - -
-
-
-
-
-
-
-
- - - -
-
-
-
-
-
-

CDB Board Meetings

-
DateLocation & InformationLinks
July 09, 2019
11:00 AM
Chicago, Springfield & Collinsville video-conference
August 13, 2019
11:00 AM
Chicago, Springfield & Collinsville video-conference
September 10, 2019
11:00 AM
Chicago, Springfield & Collinsville video-conference
October 08, 2019
11:00 AM
Chicago, Springfield & Collinsville video-conference
November 13, 2019
11:00 AM
Chicago, Springfield & Collinsville video-conference
December 10, 2019
12:00 AM
Chicago, Springfield & Collinsville video-conference
January 14, 2020
11:00 AM
Chicago, Springfield & Collinsville video-conference
February 11, 2020
11:00 AM
Chicago, Springfield & Collinsville video-conference
March 10, 2020
11:00 AM
Chicago, Springfield & Collinsville video-conference
April 14, 2020
11:00 AM
Chicago, Springfield & Collinsville video-conference
May 12, 2020
11:00 AM
Chicago, Springfield & Collinsville video-conference
June 09, 2020
11:00 AM
Chicago, Springfield & Collinsville video-conference
-
-
-
-
-
-
- - -
- - - - -
-
-
- -
-
- -
- -
-
- - - - - - - - - - - - - - - - - - - - - - - - - -
- - - - - \ No newline at end of file diff --git a/tests/test_il_capital_development.py b/tests/test_il_capital_development.py deleted file mode 100644 index 09ff7eaa2..000000000 --- a/tests/test_il_capital_development.py +++ /dev/null @@ -1,93 +0,0 @@ -from datetime import datetime -from os.path import dirname, join - -import pytest -from city_scrapers_core.constants import BOARD -from city_scrapers_core.utils import file_response -from freezegun import freeze_time - -from city_scrapers.spiders.il_capital_development import IlCapitalDevelopmentSpider - -test_response = file_response( - join(dirname(__file__), "files", "il_capital_development.html"), - url="https://www2.illinois.gov/cdb/about/boardmeetings/Pages/20192020Meetings.aspx", -) -spider = IlCapitalDevelopmentSpider() - -freezer = freeze_time("2019-10-26") -freezer.start() - -parsed_items = [item for item in spider.parse(test_response)] - -freezer.stop() - - -def test_title(): - assert parsed_items[0]["title"] == "Capital Development Board" - - -def test_description(): - assert parsed_items[0]["description"] == "" - - -def test_start(): - assert parsed_items[0]["start"] == datetime(2019, 7, 9, 11, 0) - - -def test_end(): - assert parsed_items[0]["end"] == datetime(2019, 7, 10, 12, 0) - - -def test_time_notes(): - assert parsed_items[0]["time_notes"] == "" - - -def test_id(): - assert ( - parsed_items[0]["id"] - == "il_capital_development/201907091100/x/capital_development_board" - ) - - -def test_status(): - assert parsed_items[0]["status"] == "passed" - - -def test_location(): - assert parsed_items[0]["location"] == { - "name": "James R. Thompson Center", - "address": "100 West Randolph Street, 14th Floor, Chicago, IL 60601", - } - - -def test_source(): - assert ( - parsed_items[0]["source"] - == "https://www2.illinois.gov/cdb/about/boardmeetings/Pages/20192020Meetings.aspx" # noqa - ) - - -def test_links(): - assert parsed_items[0]["links"] == [ - { - "href": "https://www2.illinois.gov/cdb/about/boardmeetings/Documents/2019-2020/TableOfContentsWEB-July.pdf", # noqa - "title": "Agenda", - }, - { - "href": "https://www2.illinois.gov/cdb/about/boardmeetings/Documents/2019-2020/July%20Board%20Book%20-%20Public.pdf", # noqa - "title": "Board Book", - }, - { - "href": "https://www2.illinois.gov/cdb/about/boardmeetings/Documents/2019-2020/07.09.19%20Meeting%20Minutes.docx", # noqa - "title": "Meeting Minutes", - }, - ] - - -def test_classification(): - assert parsed_items[0]["classification"] == BOARD - - -@pytest.mark.parametrize("item", parsed_items) -def test_all_day(item): - assert item["all_day"] is False