Skip to content

Commit

Permalink
Merge pull request #13 from JackTheMico/release
Browse files Browse the repository at this point in the history
Release to main for semantic release
  • Loading branch information
JackTheMico committed Sep 2, 2022
2 parents c2b3a02 + 0c5e122 commit 1051b4c
Show file tree
Hide file tree
Showing 10 changed files with 427 additions and 56 deletions.
6 changes: 5 additions & 1 deletion .github/workflows/codecov.yml
Original file line number Diff line number Diff line change
Expand Up @@ -6,13 +6,17 @@ on:
branches:
- "*"
paths-ignore:
- 'README.md'
- '**/README.md'
- 'pyproject.toml'
- '**/pyproject.toml'
pull_request:
branches:
- "*"
paths-ignore:
- 'README.md'
- '**/README.md'
- 'pyproject.toml'
- '**/pyproject.toml'
types:
- opened
Expand All @@ -33,7 +37,7 @@ jobs:
skip_after_successful_duplicate: 'true'
paths_ignore: '["**/README.md", "**/docs/**", "**/pyproject.toml"]'
do_not_skip: '["pull_request", "workflow_dispatch", "schedule"]'
build:
pytest:
needs: pre_job
if: ${{ needs.pre_job.outputs.should_skip != 'true' }}
runs-on: ubuntu-latest
Expand Down
7 changes: 2 additions & 5 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,6 @@ from ruia_peewee_async import (
after_start,
)


class DoubanItem(Item):
target_item = TextField(css_select="tr.item")
title = AttrField(css_select="a.nbg", attr="title")
Expand All @@ -53,18 +52,17 @@ class DoubanItem(Item):
async def clean_title(self, value):
return value.strip()


class DoubanSpider(Spider):
start_urls = ["https://movie.douban.com/chart"]
# aiohttp_kwargs = {"proxy": "http://127.0.0.1:7890"}

async def parse(self, response: Response):
async for item in DoubanItem.get_items(html=await response.text()):
yield RuiaPeeweeInsert(item.results) # default is MySQL
# yield RuiaPeeweeInsert(item.results, filters="url") # use the url field (column) to deduplicate and avoid executing unnecessary insert queries.
# yield RuiaPeeweeInsert(item.results, database=TargetDB.POSTGRES) # save to Postgresql
# yield RuiaPeeweeInsert(item.results, database=TargetDB.BOTH) # save to both MySQL and Postgresql


class DoubanUpdateSpider(Spider):
start_urls = ["https://movie.douban.com/chart"]

Expand All @@ -83,11 +81,10 @@ class DoubanUpdateSpider(Spider):
# data: A dict that's going to be updated in the database.
# query: A peewee query or a dict used to search for the target data in the database.
# database: The target database type.
# filters: A str or List[str] of columns used to avoid duplicate data and to avoid executing unnecessary queries.
# create_when_not_exists: Defaults to True. If True, a record is created when the query can't find one.
# not_update_when_exists: Defaults to True. If True and the record exists, the record is not updated.
# only: A list or tuple of the only fields that should be updated.


mysql = {
"host": "127.0.0.1",
"port": 3306,
Expand Down
4 changes: 4 additions & 0 deletions examples/douban.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,9 @@ class DoubanSpider(Spider):
async def parse(self, response: Response):
async for item in DoubanItem.get_items(html=await response.text()):
yield RuiaPeeweeInsert(item.results) # default is MySQL
# use the url field (column) to deduplicate and avoid executing unnecessary insert queries.
# yield RuiaPeeweeInsert(item.results, filters="url")

# yield RuiaPeeweeInsert(item.results, database=TargetDB.POSTGRES) # save to Postgresql
# yield RuiaPeeweeInsert(item.results, database=TargetDB.BOTH) # save to both MySQL and Postgresql

Expand All @@ -49,6 +52,7 @@ async def parse(self, response: Response):
# data: A dict that's going to be updated in the database.
# query: A peewee query or a dict used to search for the target data in the database.
# database: The target database type.
# filters: A str or List[str] of columns used to avoid duplicate data and to avoid executing unnecessary queries.
# create_when_not_exists: Defaults to True. If True, a record is created when the query can't find one.
# not_update_when_exists: Defaults to True. If True and the record exists, the record is not updated.
# only: A list or tuple of the only fields that should be updated.
Expand Down
Loading

0 comments on commit 1051b4c

Please sign in to comment.