From 820a90b6277001b0f759062c05c19c8590465476 Mon Sep 17 00:00:00 2001 From: Gertjan van den Burg Date: Mon, 16 Jan 2023 21:45:14 +0000 Subject: [PATCH 1/5] a few fixes for a few tests --- tests/test_providers.py | 2 +- tests/test_ui.py | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/tests/test_providers.py b/tests/test_providers.py index 0b361bd..1e7e3af 100644 --- a/tests/test_providers.py +++ b/tests/test_providers.py @@ -479,7 +479,7 @@ def test_acl_3(self): prov = ACL(upload=False, verbose=VERBOSE) url = "https://www.aclweb.org/anthology/2020.sigmorphon-1.29v2.pdf" exp = ( - "Burness_McMullin_-_Multi-Tiered_Strictly_Local_Functions_2020.pdf" + "Burness_Mcmullin_-_Multi-Tiered_Strictly_Local_Functions_2020.pdf" ) filename = prov.run(url) self.assertEqual(exp, os.path.basename(filename)) diff --git a/tests/test_ui.py b/tests/test_ui.py index 5a9ebb3..eeac45e 100644 --- a/tests/test_ui.py +++ b/tests/test_ui.py @@ -177,12 +177,12 @@ def test_choose_provider_1(self): ( CiteSeerX, "http://citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.89.6548", - "https://citeseerx.ist.psu.edu:443/viewdoc/summary?doi=10.1.1.89.6548", + "https://citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.89.6548", ), ( CiteSeerX, "http://citeseerx.ist.psu.edu/viewdoc/download?doi=10.1.1.123.7607&rep=rep1&type=pdf", - "https://citeseerx.ist.psu.edu:443/viewdoc/download?doi=10.1.1.123.7607&rep=rep1&type=pdf", + "https://citeseerx.ist.psu.edu/viewdoc/download?doi=10.1.1.123.7607&rep=rep1&type=pdf", ), ( HTML, From 5b306a1a787a1895024831c78d06d8af58077066 Mon Sep 17 00:00:00 2001 From: Gertjan van den Burg Date: Sat, 23 Sep 2023 15:41:23 +0100 Subject: [PATCH 2/5] Fix neurips provider and tests --- paper2remarkable/providers/neurips.py | 24 +++++++++--------------- tests/test_providers.py | 5 ++++- 2 files changed, 13 insertions(+), 16 deletions(-) diff --git a/paper2remarkable/providers/neurips.py b/paper2remarkable/providers/neurips.py index 9391ebc..a20e45d 100644 --- a/paper2remarkable/providers/neurips.py +++ b/paper2remarkable/providers/neurips.py @@ -16,7 +16,6 @@ class NeurIPSInformer(Informer): - meta_date_key = "citation_publication_date" def __init__(self, *args, **kwargs): @@ -30,17 +29,11 @@ def _format_authors(self, soup_authors): class NeurIPS(Provider): + re_abs = r"^https?://papers.n(eur)?ips.cc/paper/[\d\w\-]+$" + re_pdf = r"^https?://papers.n(eur)?ips.cc/paper/[\d\w\-]+.pdf$" - re_abs = "^https?://papers.n(eur)?ips.cc/paper/[\d\w\-]+$" - re_pdf = "^https?://papers.n(eur)?ips.cc/paper/[\d\w\-]+.pdf$" - - re_abs_2 = "https://papers.n(eur)?ips.cc/paper/\d{4}/hash/[0-9a-f]{32}-Abstract.html" - re_pdf_2 = ( - "https://papers.n(eur)?ips.cc/paper/\d{4}/file/[0-9a-f]{32}-Paper.pdf" - ) - - re_abs_3 = "https://proceedings.n(eur)?ips.cc/paper/\d{4}/hash/[0-9a-f]{32}-Abstract.html" - re_pdf_3 = "https://proceedings.n(eur)?ips.cc/paper/\d{4}/file/[0-9a-f]{32}-Paper.pdf" + re_abs_2 = r"https://(proceedings|papers).n(eur)?ips.cc/(paper_files/)?paper/\d{4}/hash/[0-9a-f]{32}-Abstract.html" + re_pdf_2 = r"https://(proceedings|papers).n(eur)?ips.cc/(paper_files/)?paper/\d{4}/file/[0-9a-f]{32}-Paper.pdf" def __init__(self, *args, **kwargs): super().__init__(*args, **kwargs) @@ -54,19 +47,19 @@ def get_abs_pdf_urls(self, url): elif re.match(self.re_pdf, url): abs_url = url.replace(".pdf", "") pdf_url = url - elif re.match(self.re_abs_2, url) or re.match(self.re_abs_3, url): + elif re.match(self.re_abs_2, url): self.informer.new_site = True abs_url = url pdf_url = ( - url.replace("hash", "file") + url.replace("/hash/", "/file/") .replace("Abstract", "Paper") .replace(".html", ".pdf") ) - elif re.match(self.re_pdf_2, url) or re.match(self.re_pdf_3, url): + elif re.match(self.re_pdf_2, url): self.informer.new_site = True pdf_url = url abs_url = ( - url.replace("file", "hash") + url.replace("/file/", "/hash/") .replace("Paper", "Abstract") .replace(".pdf", ".html") ) @@ -74,6 +67,7 @@ def get_abs_pdf_urls(self, url): raise URLResolutionError("NeurIPS", url) return abs_url, pdf_url + @staticmethod def validate(src): return ( re.fullmatch(NeurIPS.re_abs, src) diff --git a/tests/test_providers.py b/tests/test_providers.py index 1e7e3af..299e6ce 100644 --- a/tests/test_providers.py +++ b/tests/test_providers.py @@ -244,7 +244,10 @@ def test_nber_2(self): def test_neurips_1(self): prov = NeurIPS(upload=False, verbose=VERBOSE) - url = "https://papers.nips.cc/paper/325-leaning-by-combining-memorization-and-gradient-descent.pdf" + # NOTE: This test has been changed because the old url is not + # redirected (anymore?). Using the new url instead. + # url = "https://papers.nips.cc/paper/325-leaning-by-combining-memorization-and-gradient-descent.pdf" + url = "https://proceedings.neurips.cc/paper_files/paper/1990/hash/89f0fd5c927d466d6ec9a21b9ac34ffa-Abstract.html" exp = "Platt_-_Leaning_by_Combining_Memorization_and_Gradient_Descent_1990.pdf" filename = prov.run(url) self.assertEqual(exp, os.path.basename(filename)) From 374920993791267035ca461c960453adf6212426 Mon Sep 17 00:00:00 2001 From: Gertjan van den Burg Date: Sun, 24 Sep 2023 10:38:02 +0100 Subject: [PATCH 3/5] Fix or disable broken providers --- paper2remarkable/providers/__init__.py | 15 +++++++++--- paper2remarkable/providers/neurips.py | 2 -- tests/test_providers.py | 5 +++- tests/test_ui.py | 34 +++++++++----------------- 4 files changed, 27 insertions(+), 29 deletions(-) diff --git a/paper2remarkable/providers/__init__.py b/paper2remarkable/providers/__init__.py index 7d99563..87eb2a9 100644 --- a/paper2remarkable/providers/__init__.py +++ b/paper2remarkable/providers/__init__.py @@ -3,7 +3,7 @@ from .acl import ACL from .acm import ACM from .arxiv import Arxiv -from .citeseerx import CiteSeerX + from .cvf import CVF from .eccc import ECCC from .html import HTML @@ -17,18 +17,25 @@ from .pdf_url import PdfUrl from .pmlr import PMLR from .pubmed import PubMed -from .sagepub import SagePub -from .science_direct import ScienceDirect from .semantic_scholar import SemanticScholar from .springer import Springer + +from .citeseerx import CiteSeerX # disabled, incomplete html doc received + +# The following providers are no longer functional due to Cloudflare blocking +# automated access, and have therefore been removed from the list of providers +# below. +from .sagepub import SagePub +from .science_direct import ScienceDirect from .tandfonline import TandFOnline + + # NOTE: Order matters here, PdfUrl and HTML should be last providers = [ ACL, ACM, Arxiv, - CiteSeerX, CVF, ECCC, IACR, diff --git a/paper2remarkable/providers/neurips.py b/paper2remarkable/providers/neurips.py index a20e45d..4bf0515 100644 --- a/paper2remarkable/providers/neurips.py +++ b/paper2remarkable/providers/neurips.py @@ -74,6 +74,4 @@ def validate(src): or re.fullmatch(NeurIPS.re_pdf, src) or re.fullmatch(NeurIPS.re_abs_2, src) or re.fullmatch(NeurIPS.re_pdf_2, src) - or re.fullmatch(NeurIPS.re_abs_3, src) - or re.fullmatch(NeurIPS.re_pdf_3, src) ) diff --git a/tests/test_providers.py b/tests/test_providers.py index 299e6ce..d13cad8 100644 --- a/tests/test_providers.py +++ b/tests/test_providers.py @@ -148,7 +148,7 @@ def test_acm_3(self): def test_openreview(self): prov = OpenReview(upload=False, verbose=VERBOSE) url = "https://openreview.net/forum?id=S1x4ghC9tQ" - exp_filename = "Gregor_et_al_-_Temporal_Difference_Variational_Auto-Encoder_2019.pdf" + exp_filename = "Gregor_et_al_-_Temporal_Difference_Variational_Auto-Encoder_2018.pdf" filename = prov.run(url) self.assertEqual(exp_filename, os.path.basename(filename)) @@ -273,6 +273,7 @@ def test_neurips_4(self): filename = prov.run(url) self.assertEqual(exp, os.path.basename(filename)) + @unittest.skip("CiteSeerX has been disabled due to automation failure") def test_citeseerx_1(self): prov = CiteSeerX(upload=False, verbose=VERBOSE) url = "http://citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.89.6548" @@ -280,6 +281,7 @@ def test_citeseerx_1(self): filename = prov.run(url) self.assertEqual(exp, os.path.basename(filename)) + @unittest.skip("CiteSeerX has been disabled due to automation failure") def test_citeseerx_2(self): prov = CiteSeerX(upload=False, verbose=VERBOSE) url = "http://citeseerx.ist.psu.edu/viewdoc/download?doi=10.1.1.123.7607&rep=rep1&type=pdf" @@ -379,6 +381,7 @@ def test_semantic_scholar_2(self): filename = prov.run(url) self.assertEqual(exp, os.path.basename(filename)) + @unittest.skip("PDF url doesn't point to pdf file anymore") def test_semantic_scholar_3(self): prov = SemanticScholar(upload=False, verbose=VERBOSE) url = "https://www.semanticscholar.org/paper/A-historical-account-of-how-continental-drift-and-Meinhold-%C5%9Eeng%C3%B6r/e7be87319985445e3ef7addf1ebd10899b92441f" diff --git a/tests/test_ui.py b/tests/test_ui.py index eeac45e..5164ac8 100644 --- a/tests/test_ui.py +++ b/tests/test_ui.py @@ -99,11 +99,11 @@ def test_choose_provider_1(self): "https://link.springer.com/article/10.1007/s10618-019-00631-5", "https://link.springer.com/article/10.1007/s10618-019-00631-5", ), - ( - PdfUrl, - "https://confcats_isif.s3.amazonaws.com/web-files/journals/entries/Nonlinear%20Kalman%20Filters.pdf", - "https://confcats_isif.s3.amazonaws.com/web-files/journals/entries/Nonlinear%20Kalman%20Filters.pdf", - ), + # ( + # PdfUrl, + # "https://confcats_isif.s3.amazonaws.com/web-files/journals/entries/Nonlinear%20Kalman%20Filters.pdf", + # "https://confcats_isif.s3.amazonaws.com/web-files/journals/entries/Nonlinear%20Kalman%20Filters.pdf", + # ), ( PdfUrl, "http://publications.aston.ac.uk/id/eprint/38334/1/5th_Artificial_Neural_Networks.pdf", @@ -159,30 +159,20 @@ def test_choose_provider_1(self): "https://www.nber.org/papers/w19152.pdf", "https://www.nber.org/system/files/working_papers/w19152/w19152.pdf", ), - ( - NeurIPS, - "https://papers.nips.cc/paper/325-leaning-by-combining-memorization-and-gradient-descent.pdf", - "https://proceedings.neurips.cc/paper/1990/file/89f0fd5c927d466d6ec9a21b9ac34ffa-Paper.pdf", - ), + # ( # disabling; no longer redirected + # NeurIPS, + # "https://papers.nips.cc/paper/325-leaning-by-combining-memorization-and-gradient-descent.pdf", + # "https://proceedings.neurips.cc/paper/1990/file/89f0fd5c927d466d6ec9a21b9ac34ffa-Paper.pdf", + # ), ( NeurIPS, "https://papers.nips.cc/paper/7796-middle-out-decoding", - "https://papers.nips.cc/paper/2018/hash/0c215f194276000be6a6df6528067151-Abstract.html", + "https://papers.nips.cc/paper_files/paper/2018/hash/0c215f194276000be6a6df6528067151-Abstract.html", ), ( NeurIPS, "http://papers.neurips.cc/paper/7368-on-the-dimensionality-of-word-embedding.pdf", - "https://proceedings.neurips.cc/paper/2018/file/b534ba68236ba543ae44b22bd110a1d6-Paper.pdf", - ), - ( - CiteSeerX, - "http://citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.89.6548", - "https://citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.89.6548", - ), - ( - CiteSeerX, - "http://citeseerx.ist.psu.edu/viewdoc/download?doi=10.1.1.123.7607&rep=rep1&type=pdf", - "https://citeseerx.ist.psu.edu/viewdoc/download?doi=10.1.1.123.7607&rep=rep1&type=pdf", + "https://proceedings.neurips.cc/paper_files/paper/2018/file/b534ba68236ba543ae44b22bd110a1d6-Paper.pdf", ), ( HTML, From 8fe19c63e39bad4567526de32de0e111b5233ea9 Mon Sep 17 00:00:00 2001 From: Gertjan van den Burg Date: Sun, 24 Sep 2023 10:49:24 +0100 Subject: [PATCH 4/5] code formatting --- .pre-commit-config.yaml | 2 +- paper2remarkable/providers/__init__.py | 10 +++------- 2 files changed, 4 insertions(+), 8 deletions(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 4a853cb..88e19cc 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -6,7 +6,7 @@ repos: language_version: python3 - repo: https://github.com/pycqa/isort - rev: 5.10.1 + rev: 5.12.0 hooks: - id: isort name: isort (python) diff --git a/paper2remarkable/providers/__init__.py b/paper2remarkable/providers/__init__.py index 87eb2a9..3d7d3e7 100644 --- a/paper2remarkable/providers/__init__.py +++ b/paper2remarkable/providers/__init__.py @@ -3,7 +3,7 @@ from .acl import ACL from .acm import ACM from .arxiv import Arxiv - +from .citeseerx import CiteSeerX # disabled, incomplete html doc received from .cvf import CVF from .eccc import ECCC from .html import HTML @@ -17,20 +17,16 @@ from .pdf_url import PdfUrl from .pmlr import PMLR from .pubmed import PubMed -from .semantic_scholar import SemanticScholar -from .springer import Springer - -from .citeseerx import CiteSeerX # disabled, incomplete html doc received # The following providers are no longer functional due to Cloudflare blocking # automated access, and have therefore been removed from the list of providers # below. from .sagepub import SagePub from .science_direct import ScienceDirect +from .semantic_scholar import SemanticScholar +from .springer import Springer from .tandfonline import TandFOnline - - # NOTE: Order matters here, PdfUrl and HTML should be last providers = [ ACL, From 5b3b12daf58cc58b2d4b8348f873a85f97992d60 Mon Sep 17 00:00:00 2001 From: Gertjan van den Burg Date: Sun, 24 Sep 2023 11:07:24 +0100 Subject: [PATCH 5/5] Code formatting --- .github/workflows/test.yml | 14 ++++++++++---- .pre-commit-config.yaml | 3 ++- paper2remarkable/providers/acl.py | 2 -- paper2remarkable/providers/acm.py | 1 - paper2remarkable/providers/arxiv.py | 1 - paper2remarkable/providers/citeseerx.py | 2 -- paper2remarkable/providers/cvf.py | 2 -- paper2remarkable/providers/eccc.py | 1 - paper2remarkable/providers/iacr.py | 1 - paper2remarkable/providers/jmlr.py | 2 -- paper2remarkable/providers/nature.py | 2 -- paper2remarkable/providers/nber.py | 2 -- paper2remarkable/providers/openreview.py | 2 -- paper2remarkable/providers/pmlr.py | 2 -- paper2remarkable/providers/pubmed.py | 2 -- paper2remarkable/providers/sagepub.py | 2 -- paper2remarkable/providers/science_direct.py | 2 -- paper2remarkable/providers/semantic_scholar.py | 2 -- paper2remarkable/providers/springer.py | 2 -- paper2remarkable/providers/tandfonline.py | 1 - 20 files changed, 12 insertions(+), 36 deletions(-) diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index 31629a5..6ecb0ce 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -16,7 +16,7 @@ jobs: runs-on: [ 'ubuntu-latest' ] strategy: matrix: - py: [ '3.8', '3.11' ] + py: [ '3.8', '3.11' ] # minimum required and latest stable steps: - name: Install Python ${{ matrix.py }} @@ -27,12 +27,18 @@ jobs: - name: Checkout code uses: actions/checkout@v2 - - name: Run unit test script - run: ./.github/scripts/test_p2r.sh - shell: bash + # NOTE: Keep versions in sync with .pre-commit-config.yaml - name: Run code quality tests (black) uses: psf/black@stable + with: + version: "23.3.0" - name: Run code quality tests (isort) uses: jamescurtin/isort-action@master + with: + isortVersion: "5.12.0" + + - name: Run unit test script + run: ./.github/scripts/test_p2r.sh + shell: bash diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 88e19cc..6513f58 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -1,6 +1,7 @@ +# NOTE: Keep versions in sync with Github Actions test.yml repos: - repo: https://github.com/psf/black - rev: 22.3.0 + rev: 23.3.0 hooks: - id: black language_version: python3 diff --git a/paper2remarkable/providers/acl.py b/paper2remarkable/providers/acl.py index 13d48f6..964819f 100644 --- a/paper2remarkable/providers/acl.py +++ b/paper2remarkable/providers/acl.py @@ -16,7 +16,6 @@ class ACLInformer(Informer): - meta_date_key = "citation_publication_date" def _format_authors(self, soup_authors): @@ -24,7 +23,6 @@ def _format_authors(self, soup_authors): class ACL(Provider): - re_abs_1 = "^https://www.aclweb.org/anthology/(?P[0-9a-zA-Z\.\-]+)" re_abs_2 = "^https://(www.)?aclanthology.org/(?P[0-9a-zA-Z\.\-]+)" re_pdf_1 = "^https://www.aclweb.org/anthology/(?P[0-9a-zA-Z\.\-]*?)(v\d+)?.pdf" diff --git a/paper2remarkable/providers/acm.py b/paper2remarkable/providers/acm.py index 341edde..bbe64a4 100644 --- a/paper2remarkable/providers/acm.py +++ b/paper2remarkable/providers/acm.py @@ -43,7 +43,6 @@ def _format_year(self, soup_date): class ACM(Provider): - re_abs = "^https?://dl.acm.org/doi/(?P\d+\.\d+/\d+\.\d+)" re_pdf = "^https?://dl.acm.org/doi/pdf/(?P\d+\.\d+/\d+\.\d+)(\?download=true)?" diff --git a/paper2remarkable/providers/arxiv.py b/paper2remarkable/providers/arxiv.py index 05fee24..a7224ae 100644 --- a/paper2remarkable/providers/arxiv.py +++ b/paper2remarkable/providers/arxiv.py @@ -29,7 +29,6 @@ class ArxivInformer(Informer): class Arxiv(Provider): - re_abs_1 = "https?://arxiv.org/abs/\d{4}\.\d{4,5}(v\d+)?" re_pdf_1 = "https?://arxiv.org/pdf/\d{4}\.\d{4,5}(v\d+)?\.pdf" diff --git a/paper2remarkable/providers/citeseerx.py b/paper2remarkable/providers/citeseerx.py index eef64bd..38b573a 100644 --- a/paper2remarkable/providers/citeseerx.py +++ b/paper2remarkable/providers/citeseerx.py @@ -20,7 +20,6 @@ class CiteSeerXInformer(Informer): - meta_author_key = "citation_authors" meta_date_key = "citation_year" @@ -30,7 +29,6 @@ def _format_authors(self, soup_authors): class CiteSeerX(Provider): - re_abs = "^https?:\/\/citeseerx.ist.psu.edu(:443)?\/viewdoc\/summary\?doi=(?P[0-9\.]+)" re_pdf = "^https?:\/\/citeseerx.ist.psu.edu(:443)?\/viewdoc\/download(\;jsessionid=[A-Z0-9]+)?\?doi=(?P[0-9\.]+)&rep=rep1&type=pdf" diff --git a/paper2remarkable/providers/cvf.py b/paper2remarkable/providers/cvf.py index 1041ef5..8ebf24d 100644 --- a/paper2remarkable/providers/cvf.py +++ b/paper2remarkable/providers/cvf.py @@ -19,12 +19,10 @@ class CVFInformer(Informer): - meta_date_key = "citation_publication_date" class CVF(Provider): - re_abs = "^https?://openaccess.thecvf.com/content_([\w\d]+)/html/([\w\d\_\-]+).html$" re_pdf = "^https?://openaccess.thecvf.com/content_([\w\d]+)/papers/([\w\d\_\-]+).pdf$" diff --git a/paper2remarkable/providers/eccc.py b/paper2remarkable/providers/eccc.py index 1a22b4e..c5435ce 100644 --- a/paper2remarkable/providers/eccc.py +++ b/paper2remarkable/providers/eccc.py @@ -64,7 +64,6 @@ def get_year(self, soup): class ECCC(Provider): - re_abs = "https?://eccc.weizmann.ac.il/report/\d{4}/\d+/?$" re_pdf = "https?://eccc.weizmann.ac.il/report/\d{4}/\d+/download/?$" diff --git a/paper2remarkable/providers/iacr.py b/paper2remarkable/providers/iacr.py index 3d80dd1..07718d8 100644 --- a/paper2remarkable/providers/iacr.py +++ b/paper2remarkable/providers/iacr.py @@ -62,7 +62,6 @@ def get_year(self, soup): class IACR(Provider): - re_abs = "https?://eprint.iacr.org/\d{4}/\d+$" re_pdf = "https?://eprint.iacr.org/\d{4}/\d+\.pdf$" re_ps = "https?://eprint.iacr.org/\d{4}/\d+\.ps$" diff --git a/paper2remarkable/providers/jmlr.py b/paper2remarkable/providers/jmlr.py index cee74b6..efc3e28 100644 --- a/paper2remarkable/providers/jmlr.py +++ b/paper2remarkable/providers/jmlr.py @@ -18,7 +18,6 @@ class JMLRInformer(Informer): - meta_date_key = "citation_publication_date" def _format_authors(self, soup_authors): @@ -29,7 +28,6 @@ def _format_authors(self, soup_authors): class JMLR(Provider): - re_abs_1 = "https?://(www\.)?jmlr\.org/papers/v(?P\d+)/(?P\d{2}\-\d{3}).html$" re_pdf_1 = "https?://(www\.)?jmlr\.org/papers/volume(?P\d+)/(?P\d{2}\-\d{3})/(?P=pid).pdf$" diff --git a/paper2remarkable/providers/nature.py b/paper2remarkable/providers/nature.py index 04c6693..48ebe7c 100644 --- a/paper2remarkable/providers/nature.py +++ b/paper2remarkable/providers/nature.py @@ -16,7 +16,6 @@ class NatureInformer(Informer): - meta_date_key = "citation_online_date" def _format_authors(self, soup_authors): @@ -24,7 +23,6 @@ def _format_authors(self, soup_authors): class Nature(Provider): - re_abs = "^https://www.nature.com/articles/s[a-z0-9\-]+$" re_pdf = "^https://www.nature.com/articles/s[a-z0-9\-]+\.pdf$" diff --git a/paper2remarkable/providers/nber.py b/paper2remarkable/providers/nber.py index 42a0227..7909cec 100644 --- a/paper2remarkable/providers/nber.py +++ b/paper2remarkable/providers/nber.py @@ -18,7 +18,6 @@ class NBERInformer(Informer): - meta_date_key = "citation_publication_date" def _format_authors(self, soup_authors, sep=" ", idx=0, op=None): @@ -26,7 +25,6 @@ def _format_authors(self, soup_authors, sep=" ", idx=0, op=None): class NBER(Provider): - re_abs = "https?://www\.nber\.org/papers/(?P[a-z0-9]+)$" re_pdf = "https?://www\.nber\.org/papers/(?P[a-z0-9]+)\.pdf$" diff --git a/paper2remarkable/providers/openreview.py b/paper2remarkable/providers/openreview.py index b640ba0..b9061bc 100644 --- a/paper2remarkable/providers/openreview.py +++ b/paper2remarkable/providers/openreview.py @@ -20,7 +20,6 @@ class OpenReviewInformer(Informer): - meta_date_key = "citation_publication_date" def get_authors(self, soup): @@ -56,7 +55,6 @@ def _format_authors(self, soup_authors): class OpenReview(Provider): - re_abs = "https?://openreview.net/forum\?id=[A-Za-z0-9]+" re_pdf = "https?://openreview.net/pdf\?id=[A-Za-z0-9]+" diff --git a/paper2remarkable/providers/pmlr.py b/paper2remarkable/providers/pmlr.py index 394dad1..391f4ab 100644 --- a/paper2remarkable/providers/pmlr.py +++ b/paper2remarkable/providers/pmlr.py @@ -16,7 +16,6 @@ class PMLRInformer(Informer): - meta_date_key = "citation_publication_date" def _format_authors(self, soup_authors): @@ -24,7 +23,6 @@ def _format_authors(self, soup_authors): class PMLR(Provider): - re_abs_1 = "https?://proceedings.mlr.press/v\d+/[\w\-\w]+\d+.html" re_pdf_1 = "https?://proceedings.mlr.press/v\d+/[\w\-\w]+\d+.pdf" diff --git a/paper2remarkable/providers/pubmed.py b/paper2remarkable/providers/pubmed.py index 9b2f601..0fe7fd4 100644 --- a/paper2remarkable/providers/pubmed.py +++ b/paper2remarkable/providers/pubmed.py @@ -16,7 +16,6 @@ class PubMedInformer(Informer): - meta_date_key = "citation_publication_date" meta_author_key = "citation_author" @@ -25,7 +24,6 @@ def _format_authors(self, soup_authors): class PubMed(Provider): - re_abs = "https?://www.ncbi.nlm.nih.gov/pmc/articles/PMC\d+/?" re_pdf = ( "https?://www.ncbi.nlm.nih.gov/pmc/articles/PMC\d+/pdf/nihms\d+\.pdf" diff --git a/paper2remarkable/providers/sagepub.py b/paper2remarkable/providers/sagepub.py index 49535dc..659b3a2 100644 --- a/paper2remarkable/providers/sagepub.py +++ b/paper2remarkable/providers/sagepub.py @@ -16,7 +16,6 @@ class SagePubInformer(Informer): - meta_author_key = "dc.Creator" meta_title_key = "dc.Title" meta_date_key = "dc.Date" @@ -29,7 +28,6 @@ def _format_year(self, soup_date): class SagePub(Provider): - re_abs = "https?:\/\/journals\.sagepub\.com\/doi\/full\/\d{2}\.\d{4}\/\d+" re_pdf = "https?:\/\/journals\.sagepub\.com\/doi\/pdf\/\d{2}\.\d{4}\/\d+" diff --git a/paper2remarkable/providers/science_direct.py b/paper2remarkable/providers/science_direct.py index a8c9187..384489a 100644 --- a/paper2remarkable/providers/science_direct.py +++ b/paper2remarkable/providers/science_direct.py @@ -29,7 +29,6 @@ class ScienceDirectInformer(Informer): - meta_date_key = "citation_publication_date" def get_authors(self, soup): @@ -44,7 +43,6 @@ def get_authors(self, soup): class ScienceDirect(Provider): - re_abs = ( "https?:\/\/www.sciencedirect.com/science/article/pii/[A-Za-z0-9]+" ) diff --git a/paper2remarkable/providers/semantic_scholar.py b/paper2remarkable/providers/semantic_scholar.py index 881cb6f..3dd7b41 100644 --- a/paper2remarkable/providers/semantic_scholar.py +++ b/paper2remarkable/providers/semantic_scholar.py @@ -20,7 +20,6 @@ class SemanticScholarInformer(Informer): - meta_date_key = "citation_publication_date" def _format_authors(self, soup_authors): @@ -28,7 +27,6 @@ def _format_authors(self, soup_authors): class SemanticScholar(Provider): - re_abs = ( "https?:\/\/www.semanticscholar.org/paper/[A-Za-z0-9%\-]+/[0-9a-f]{40}" ) diff --git a/paper2remarkable/providers/springer.py b/paper2remarkable/providers/springer.py index a73ce17..088be41 100644 --- a/paper2remarkable/providers/springer.py +++ b/paper2remarkable/providers/springer.py @@ -20,7 +20,6 @@ class SpringerInformer(Informer): - meta_date_key = None def _format_authors(self, soup_authors): @@ -36,7 +35,6 @@ def get_year(self, soup): class Springer(Provider): - re_abs_1 = "https?:\/\/link.springer.com\/article\/10\.\d{4}\/[a-z0-9\-]+" re_abs_2 = "https?:\/\/link.springer.com\/chapter\/10\.\d{4}\/[a-z0-9\-]+" re_pdf = "https?:\/\/link\.springer\.com\/content\/pdf\/10\.\d{4}(%2F|\/)[a-z0-9\-\_]+\.pdf" diff --git a/paper2remarkable/providers/tandfonline.py b/paper2remarkable/providers/tandfonline.py index 5e93f03..108de32 100644 --- a/paper2remarkable/providers/tandfonline.py +++ b/paper2remarkable/providers/tandfonline.py @@ -31,7 +31,6 @@ def _format_year(self, soup_date): class TandFOnline(Provider): - re_abs = "^https?://\w+.tandfonline.com/doi/(full|abs)/(?P\d+\.\d+/\w+\.\w+\.\w+)" re_pdf = "^https?://\w+.tandfonline.com/doi/(full|pdf)/(?P\d+\.\d+/\w+\.\w+\.\w+)"