diff --git a/CHANGELOG.bak b/CHANGELOG.bak index 4a2d800..48cf269 100644 --- a/CHANGELOG.bak +++ b/CHANGELOG.bak @@ -2,6 +2,10 @@ +## 0.0.6 + +- Make read_gh_repo default to returning dict + ## 0.0.5 - Bug fix in __init__ for read_gh_repo diff --git a/CHANGELOG.md b/CHANGELOG.md index 48cf269..77d5af1 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -2,6 +2,11 @@ +## 0.0.7 + +- Update to dependencies + + ## 0.0.6 - Make read_gh_repo default to returning dict diff --git a/README.md b/README.md index 97a3451..1e0cbc2 100644 --- a/README.md +++ b/README.md @@ -33,8 +33,10 @@ rd.read_url('https://www.answer.ai/')[:200] 'Answer.AI\n\n * __\n * __\n\n# Answer.AI - Practical AI R&D\n\n##### Categories\n\nAll (33)\n\nai (20)\n\ncoding (5)\n\ncompany (2)\n\ncourses (1)\n\neducation (1)\n\ninterview (1)\n\nopen-source (14)\n\npolicy (4)\n\nproduct' Other arguments are always optional, but can be useful at times. For -example, the `heavy` argument in `read_url` allows you to do a heavy -scrape with a contactless browser using `playwrightnb`. +example, the `heavy` argument in +[`read_url`](https://AnswerDotAI.github.io/contextkit/read.html#read_url) +allows you to do a heavy scrape with a contactless browser using +`playwrightnb`. ``` python rd.read_url('https://www.answer.ai/',heavy=True)[:200] diff --git a/contextkit/__init__.py b/contextkit/__init__.py index 3a3c2f1..3ea6e52 100644 --- a/contextkit/__init__.py +++ b/contextkit/__init__.py @@ -1,4 +1,4 @@ -__version__ = "0.0.6" +__version__ = "0.0.7" from . import read from .read import read_url, read_gist, read_gh_file, read_file, read_dir, read_pdf, read_yt_transcript, read_google_sheet, read_gdoc, read_arxiv, read_gh_repo diff --git a/contextkit/read.py b/contextkit/read.py index 02c4d27..297657a 100644 --- a/contextkit/read.py +++ b/contextkit/read.py @@ -15,7 +15,7 @@ import requests import fnmatch, mimetypes -from PyPDF2 import PdfReader +from pypdf import PdfReader from toolslm.download import html2md, read_html import tempfile, subprocess, os, re, shutil diff --git a/nbs/00_read.ipynb b/nbs/00_read.ipynb index 638ddb9..82c7748 100644 --- a/nbs/00_read.ipynb +++ b/nbs/00_read.ipynb @@ -63,7 +63,7 @@ "import requests\n", "import fnmatch, mimetypes\n", "\n", - "from PyPDF2 import PdfReader\n", + "from pypdf import PdfReader\n", "from toolslm.download import html2md, read_html\n", "\n", "import tempfile, subprocess, os, re, shutil\n", @@ -432,7 +432,7 @@ { "data": { "text/plain": [ - "' \\n \\n \\nThis is a test PDF document. \\nIf you can read this, you have Adobe Acrobat Reader installed on your computer. '" + "' \\n \\n \\n \\n \\n \\nThis is a test PDF document. \\nIf you can read this, you have Adobe Acrobat Reader installed on your computer. '" ] }, "execution_count": null, diff --git a/settings.ini b/settings.ini index 5d2599e..41560d8 100644 --- a/settings.ini +++ b/settings.ini @@ -1,7 +1,7 @@ [DEFAULT] repo = contextkit lib_name = contextkit -version = 0.0.6 +version = 0.0.7 min_python = 3.11 license = apache2 black_formatting = False @@ -26,7 +26,7 @@ keywords = nbdev jupyter notebook python language = English status = 3 user = AnswerDotAI -requirements = python-fasthtml httpx toolslm fastcore llms-txt PyPDF2 youtube_transcript_api pytube playwrightnb +requirements = python-fasthtml httpx toolslm fastcore llms-txt pypdf youtube_transcript_api pytube playwrightnb readme_nb = index.ipynb allowed_metadata_keys = allowed_cell_metadata_keys =