AnswerDotAI · jph00 · Sep 11, 2025 · Sep 9, 2025 · Sep 9, 2025 · Sep 9, 2025
diff --git a/contextkit/__init__.py b/contextkit/__init__.py
@@ -1,8 +1,8 @@
 __version__ = "0.0.10"
 
 from . import read
-from .read import read_url, read_gist, read_gh_file, read_file, read_dir, read_pdf, read_google_sheet, read_gdoc, read_arxiv, read_gh_repo
+from .read import read_link, read_url, read_gist, read_gh_file, read_file, read_dir, read_pdf, read_google_sheet, read_gdoc, read_arxiv, read_gh_repo
 
-__all__ = ["read", "read_url", "read_gist", "read_gh_file", "read_file", "read_dir", "read_pdf", "read_google_sheet", "read_gdoc", "read_arxiv", "read_gh_repo"]
+__all__ = ["read", "read_link", "read_url", "read_gist", "read_gh_file", "read_file", "read_dir", "read_pdf", "read_google_sheet", "read_gdoc", "read_arxiv", "read_gh_repo"]  # read_url stays exported while it remains a deprecated alias of read_link
 
 
diff --git a/contextkit/_modidx.py b/contextkit/_modidx.py
@@ -17,6 +17,7 @@
                                  'contextkit.read.read_gh_repo': ('read.html#read_gh_repo', 'contextkit/read.py'),
                                  'contextkit.read.read_gist': ('read.html#read_gist', 'contextkit/read.py'),
                                  'contextkit.read.read_google_sheet': ('read.html#read_google_sheet', 'contextkit/read.py'),
+                                 'contextkit.read.read_link': ('read.html#read_link', 'contextkit/read.py'),
                                  'contextkit.read.read_pdf': ('read.html#read_pdf', 'contextkit/read.py'),
                                  'contextkit.read.read_text': ('read.html#read_text', 'contextkit/read.py'),
                                  'contextkit.read.read_url': ('read.html#read_url', 'contextkit/read.py')}}}
diff --git a/contextkit/read.py b/contextkit/read.py
@@ -3,15 +3,15 @@
 # AUTOGENERATED! DO NOT EDIT! File to edit: ../nbs/00_read.ipynb.
 
 # %% auto 0
-__all__ = ['read_text', 'read_url', 'read_gist', 'read_gh_file', 'read_file', 'read_dir', 'read_pdf', 'read_google_sheet',
-           'read_gdoc', 'read_arxiv', 'read_gh_repo']
+__all__ = ['read_url', 'read_text', 'read_link', 'read_gist', 'read_gh_file', 'read_file', 'read_dir', 'read_pdf',
+           'read_google_sheet', 'read_gdoc', 'read_arxiv', 'read_gh_repo']
 
 # %% ../nbs/00_read.ipynb 5
 import httpx 
 import html2text
 from fastcore.all import delegates, ifnone
 
-import re, os, glob, string
+import re, os, glob, string, warnings, functools
 import requests
 import fnmatch, mimetypes
 
@@ -30,7 +30,7 @@ def read_text(url, # URL to read
     return httpx.get(url, follow_redirects=True).text
 
 # %% ../nbs/00_read.ipynb 10
-def read_url(url: str,   # URL to read
+def read_link(url: str,   # URL to read
              heavy: bool = False,   # Use headless browser (requires extra setup steps before use)
              sel: Optional[str] = None,  # Css selector to pull content from
              useJina: bool = False, # Use Jina for the markdown conversion
@@ -44,7 +44,15 @@ def read_url(url: str,   # URL to read
         return playwrightnb.url2md(url,sel=ifnone(sel,'body'))
     elif heavy and useJina: raise NotImplementedError("Unsupported. No benefit to using Jina with playwrightnb")
 
-# %% ../nbs/00_read.ipynb 16
+# %% ../nbs/00_read.ipynb 14
+@functools.wraps(read_link, assigned=('__module__', '__doc__', '__annotations__'))
+def read_url(*args,**kwargs):
+    # Deprecated alias of read_link. wraps() lends it read_link's doc and (via __wrapped__) signature, but it keeps its own __name__/__qualname__ so it still reports and pickles as read_url.
+    warnings.warn("read_url() is deprecated, use read_link() instead. It is behaviorally identical.", 
+                  DeprecationWarning, stacklevel=2)  # stacklevel=2: attribute the warning to the caller's line
+    return read_link(*args,**kwargs)
+
+# %% ../nbs/00_read.ipynb 18
 def read_gist(url:str  # gist URL, of gist to read
              ):
     "Returns raw gist content, or None"
@@ -57,7 +65,7 @@ def read_gist(url:str  # gist URL, of gist to read
     else:
         return None
 
-# %% ../nbs/00_read.ipynb 20
+# %% ../nbs/00_read.ipynb 22
 def read_gh_file(url:str # GitHub URL of the file to read
                 ):
     "Reads the contents of a file from its GitHub URL"
@@ -66,19 +74,19 @@ def read_gh_file(url:str # GitHub URL of the file to read
     raw_url = re.sub(pattern, replacement, url)
     return httpx.get(raw_url).text
 
-# %% ../nbs/00_read.ipynb 24
+# %% ../nbs/00_read.ipynb 26
 def read_file(path:str):
     "returns file contents"
     with open(path,'r') as f: return f.read()
 
-# %% ../nbs/00_read.ipynb 25
+# %% ../nbs/00_read.ipynb 27
 def _is_unicode(filepath:str, sample_size:int=1024):
     try:
         with open(filepath, 'r') as file: sample = file.read(sample_size)
         return True
     except UnicodeDecodeError: return False
 
-# %% ../nbs/00_read.ipynb 28
+# %% ../nbs/00_read.ipynb 30
 def read_dir(path: str,                          # path to read
              unicode_only: bool = True,             # ignore non-unicode files
              included_patterns: List[str] = ["*"],       # glob pattern of files to include
@@ -106,15 +114,15 @@ def read_dir(path: str,                          # path to read
     else:
         return result
 
-# %% ../nbs/00_read.ipynb 31
+# %% ../nbs/00_read.ipynb 33
 def read_pdf(file_path: str # path of PDF file to read
             ) -> str:
     "Reads the text of a PDF with PdfReader"
     with open(file_path, 'rb') as file:
         reader = PdfReader(file)
         return ' '.join(page.extract_text() for page in reader.pages)
 
-# %% ../nbs/00_read.ipynb 36
+# %% ../nbs/00_read.ipynb 38
 def read_google_sheet(url: str # URL of a Google Sheet to read
                      ):
     "Reads the contents of a Google Sheet into text"
@@ -123,7 +131,7 @@ def read_google_sheet(url: str # URL of a Google Sheet to read
     res = requests.get(url=csv_url)
     return res.content
 
-# %% ../nbs/00_read.ipynb 41
+# %% ../nbs/00_read.ipynb 43
 def read_gdoc(url: str  # URL of Google Doc to read
              ):
     "Gets the text content of a Google Doc using html2text"
@@ -135,7 +143,7 @@ def read_gdoc(url: str  # URL of Google Doc to read
     doc_content = html2text.html2text(html_doc_content)
     return doc_content
 
-# %% ../nbs/00_read.ipynb 44
+# %% ../nbs/00_read.ipynb 46
 def read_arxiv(url:str, # arxiv PDF URL, or arxiv abstract URL, or arxiv ID
                save_pdf:bool=False, # True, will save the downloaded PDF
                save_dir:str='.' # directory in which to save the PDF
@@ -201,7 +209,7 @@ def read_arxiv(url:str, # arxiv PDF URL, or arxiv abstract URL, or arxiv ID
 
     return result
 
-# %% ../nbs/00_read.ipynb 46
+# %% ../nbs/00_read.ipynb 48
 def _gh_ssh_from_gh_url(gh_repo_address:str):
     "Given a GH URL or SSH remote address, returns a GH URL or None"
     pattern = r'https://github\.com/([^/]+)/([^/]+)(?:/.*)?'
@@ -249,7 +257,7 @@ def _get_git_repo(gh_ssh:str):
             print(f"Error cloning repo from cwd {temp_dir} with error {e}")
             return None
 
-# %% ../nbs/00_read.ipynb 47
+# %% ../nbs/00_read.ipynb 49
 def read_gh_repo(path_or_url:str,    # Repo's GitHub URL, or GH SSH address, or file path
                  as_dict:bool=True,  # if True, will return repo contents {path,content} dict
                  verbose:bool=False  # if True, will log paths of files being read

diff --git a/nbs/00_read.ipynb b/nbs/00_read.ipynb
@@ -59,7 +59,7 @@
     "import html2text\n",
     "from fastcore.all import delegates, ifnone\n",
     "\n",
-    "import re, os, glob, string\n",
+    "import re, os, glob, string, warnings, functools\n",
     "import requests\n",
     "import fnmatch, mimetypes\n",
     "\n",
@@ -131,7 +131,7 @@
    "outputs": [],
    "source": [
     "#| export\n",
-    "def read_url(url: str,   # URL to read\n",
+    "def read_link(url: str,   # URL to read\n",
     "             heavy: bool = False,   # Use headless browser (requires extra setup steps before use)\n",
     "             sel: Optional[str] = None,  # Css selector to pull content from\n",
     "             useJina: bool = False, # Use Jina for the markdown conversion\n",
@@ -164,7 +164,7 @@
     }
    ],
    "source": [
-    "read_url('https://fastht.ml/docs/', sel='#quarto-content')[:200]"
+    "read_link('https://fastht.ml/docs/', sel='#quarto-content')[:200]"
    ]
   },
   {
@@ -176,7 +176,7 @@
     {
      "data": {
       "text/plain": [
-       "'Title: FastHTML – fasthtml\\n\\nURL Source: https://fastht.ml/docs/\\n\\nMarkdown Content:\\nWelcome to the official FastHTML documentation.\\n\\nFastHTML is a new next-generation web framework for fast, scalable w'"
+       "'Title: FastHTML – fasthtml\\n\\nURL Source: https://fastht.ml/docs/\\n\\nPublished Time: Sun, 06 Jul 2025 21:56:52 GMT\\n\\nMarkdown Content:\\nWelcome to the official FastHTML documentation.\\n\\nFastHTML is a new nex'"
       ]
      },
      "execution_count": null,
@@ -186,7 +186,7 @@
    ],
    "source": [
     "#| eval: false\n",
-    "read_url('https://fastht.ml/docs/',useJina=True)[:200]"
+    "read_link('https://fastht.ml/docs/',useJina=True)[:200]"
    ]
   },
   {
@@ -206,6 +206,51 @@
      "output_type": "execute_result"
     }
    ],
+   "source": [
+    "read_link('https://fastht.ml/docs/',sel='#quarto-margin-sidebar')"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "449d989a",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "#| export\n",
+    "@functools.wraps(read_link, assigned=('__module__', '__doc__', '__annotations__'))\n",
+    "def read_url(*args,**kwargs):\n",
+    "    # Deprecated alias of read_link. wraps() lends it read_link's doc and (via __wrapped__) signature, but it keeps its own __name__/__qualname__ so it still reports and pickles as read_url.\n",
+    "    warnings.warn(\"read_url() is deprecated, use read_link() instead. It is behaviorally identical.\", \n",
+    "                  DeprecationWarning, stacklevel=2)  # stacklevel=2: attribute the warning to the caller's line\n",
+    "    return read_link(*args,**kwargs)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "922abf81",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "/tmp/ipykernel_275424/3158535638.py:1: DeprecationWarning: read_url() is deprecated, use read_link() instead. It is behaviorally identical.\n",
+      "  read_url('https://fastht.ml/docs/',sel='#quarto-margin-sidebar')\n"
+     ]
+    },
+    {
+     "data": {
+      "text/plain": [
+       "'## On this page\\n\\n  * Installation\\n  * Usage\\n    * Getting help from AI\\n  * Next Steps\\n  * Other languages and related projects\\n\\n  * [__Report an issue](https://github.com/AnswerDotAI/fasthtml/issues/new)\\n\\n## Other Formats\\n\\n  * [ __CommonMark](index.html.md)\\n\\n'"
+      ]
+     },
+     "execution_count": null,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
    "source": [
     "read_url('https://fastht.ml/docs/',sel='#quarto-margin-sidebar')"
    ]
@@ -878,7 +923,13 @@
    "source": []
   }
  ],
- "metadata": {},
+ "metadata": {
+  "kernelspec": {
+   "display_name": "python3",
+   "language": "python",
+   "name": "python3"
+  }
+ },
  "nbformat": 4,
  "nbformat_minor": 5
 }