From 23d6fc86cc04d700970f5226cc81b04ebb92bfd1 Mon Sep 17 00:00:00 2001 From: Greg Schoeninger Date: Thu, 14 Mar 2024 14:43:01 -0700 Subject: [PATCH] add python docs for oxen.datasets --- oxen/python/oxen/datasets.py | 62 +++++++++++++++++------------------- oxen/python/oxen/df_utils.py | 12 ------- 2 files changed, 29 insertions(+), 45 deletions(-) diff --git a/oxen/python/oxen/datasets.py b/oxen/python/oxen/datasets.py index 21a0882..5133c49 100644 --- a/oxen/python/oxen/datasets.py +++ b/oxen/python/oxen/datasets.py @@ -1,7 +1,6 @@ from oxen import RemoteRepo -from typing import Sequence, Union from typing import Optional from datasets import load_dataset as hf_load_dataset @@ -15,16 +14,15 @@ def load_dataset( """ Load a dataset from a repo into memory. - Parameters - ---------- - repo_id : str - The {namespace}/{name} of the oxen repository to load the dataset from - path : str | Sequence[str] - The path to the dataset we want to load - fmt : str - The format of the data files. Currently only "hugging_face" is supported. - revision : str | None - The commit id or branch name of the version of the data to download + Args: + repo_id: `str` + The namespace/repo_name of the oxen repository to load the dataset from + path: `str` | Sequence[str] + The path to the dataset we want to load + fmt: `str` + The format of the data files. Currently only "hugging_face" is supported. + revision: `str` | None + The commit id or branch name of the version of the data to download """ if fmt == "hugging_face": @@ -49,16 +47,15 @@ def download(repo_id: str, path: str, revision=None, dst=None): """ Download files or directories from a remote Oxen repository. - Parameters - ---------- - repo_id : str - The {namespace}/{name} of the oxen repository to load the dataset from - path : str - The path to the data files - revision : str | None - The commit id or branch name of the version of the data to download - dst : str | None - The path to download the data to. + Args: + repo_id: `str` + The namespace/repo_name of the oxen repository to load the dataset from + path: `str` + The path to the data files + revision: `str | None` + The commit id or branch name of the version of the data to download + dst: `str | None` + The path to download the data to. """ repo = RemoteRepo(repo_id) @@ -68,18 +65,17 @@ def upload(repo_id: str, path: str, message: str, branch: Optional[str]=None, ds """ Upload files or directories to a remote Oxen repository. - Parameters - ---------- - repo_id : str - The {namespace}/{name} of the oxen repository to upload the dataset to - path : str - The path to the data files - message : str - The commit message to use when uploading the data - branch : str | None - The branch to upload the data to. If None, the `main` branch is used. - dst : str | None - The directory to upload the data to. + Args: + repo_id: `str` + The namespace/repo_name of the oxen repository to upload the dataset to + path: `str` + The path to the data files + message: `str` + The commit message to use when uploading the data + branch: `str | None` + The branch to upload the data to. If None, the `main` branch is used. + dst: `str | None` + The directory to upload the data to. """ repo = RemoteRepo(repo_id) diff --git a/oxen/python/oxen/df_utils.py b/oxen/python/oxen/df_utils.py index b8c5485..93a421b 100644 --- a/oxen/python/oxen/df_utils.py +++ b/oxen/python/oxen/df_utils.py @@ -22,18 +22,6 @@ import os from polars import DataFrame - -class load_df_call: - """ - Reads a file into a data frame. The file format is inferred from the file extension. - - Supported types: csv, parquet, json, jsonl, arrow - """ - - def __call__(self, path: os.PathLike): - return load(path) - - def load( path: os.PathLike, ):