Skip to content

Commit

Permalink
add python docs for oxen.datasets
Browse files Browse the repository at this point in the history
  • Loading branch information
gschoeni committed Mar 14, 2024
1 parent 731d856 commit 23d6fc8
Show file tree
Hide file tree
Showing 2 changed files with 29 additions and 45 deletions.
62 changes: 29 additions & 33 deletions oxen/python/oxen/datasets.py
@@ -1,7 +1,6 @@

from oxen import RemoteRepo

from typing import Sequence, Union
from typing import Optional

from datasets import load_dataset as hf_load_dataset
Expand All @@ -15,16 +14,15 @@ def load_dataset(
"""
Load a dataset from a repo into memory.
Parameters
----------
repo_id : str
The {namespace}/{name} of the oxen repository to load the dataset from
path : str | Sequence[str]
The path to the dataset we want to load
fmt : str
The format of the data files. Currently only "hugging_face" is supported.
revision : str | None
The commit id or branch name of the version of the data to download
Args:
repo_id: `str`
The namespace/repo_name of the oxen repository to load the dataset from
path: `str | Sequence[str]`
The path to the dataset we want to load
fmt: `str`
The format of the data files. Currently only "hugging_face" is supported.
revision: `str | None`
The commit id or branch name of the version of the data to download
"""

if fmt == "hugging_face":
Expand All @@ -49,16 +47,15 @@ def download(repo_id: str, path: str, revision=None, dst=None):
"""
Download files or directories from a remote Oxen repository.
Parameters
----------
repo_id : str
The {namespace}/{name} of the oxen repository to load the dataset from
path : str
The path to the data files
revision : str | None
The commit id or branch name of the version of the data to download
dst : str | None
The path to download the data to.
Args:
repo_id: `str`
The namespace/repo_name of the oxen repository to download the data from
path: `str`
The path to the data files
revision: `str | None`
The commit id or branch name of the version of the data to download
dst: `str | None`
The path to download the data to.
"""

repo = RemoteRepo(repo_id)
Expand All @@ -68,18 +65,17 @@ def upload(repo_id: str, path: str, message: str, branch: Optional[str]=None, ds
"""
Upload files or directories to a remote Oxen repository.
Parameters
----------
repo_id : str
The {namespace}/{name} of the oxen repository to upload the dataset to
path : str
The path to the data files
message : str
The commit message to use when uploading the data
branch : str | None
The branch to upload the data to. If None, the `main` branch is used.
dst : str | None
The directory to upload the data to.
Args:
repo_id: `str`
The namespace/repo_name of the oxen repository to upload the dataset to
path: `str`
The path to the data files
message: `str`
The commit message to use when uploading the data
branch: `str | None`
The branch to upload the data to. If None, the `main` branch is used.
dst: `str | None`
The directory to upload the data to.
"""

repo = RemoteRepo(repo_id)
Expand Down
12 changes: 0 additions & 12 deletions oxen/python/oxen/df_utils.py
Expand Up @@ -22,18 +22,6 @@
import os
from polars import DataFrame


class load_df_call:
    """
    Callable wrapper that reads a file into a data frame.

    The file format is inferred from the file extension.
    Supported types: csv, parquet, json, jsonl, arrow
    """

    def __call__(self, path: os.PathLike):
        # Delegate to the module-level `load` function and hand back
        # whatever it returns, unchanged.
        data_frame = load(path)
        return data_frame


def load(
path: os.PathLike,
):
Expand Down

0 comments on commit 23d6fc8

Please sign in to comment.