From ec4f57cfd1d8b65227f5674e9a114f3294d5f9fc Mon Sep 17 00:00:00 2001 From: Peyton Gardipee Date: Tue, 9 Sep 2025 13:42:18 +0000 Subject: [PATCH 1/3] Add readme docs for references to data connection dirs --- README.md | 34 ++++++++++++++++++++++++++++++++++ 1 file changed, 34 insertions(+) diff --git a/README.md b/README.md index 8988e57c4..008fddc69 100644 --- a/README.md +++ b/README.md @@ -1619,6 +1619,40 @@ if __name__ == "__main__": +
+ ✅ Direct uploads to Lightning AI data connections + +  + +[Lightning Studios](https://lightning.ai/) have special directories for data connections that are available to an entire teamspace. LitData functions that reference those directories get a significant performance boost because uploads and downloads go directly to and from the bucket that backs the folder. + +For example, output artifacts from this code will be uploaded directly to the `my-data-1` S3 bucket. + +```python +from litdata import optimize + +def should_keep(data): + if data % 2 == 0: + yield data + +if __name__ == "__main__": + optimize( + fn=should_keep, + inputs=list(range(1000)), + output_dir="/teamspace/s3_connections/my-data-1/output", + chunk_bytes="64MB", + num_workers=1 + ) +``` + +References to any of the following directories will work similarly: +1. `/teamspace/lightning_storage/...` +2. `/teamspace/s3_connections/...` +3. `/teamspace/gcs_connections/...` +4. `/teamspace/s3_folders/...` +5. `/teamspace/gcs_folders/...` +
+   From f45457f79906a4e6c782e03c45176c7e3387a93f Mon Sep 17 00:00:00 2001 From: Peyton Gardipee Date: Tue, 9 Sep 2025 14:52:36 +0100 Subject: [PATCH 2/3] Change title Co-authored-by: thomas chaton --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 008fddc69..da3365492 100644 --- a/README.md +++ b/README.md @@ -1620,7 +1620,7 @@ if __name__ == "__main__":
- ✅ Direct uploads to Lightning AI data connections + ✅ Lightning AI Data Connections - Direct download and upload   From 34aecdb441f52ab901963707951ec10e2268285b Mon Sep 17 00:00:00 2001 From: Peyton Gardipee Date: Tue, 9 Sep 2025 14:00:00 +0000 Subject: [PATCH 3/3] Add download example --- README.md | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/README.md b/README.md index da3365492..d72ad2867 100644 --- a/README.md +++ b/README.md @@ -1645,6 +1645,21 @@ if __name__ == "__main__": ) ``` + +Similarly, data will be downloaded directly from the `my-bucket-1` S3 bucket in this example code. + +```python +from litdata import StreamingRawDataset + +if __name__ == "__main__": + data_dir = "/teamspace/s3_connections/my-bucket-1/data" + + raw_dataset = StreamingRawDataset(data_dir) + + data = list(raw_dataset) + print(data) +``` + References to any of the following directories will work similarly: 1. `/teamspace/lightning_storage/...` 2. `/teamspace/s3_connections/...`