Skip to content

Commit

Permalink
Add labeled data and fix broken links
Browse files Browse the repository at this point in the history
  • Loading branch information
Lalith-Sagar-Devagudi committed Apr 30, 2024
1 parent 14a09a7 commit 6a1ba25
Show file tree
Hide file tree
Showing 2 changed files with 68 additions and 6 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -62,7 +62,7 @@
"outputs": [],
"source": [
"# <tab: Image>\n",
"!curl -O s3://superduperdb-public-demo/images.zip && unzip images.zip\n",
"!curl -O https://superduperdb-public-demo.s3.amazonaws.com/images.zip && unzip images.zip\n",
"import os\n",
"from PIL import Image\n",
"\n",
Expand All @@ -82,7 +82,7 @@
"outputs": [],
"source": [
"# <tab: Video>\n",
"!curl -O s3://superduperdb-public-demo/videos.zip && unzip videos.zip\n",
"!curl -O https://superduperdb-public-demo.s3.amazonaws.com/videos.zip && unzip videos.zip\n",
"import os\n",
"\n",
"data = [f'videos/{x}' for x in os.listdir('./videos')]\n",
Expand All @@ -100,13 +100,50 @@
"outputs": [],
"source": [
"# <tab: Audio>\n",
"!curl -O s3://superduperdb-public-demo/audio.zip && unzip audio.zip\n",
"!curl -O https://superduperdb-public-demo.s3.amazonaws.com/audio.zip && unzip audio.zip\n",
"import os\n",
"\n",
"data = [f'audios/{x}' for x in os.listdir('./audios')]\n",
"sample_datapoint = data[-1]\n",
"chunked_model_datatype = dtype('str')"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "ce66da2f",
"metadata": {},
"outputs": [],
"source": [
"# <tab: Text (Labeled)>\n",
"!curl -O https://superduperdb-public-demo.s3.amazonaws.com/text_labeled.json\n",
"import json\n",
"\n",
"with open(\"text_labeled.json\", \"r\") as f:\n",
"    data = json.load(f)\n",
"\n",
"sample_datapoint = data[0]"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "9740f757",
"metadata": {},
"outputs": [],
"source": [
"# <tab: Image (Labeled)>\n",
"!curl -O https://superduperdb-public-demo.s3.amazonaws.com/images_labeled.zip && unzip images_labeled.zip\n",
"import json\n",
"from PIL import Image\n",
"\n",
"with open('images_labeled/images_labeled.json', 'r') as f:\n",
"    data = json.load(f)\n",
"\n",
"data = [{'x': Image.open(d['image_path']), 'y': d['label']} for d in data]\n",
"\n",
"sample_datapoint = data[0]"
]
}
],
"metadata": {
Expand Down
31 changes: 28 additions & 3 deletions docs/hr/content/docs/reusable_snippets/get_useful_sample_data.md
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,7 @@ from superduperdb import dtype
</TabItem>
<TabItem value="Image" label="Image" default>
```python
!curl -O s3://superduperdb-public-demo/images.zip && unzip images.zip
!curl -O https://superduperdb-public-demo.s3.amazonaws.com/images.zip && unzip images.zip
import os
from PIL import Image
Expand All @@ -53,7 +53,7 @@ from superduperdb import dtype
</TabItem>
<TabItem value="Video" label="Video" default>
```python
!curl -O s3://superduperdb-public-demo/videos.zip && unzip videos.zip
!curl -O https://superduperdb-public-demo.s3.amazonaws.com/videos.zip && unzip videos.zip
import os
data = [f'videos/{x}' for x in os.listdir('./videos')]
Expand All @@ -65,12 +65,37 @@ from superduperdb import dtype
</TabItem>
<TabItem value="Audio" label="Audio" default>
```python
!curl -O s3://superduperdb-public-demo/audio.zip && unzip audio.zip
!curl -O https://superduperdb-public-demo.s3.amazonaws.com/audio.zip && unzip audio.zip
import os
data = [f'audios/{x}' for x in os.listdir('./audios')]
sample_datapoint = data[-1]
chunked_model_datatype = dtype('str')
```
</TabItem>
<TabItem value="Text (Labeled)" label="Text (Labeled)" default>
```python
!curl -O https://superduperdb-public-demo.s3.amazonaws.com/text_labeled.json
import json

with open("text_labeled.json", "r") as f:
    data = json.load(f)

sample_datapoint = data[0]
```
</TabItem>
<TabItem value="Image (Labeled)" label="Image (Labeled)" default>
```python
!curl -O https://superduperdb-public-demo.s3.amazonaws.com/images_labeled.zip && unzip images_labeled.zip
import json
from PIL import Image

with open('images_labeled/images_labeled.json', 'r') as f:
    data = json.load(f)

data = [{'x': Image.open(d['image_path']), 'y': d['label']} for d in data]

sample_datapoint = data[0]
```
</TabItem>
</Tabs>

0 comments on commit 6a1ba25

Please sign in to comment.