##  Setup: Deep Lake in Colab

In [1]:
!pip install -q deeplake

### Step 1: Import Deep Lake

In [2]:
import deeplake

###  Step 2: Create a New Dataset (Local/Memory)

In [12]:
# Create a simple Deep Lake dataset (in-memory)
import deeplake
import numpy as np
from PIL import Image

# Create an in-memory dataset
ds = deeplake.create("mem://tiny_dataset")

# Create tensors
ds.create_tensor("images", htype="image", sample_compression="jpeg")
ds.create_tensor("labels", htype="class_label", class_names=["red", "blue"])

# Add samples
for i in range(6):
    color = "red" if i < 3 else "blue"
    rgb = [255, 0, 0] if color == "red" else [0, 0, 255]
    img = Image.new("RGB", (32, 32), tuple(rgb))
    ds.append({"images": img, "labels": color})

# Commit the changes
ds.commit("Initial simple dataset")


LogExistsError: Dataset already exists at path 'mem://tiny_dataset/'

In [87]:
import deeplake
from deeplake.types import ClassLabel, Array

# Define schema correctly
schema = {
    "labels": ClassLabel(dtype="int64"),  # Store labels as integers
    "values": Array(shape=(3,), dtype="float32")  # Array type for values
}

# Create dataset using the correct function
ds = deeplake.create("mem://tiny_dataset5", schema=schema)
#ds = deeplake.memory_dataset(schema=schema)

# Mapping class names to indices
class_mapping = {"cat": 0, "dog": 1}

# Add entries using numeric labels
ds.append({"labels": [class_mapping["cat"]], "values": [[0.1, 0.2, 0.3]]})
ds.append({"labels": [class_mapping["dog"]], "values": [[0.4, 0.5, 0.6]]})


# Commit changes
ds.commit("Initial insert")


In [92]:
# Filter training data
## Challenge: could you find this in the other notebook?

In [21]:
import deeplake.types
print(dir(deeplake.types))

['Array', 'BM25', 'Binary', 'BinaryMask', 'Bool', 'BoundingBox', 'Bytes', 'ClassLabel', 'Clustered', 'ClusteredQuantized', 'DataType', 'Dict', 'Embedding', 'EmbeddingIndex', 'EmbeddingIndexEnumType', 'EmbeddingIndexType', 'EmbeddingsMatrixIndex', 'EmbeddingsMatrixIndexType', 'Float16', 'Float32', 'Float64', 'Image', 'IndexType', 'Int16', 'Int32', 'Int64', 'Int8', 'Inverted', 'Link', 'Medical', 'Point', 'Polygon', 'QuantizationType', 'SegmentMask', 'Sequence', 'Struct', 'Text', 'TextIndex', 'TextIndexEnumType', 'TextIndexType', 'Type', 'TypeKind', 'UInt16', 'UInt32', 'UInt64', 'UInt8', '__all__', '__builtins__', '__cached__', '__doc__', '__file__', '__loader__', '__name__', '__package__', '__spec__']


### Step 3: Add Some Data

In [40]:
# Add 3 records
# Append records one by one
ds.append({"labels": [0], "values": [[5]]})
ds.append({"labels": [1], "values": [[10]]})
ds.append({"labels": [0], "values": [[15]]})

# Commit the current state (like Git commit)
ds.commit("Initial version: 3 samples")

### Step 4: Inspect the Dataset

In [42]:
print("Labels:", ds["labels"][:])
print("Values:", ds["values"][:])

Labels: [0 1 0 1 0]
Values: [array([0.1, 0.2, 0.3], dtype=float32), array([0.4, 0.5, 0.6], dtype=float32), array([5.], dtype=float32), array([10.], dtype=float32), array([15.], dtype=float32)]


### Step 5: Make a Change (add one more item)

In [44]:
ds.append({"labels": [1], "values": [[20]]})  # Wrap values in nested list
ds.commit("Added 4th sample (dog, 20)")

### Step 6: View Commit History

In [48]:
print(ds.history)

0000000000v00000000 2025-05-07 19:03:25 Dataset created
0000000001m00000000 2025-05-07 19:03:25 Created initial schema
0000000002m00000000 2025-05-07 19:03:25 Initial insert
0000000003m00000000 2025-05-07 19:08:25 Initial version: 3 samples
0000000004m00000000 2025-05-07 19:11:22 Added 4th sample (dog, 20)



### Step 7: Checkout Older Version

In [69]:
# Step 4: Show current (latest) state
print("Current version:")
print("Labels:", ds["labels"][:])
print("Values:", ds["values"][:])

Current version:
Labels: [0 1 0 1 0 1]
Values: [array([0.1, 0.2, 0.3], dtype=float32), array([0.4, 0.5, 0.6], dtype=float32), array([5.], dtype=float32), array([10.], dtype=float32), array([15.], dtype=float32), array([20.], dtype=float32)]


In [70]:
ds.summary()

Dataset length: 6
Columns:
  labels: kind=class_label, dtype=int64
  values: array(dtype=float32, shape=(3))




In [71]:

print("Labels:", ds["labels"][:])
print("Values:", ds["values"][:])


After checkout to first version:
Labels: [0 1 0 1 0 1]
Values: [array([0.1, 0.2, 0.3], dtype=float32), array([0.4, 0.5, 0.6], dtype=float32), array([5.], dtype=float32), array([10.], dtype=float32), array([15.], dtype=float32), array([20.], dtype=float32)]


In [72]:
print(hasattr(ds, "checkout"))

False


### Step 8: Switch Back to Latest

In [93]:
#ds.checkout("active")
#print("Back to latest version:")
#print("Labels:", ds.labels[:])
print("Back to latest version:")
print("Labels:", ds["labels"][:])
print("Values:", ds["values"][:])

Back to latest version:
Labels: [0 1]
Values: [[0.1 0.2 0.3]
 [0.4 0.5 0.6]]


### Save Dataset Locally or to Deep Lake Hub

 To save to local disk:

In [94]:
#Challange: who can save the data on disk? (Hint: https://docs.deeplake.ai/4.0/api/dataset/)
print(dir(deeplake))

['AgreementError', 'AgreementNotAcceptedError', 'Any', 'AuthenticationError', 'AuthorizationError', 'BadRequestError', 'Branch', 'BranchExistsError', 'BranchNotFoundError', 'BranchView', 'Branches', 'BranchesView', 'BytePositionIndexOutOfChunk', 'Callable', 'CanNotCreateTensorWithProvidedCompressions', 'CannotDeleteMainBranchError', 'CannotRenameMainBranchError', 'Client', 'Column', 'ColumnAlreadyExistsError', 'ColumnDefinition', 'ColumnDefinitionView', 'ColumnDoesNotExistError', 'ColumnMissingAppendValueError', 'ColumnView', 'CredsKeyAlreadyAssignedError', 'Dataset', 'DatasetUnavailableError', 'DatasetView', 'Dict', 'DimensionsMismatch', 'DtypeMismatch', 'EmbeddingSizeMismatch', 'Executor', 'ExpiredTokenError', 'FormatNotSupportedError', 'Future', 'FutureVoid', 'GcsStorageProviderFailed', 'HTTPBodyIsMissingError', 'HTTPBodyIsNotJSONError', 'HTTPRequestFailedError', 'History', 'IncorrectDeeplakePathError', 'IndexAlreadyExistsError', 'IndexingMode', 'InvalidBinaryMaskCompression', 'Inva

 To use the Deep Lake Hub, (We could not try it yet, because we do not have login yet)

In [96]:
ds.push("hub://<username>/tiny_dataset5") # (Requires login via deeplake.login() first)

HTTPRequestFailedError: Method failed: GET resource: /api/org/<username>/ds/tiny_dataset5/creds?no_cache=True message: Can't fetch resource - https://app.activeloop.ai/api/org/<username>/ds/tiny_dataset5/creds?no_cache=True  

 If you want to access existing dataset OR one may also follow: https://docs.deeplake.ai/4.0/api/dataset/

In [None]:
import deeplake

# Create a local Deep Lake dataset
path = "/path/to/local/directory"
ds = deeplake.dataset(path)

# Append data to the dataset
ds["image"].append(...)
ds["label"].append(...)

# Access the dataset and its tensors
image_tensor = ds["image"]
label_tensor = ds["label"]

# ... perform operations on the tensors ...

# Load the dataset
ds = deeplake.load(path)

 Follow the next Jupyter notebook from here
 AND/OR
 Problem: Time-Series Data Storage & Retrieval
1.Gather stock price or weather data over time.
2.Store data in Deep Lake’s optimized dataset format.
3.Develop a query system to retrieve past trends efficiently. (For extra-points)
