# To do
* use network access instead of example files
* make clear Dataset vs SnpReader vs Bed, SnpData = Zarr, Xarray
* Add sgkit example on Bed Data

In [7]:
# Open a PLINK Bed file through a PySnpTools reader. The location given here
# is an https URL; per the original notes a local path works as well.
# Only the shape is requested below, which the original notes describe as
# metadata access that does not read the genotype data.
from pysnptools.snpreader import Bed

url = (
    "https://raw.githubusercontent.com/fastlmm/FaST-LMM/refs/heads/master/"
    "tests/datasets/all_chr.maf0.001.N300.bed"
)
snp_reader = Bed(url, count_A1=True)
bed_shape = snp_reader.shape
print(f"Shape: {bed_shape} (individuals × SNPs)")

Shape: (300, 1015) (individuals × SNPs)


In [8]:
import numpy as np

# List the first 5 individual (sample) ids, the first 5 SNP (variant) ids,
# and every unique chromosome.
# "pos" has one row per SNP; its columns are chromosome, cm_position
# (genetic distance), and bp_position (basepair position) -- the same order
# the xarray conversion later in this notebook relies on.
snp_chrom = snp_reader.pos[:, 0]
print(snp_reader.iid[:5])
print(snp_reader.sid[:5])
print(np.unique(snp_chrom))

# Read all genotype data from network to an in-memory SnpData
# that wraps a numpy array.
snp_data_all = snp_reader.read()
display(snp_data_all.val)

# Create a new SnpReader for every second individual and the SNPs (variants)
# at index 20 up to, but not including, 30.
# Without reading its data, use metadata to show the shape of this subset.
subset = snp_reader[::2, 20:30]
print(f"Subset shape: {subset.shape} (individuals × SNPs)")

# From the original SnpReader, read every value in chromosome 5
# into a numpy array. The boolean mask selects the SNP columns to read.
val3 = snp_reader[:, snp_chrom == 5].read().val
print(val3.shape)

[['POP1' '0']
 ['POP1' '12']
 ['POP1' '44']
 ['POP1' '58']
 ['POP1' '65']]
['1_12' '1_34' '1_10' '1_35' '1_28']
[ 1.  2.  3.  4.  5.  6.  7.  8.  9. 10. 11. 12. 13. 14. 15. 16. 17. 18.
 19. 20. 21. 22. 23.]


array([[0., 0., 1., ..., 1., 1., 0.],
       [0., 0., 0., ..., 1., 1., 0.],
       [0., 0., 0., ..., 1., 1., 0.],
       ...,
       [0., 0., 0., ..., 0., 2., 0.],
       [0., 0., 0., ..., 1., 1., 0.],
       [0., 0., 0., ..., 0., 2., 0.]])

Subset shape: (150, 10) (individuals × SNPs)
(300, 43)


In [3]:
# Build a reader restricted to three individuals, in the order they are
# listed here (as they might appear in a phenotype file), and to every SNP
# whose chromosome is not 5.
iids_from_pheno_file = [["POP1", "65"], ["POP1", "44"], ["POP1", "0"]]
pheno_row_index = snp_reader.iid_to_index(iids_from_pheno_file)
off_chrom5 = snp_reader.pos[:, 0] != 5
new_file_reader = snp_reader[pheno_row_index, off_chrom5]
print(f"# of individuals that will be read: {new_file_reader.iid_count}")
remaining_chroms = np.unique(new_file_reader.pos[:, 0])
print(f"Note: not chrom 5 {remaining_chroms}")
# Genotype values are read here, and only for this selection.
val4 = new_file_reader.read().val
print(val4.shape)

# of individuals that will be read: 3
Note: not chrom 5 [ 1.  2.  3.  4.  6.  7.  8.  9. 10. 11. 12. 13. 14. 15. 16. 17. 18. 19.
 20. 21. 22. 23.]
(3, 972)


In [9]:
# Convert PySnpTools BED data to xarray Dataset with full genomic metadata
import xarray as xr

# Ids and positions are reader metadata, so take them from snp_reader directly
# instead of from a fresh read(). read() is then called exactly once below, so
# the genotype matrix is fetched a single time rather than three times.
iid_pairs = snp_reader.iid  # one [family id, individual id] pair per individual
snp_pos = snp_reader.pos    # per-SNP columns used below: 0 = chromosome, 1 = cm, 2 = bp

# Create xarray Dataset using numeric indexes (avoids MultiIndex complexity and ensures Zarr compatibility)
xarray_ds = xr.Dataset(
    {
        "genotypes": (["individual", "snp"], snp_reader.read().val)
    },
    coords={
        "individual": range(snp_reader.iid_count),
        "fid": (["individual"], [fid for fid, _iid in iid_pairs]),  # Family IDs
        "iid": (["individual"], [iid for _fid, iid in iid_pairs]),  # Individual IDs

        "snp": range(snp_reader.sid_count),
        "sid": (["snp"], snp_reader.sid),                                         # SNP IDs
        "chromosome": (["snp"], np.nan_to_num(snp_pos[:, 0], nan=0).astype(int)),  # Chromosome (NaN→0)
        "cm_position": (["snp"], snp_pos[:, 1]),                                   # Genetic position
        "bp_position": (["snp"], np.nan_to_num(snp_pos[:, 2], nan=0).astype(int)), # Physical position (NaN→0)
    },
    attrs={
        "description": "Genotype data from PySnpTools BED file",
        "encoding": "0=homozygous ref, 1=heterozygous, 2=homozygous alt, NaN=missing",
        "source": url,
    }
)

display(xarray_ds)

In [10]:
# Write the Dataset to a Zarr store on disk, then reopen that store.
import warnings

# Suppress UserWarning messages raised from the zarr package; the original
# note says these are unicode warnings that are OK to ignore.
warnings.filterwarnings(action="ignore", category=UserWarning, module="zarr")

# Store location, relative to the notebook's working directory.
zarr_path = "all_chr.maf0.001.N300.zarr"
xarray_ds.to_zarr(zarr_path, mode="w")

# Round-trip check: reopen the store (lazily, per the original note) and show it.
zarr_ds = xr.open_zarr(zarr_path)
display(zarr_ds)


Unnamed: 0,Array,Chunk
Bytes,7.93 kiB,7.93 kiB
Shape,"(1015,)","(1015,)"
Dask graph,1 chunks in 2 graph layers,1 chunks in 2 graph layers
Data type,int64 numpy.ndarray,int64 numpy.ndarray
"Array Chunk Bytes 7.93 kiB 7.93 kiB Shape (1015,) (1015,) Dask graph 1 chunks in 2 graph layers Data type int64 numpy.ndarray",1015  1,

Unnamed: 0,Array,Chunk
Bytes,7.93 kiB,7.93 kiB
Shape,"(1015,)","(1015,)"
Dask graph,1 chunks in 2 graph layers,1 chunks in 2 graph layers
Data type,int64 numpy.ndarray,int64 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,7.93 kiB,7.93 kiB
Shape,"(1015,)","(1015,)"
Dask graph,1 chunks in 2 graph layers,1 chunks in 2 graph layers
Data type,int64 numpy.ndarray,int64 numpy.ndarray
"Array Chunk Bytes 7.93 kiB 7.93 kiB Shape (1015,) (1015,) Dask graph 1 chunks in 2 graph layers Data type int64 numpy.ndarray",1015  1,

Unnamed: 0,Array,Chunk
Bytes,7.93 kiB,7.93 kiB
Shape,"(1015,)","(1015,)"
Dask graph,1 chunks in 2 graph layers,1 chunks in 2 graph layers
Data type,int64 numpy.ndarray,int64 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,7.93 kiB,7.93 kiB
Shape,"(1015,)","(1015,)"
Dask graph,1 chunks in 2 graph layers,1 chunks in 2 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray
"Array Chunk Bytes 7.93 kiB 7.93 kiB Shape (1015,) (1015,) Dask graph 1 chunks in 2 graph layers Data type float64 numpy.ndarray",1015  1,

Unnamed: 0,Array,Chunk
Bytes,7.93 kiB,7.93 kiB
Shape,"(1015,)","(1015,)"
Dask graph,1 chunks in 2 graph layers,1 chunks in 2 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,4.69 kiB,4.69 kiB
Shape,"(300,)","(300,)"
Dask graph,1 chunks in 2 graph layers,1 chunks in 2 graph layers
Data type,,
"Array Chunk Bytes 4.69 kiB 4.69 kiB Shape (300,) (300,) Dask graph 1 chunks in 2 graph layers Data type",300  1,

Unnamed: 0,Array,Chunk
Bytes,4.69 kiB,4.69 kiB
Shape,"(300,)","(300,)"
Dask graph,1 chunks in 2 graph layers,1 chunks in 2 graph layers
Data type,,

Unnamed: 0,Array,Chunk
Bytes,4.69 kiB,4.69 kiB
Shape,"(300,)","(300,)"
Dask graph,1 chunks in 2 graph layers,1 chunks in 2 graph layers
Data type,,
"Array Chunk Bytes 4.69 kiB 4.69 kiB Shape (300,) (300,) Dask graph 1 chunks in 2 graph layers Data type",300  1,

Unnamed: 0,Array,Chunk
Bytes,4.69 kiB,4.69 kiB
Shape,"(300,)","(300,)"
Dask graph,1 chunks in 2 graph layers,1 chunks in 2 graph layers
Data type,,

Unnamed: 0,Array,Chunk
Bytes,39.65 kiB,39.65 kiB
Shape,"(1015,)","(1015,)"
Dask graph,1 chunks in 2 graph layers,1 chunks in 2 graph layers
Data type,,
"Array Chunk Bytes 39.65 kiB 39.65 kiB Shape (1015,) (1015,) Dask graph 1 chunks in 2 graph layers Data type",1015  1,

Unnamed: 0,Array,Chunk
Bytes,39.65 kiB,39.65 kiB
Shape,"(1015,)","(1015,)"
Dask graph,1 chunks in 2 graph layers,1 chunks in 2 graph layers
Data type,,

Unnamed: 0,Array,Chunk
Bytes,2.32 MiB,297.66 kiB
Shape,"(300, 1015)","(75, 508)"
Dask graph,8 chunks in 2 graph layers,8 chunks in 2 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray
"Array Chunk Bytes 2.32 MiB 297.66 kiB Shape (300, 1015) (75, 508) Dask graph 8 chunks in 2 graph layers Data type float64 numpy.ndarray",1015  300,

Unnamed: 0,Array,Chunk
Bytes,2.32 MiB,297.66 kiB
Shape,"(300, 1015)","(75, 508)"
Dask graph,8 chunks in 2 graph layers,8 chunks in 2 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray


In [6]:
# Pull the complete genotype matrix out of the Zarr-backed Dataset.
genotypes_from_zarr = zarr_ds["genotypes"]
genotypes_from_zarr.values  # last expression, so the notebook displays the array

array([[0., 0., 1., ..., 1., 1., 0.],
       [0., 0., 0., ..., 1., 1., 0.],
       [0., 0., 0., ..., 1., 1., 0.],
       ...,
       [0., 0., 0., ..., 0., 2., 0.],
       [0., 0., 0., ..., 1., 1., 0.],
       [0., 0., 0., ..., 0., 2., 0.]])

In [7]:
# Select every second individual and the SNPs (variants) at index 20 up to,
# but not including, 30 -- addressed by dimension name instead of by position.
zarr_subset = zarr_ds.genotypes.isel(
    individual=slice(None, None, 2),
    snp=slice(20, 30),
)
zarr_subset.shape

(150, 10)

In [21]:
# Against the full zarr_ds (not the subset):
# show the first 5 individual (sample) ids and the first 5 SNP (variant) ids,
# list the distinct chromosomes, then read all genotype values on chromosome 5.
first_iids = zarr_ds.iid[:5]
first_sids = zarr_ds.sid[:5]
print(first_iids.values)
print(first_sids.values)
print(np.unique(zarr_ds.chromosome))
on_chrom5 = zarr_ds.chromosome == 5  # boolean mask along the snp dimension
print(zarr_ds.genotypes[:, on_chrom5].values)

Individual indices in phenotype file order: [4 2 0]
# of individuals: 3
Chromosomes: [ 1  2  3  4  6  7  8  9 10 11 12 13 14 15 16 17 18 19 20 21 22 23]


Individual indices in phenotype file order: [4 2 0]
# of individuals: 3
Chromosomes: [ 1  2  3  4  6  7  8  9 10 11 12 13 14 15 16 17 18 19 20 21 22 23]


Individual indices in phenotype file order: [4 2 0]
# of individuals: 3
Chromosomes: [ 1  2  3  4  6  7  8  9 10 11 12 13 14 15 16 17 18 19 20 21 22 23]


Shape: (3, 972)


In [9]:
# Create a new zarr reader that re-orders and subsets the individuals and that leaves out chromosome 5.
import pandas as pd

iids_from_pheno_file = [["POP1", "65"], ["POP1", "44"], ["POP1", "0"]]

# Need to do tricks to efficiently REORDER (and subset) the Dataset.
# Create pandas MultiIndex over (fid, iid) pairs for efficient lookup.
zarr_iids = pd.MultiIndex.from_arrays([zarr_ds.fid.values, zarr_ds.iid.values])
pheno_iids = pd.MultiIndex.from_tuples(iids_from_pheno_file)
indices = zarr_iids.get_indexer(pheno_iids)

# get_indexer marks ids it cannot find with -1. Passing -1 on to isel would
# silently select the LAST individual, so fail loudly instead.
id_not_found = indices < 0
if id_not_found.any():
    unknown_ids = [
        pair for pair, is_unknown in zip(iids_from_pheno_file, id_not_found) if is_unknown
    ]
    raise KeyError(f"iid pairs not found in zarr_ds: {unknown_ids}")

print(f"Individual indices in phenotype file order: {indices}")

# Do subsetting in one line: individuals first (named), then SNPs (boolean)
new_zarr = zarr_ds.isel(individual=indices).sel(snp=zarr_ds.chromosome != 5)
print(f"# of individuals: {new_zarr.sizes['individual']}")
print(f"Chromosomes: {np.unique(new_zarr.chromosome)}")
display(new_zarr)

# Read just this data
val4_zarr = new_zarr.genotypes.values
print(f"Shape: {val4_zarr.shape}")

Individual indices in phenotype file order: [4 2 0]
# of individuals: 3
Chromosomes: [ 1  2  3  4  6  7  8  9 10 11 12 13 14 15 16 17 18 19 20 21 22 23]


Unnamed: 0,Array,Chunk
Bytes,7.59 kiB,7.59 kiB
Shape,"(972,)","(972,)"
Dask graph,1 chunks in 3 graph layers,1 chunks in 3 graph layers
Data type,int64 numpy.ndarray,int64 numpy.ndarray
"Array Chunk Bytes 7.59 kiB 7.59 kiB Shape (972,) (972,) Dask graph 1 chunks in 3 graph layers Data type int64 numpy.ndarray",972  1,

Unnamed: 0,Array,Chunk
Bytes,7.59 kiB,7.59 kiB
Shape,"(972,)","(972,)"
Dask graph,1 chunks in 3 graph layers,1 chunks in 3 graph layers
Data type,int64 numpy.ndarray,int64 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,7.59 kiB,7.59 kiB
Shape,"(972,)","(972,)"
Dask graph,1 chunks in 3 graph layers,1 chunks in 3 graph layers
Data type,int64 numpy.ndarray,int64 numpy.ndarray
"Array Chunk Bytes 7.59 kiB 7.59 kiB Shape (972,) (972,) Dask graph 1 chunks in 3 graph layers Data type int64 numpy.ndarray",972  1,

Unnamed: 0,Array,Chunk
Bytes,7.59 kiB,7.59 kiB
Shape,"(972,)","(972,)"
Dask graph,1 chunks in 3 graph layers,1 chunks in 3 graph layers
Data type,int64 numpy.ndarray,int64 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,7.59 kiB,7.59 kiB
Shape,"(972,)","(972,)"
Dask graph,1 chunks in 3 graph layers,1 chunks in 3 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray
"Array Chunk Bytes 7.59 kiB 7.59 kiB Shape (972,) (972,) Dask graph 1 chunks in 3 graph layers Data type float64 numpy.ndarray",972  1,

Unnamed: 0,Array,Chunk
Bytes,7.59 kiB,7.59 kiB
Shape,"(972,)","(972,)"
Dask graph,1 chunks in 3 graph layers,1 chunks in 3 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,48 B,48 B
Shape,"(3,)","(3,)"
Dask graph,1 chunks in 3 graph layers,1 chunks in 3 graph layers
Data type,,
"Array Chunk Bytes 48 B 48 B Shape (3,) (3,) Dask graph 1 chunks in 3 graph layers Data type",3  1,

Unnamed: 0,Array,Chunk
Bytes,48 B,48 B
Shape,"(3,)","(3,)"
Dask graph,1 chunks in 3 graph layers,1 chunks in 3 graph layers
Data type,,

Unnamed: 0,Array,Chunk
Bytes,48 B,48 B
Shape,"(3,)","(3,)"
Dask graph,1 chunks in 3 graph layers,1 chunks in 3 graph layers
Data type,,
"Array Chunk Bytes 48 B 48 B Shape (3,) (3,) Dask graph 1 chunks in 3 graph layers Data type",3  1,

Unnamed: 0,Array,Chunk
Bytes,48 B,48 B
Shape,"(3,)","(3,)"
Dask graph,1 chunks in 3 graph layers,1 chunks in 3 graph layers
Data type,,

Unnamed: 0,Array,Chunk
Bytes,37.97 kiB,37.97 kiB
Shape,"(972,)","(972,)"
Dask graph,1 chunks in 3 graph layers,1 chunks in 3 graph layers
Data type,,
"Array Chunk Bytes 37.97 kiB 37.97 kiB Shape (972,) (972,) Dask graph 1 chunks in 3 graph layers Data type",972  1,

Unnamed: 0,Array,Chunk
Bytes,37.97 kiB,37.97 kiB
Shape,"(972,)","(972,)"
Dask graph,1 chunks in 3 graph layers,1 chunks in 3 graph layers
Data type,,

Unnamed: 0,Array,Chunk
Bytes,22.78 kiB,11.88 kiB
Shape,"(3, 972)","(3, 507)"
Dask graph,2 chunks in 4 graph layers,2 chunks in 4 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray
"Array Chunk Bytes 22.78 kiB 11.88 kiB Shape (3, 972) (3, 507) Dask graph 2 chunks in 4 graph layers Data type float64 numpy.ndarray",972  3,

Unnamed: 0,Array,Chunk
Bytes,22.78 kiB,11.88 kiB
Shape,"(3, 972)","(3, 507)"
Dask graph,2 chunks in 4 graph layers,2 chunks in 4 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray


Shape: (3, 972)


| Feature                | PySnpTools         | Xarray/Zarr           |
|------------------------|--------------------|-----------------------|
| schema(s)              | fixed              | dynamic               |
| full slicing & dicing  | yes                | yes                   |
| lazy loading           | yes                | yes                   |
| in memory, too         | `SnpData`          | `xarray.Dataset`      |
| index PLINK iid pairs  | yes                | yes, w/ Pandas        |

## Note on sgkit

sgkit's PLINK reading functions (`sgkit.io.plink.read_plink`) are **deprecated since v0.11.0**. The documentation recommends using:
- `bio2zarr` package for PLINK → Zarr conversion
- `vcztools` package for variant call format tools

Additionally, sgkit doesn't support `count_a1=True`, which is important for matching the PySnpTools `count_A1=True` behavior used in this notebook.

**Recommendation**: The PySnpTools → xarray → Zarr workflow demonstrated above is more practical because it:
- Maintains full PLINK compatibility
- Supports `count_A1=True`
- Provides clean, readable code
- Integrates seamlessly with existing PySnpTools workflows

In [10]:
from pathlib import Path
import bio2zarr.plink
from pysnptools.util import example_file

# Ask PySnpTools for a local copy of the example fileset (the original note
# describes the result as the local path to the example BED file).
bed_path = example_file("tests/datasets/all_chr.maf0.001.N300.*")
# Strip the final extension to get the fileset prefix handed to the converter.
base_path = str(Path(bed_path).with_suffix(""))
zarr_output = "plink_data.zarr"

# Convert only when the output store is not already on disk, so re-running
# the cell reuses the existing conversion.
store_already_built = Path(zarr_output).exists()
if not store_already_built:
    bio2zarr.plink.convert(base_path, zarr_output)

bio2zarr_ds = xr.open_zarr(zarr_output)
bio2zarr_ds

Unnamed: 0,Array,Chunk
Bytes,594.73 kiB,585.94 kiB
Shape,"(1015, 300, 2)","(1000, 300, 2)"
Dask graph,2 chunks in 2 graph layers,2 chunks in 2 graph layers
Data type,int8 numpy.ndarray,int8 numpy.ndarray
"Array Chunk Bytes 594.73 kiB 585.94 kiB Shape (1015, 300, 2) (1000, 300, 2) Dask graph 2 chunks in 2 graph layers Data type int8 numpy.ndarray",2  300  1015,

Unnamed: 0,Array,Chunk
Bytes,594.73 kiB,585.94 kiB
Shape,"(1015, 300, 2)","(1000, 300, 2)"
Dask graph,2 chunks in 2 graph layers,2 chunks in 2 graph layers
Data type,int8 numpy.ndarray,int8 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,594.73 kiB,585.94 kiB
Shape,"(1015, 300, 2)","(1000, 300, 2)"
Dask graph,2 chunks in 2 graph layers,2 chunks in 2 graph layers
Data type,bool numpy.ndarray,bool numpy.ndarray
"Array Chunk Bytes 594.73 kiB 585.94 kiB Shape (1015, 300, 2) (1000, 300, 2) Dask graph 2 chunks in 2 graph layers Data type bool numpy.ndarray",2  300  1015,

Unnamed: 0,Array,Chunk
Bytes,594.73 kiB,585.94 kiB
Shape,"(1015, 300, 2)","(1000, 300, 2)"
Dask graph,2 chunks in 2 graph layers,2 chunks in 2 graph layers
Data type,bool numpy.ndarray,bool numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,297.36 kiB,292.97 kiB
Shape,"(1015, 300)","(1000, 300)"
Dask graph,2 chunks in 2 graph layers,2 chunks in 2 graph layers
Data type,bool numpy.ndarray,bool numpy.ndarray
"Array Chunk Bytes 297.36 kiB 292.97 kiB Shape (1015, 300) (1000, 300) Dask graph 2 chunks in 2 graph layers Data type bool numpy.ndarray",300  1015,

Unnamed: 0,Array,Chunk
Bytes,297.36 kiB,292.97 kiB
Shape,"(1015, 300)","(1000, 300)"
Dask graph,2 chunks in 2 graph layers,2 chunks in 2 graph layers
Data type,bool numpy.ndarray,bool numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,184 B,184 B
Shape,"(23,)","(23,)"
Dask graph,1 chunks in 2 graph layers,1 chunks in 2 graph layers
Data type,object numpy.ndarray,object numpy.ndarray
"Array Chunk Bytes 184 B 184 B Shape (23,) (23,) Dask graph 1 chunks in 2 graph layers Data type object numpy.ndarray",23  1,

Unnamed: 0,Array,Chunk
Bytes,184 B,184 B
Shape,"(23,)","(23,)"
Dask graph,1 chunks in 2 graph layers,1 chunks in 2 graph layers
Data type,object numpy.ndarray,object numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,576 B,576 B
Shape,"(24, 6)","(24, 6)"
Dask graph,1 chunks in 2 graph layers,1 chunks in 2 graph layers
Data type,int32 numpy.ndarray,int32 numpy.ndarray
"Array Chunk Bytes 576 B 576 B Shape (24, 6) (24, 6) Dask graph 1 chunks in 2 graph layers Data type int32 numpy.ndarray",6  24,

Unnamed: 0,Array,Chunk
Bytes,576 B,576 B
Shape,"(24, 6)","(24, 6)"
Dask graph,1 chunks in 2 graph layers,1 chunks in 2 graph layers
Data type,int32 numpy.ndarray,int32 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,2.34 kiB,2.34 kiB
Shape,"(300,)","(300,)"
Dask graph,1 chunks in 2 graph layers,1 chunks in 2 graph layers
Data type,object numpy.ndarray,object numpy.ndarray
"Array Chunk Bytes 2.34 kiB 2.34 kiB Shape (300,) (300,) Dask graph 1 chunks in 2 graph layers Data type object numpy.ndarray",300  1,

Unnamed: 0,Array,Chunk
Bytes,2.34 kiB,2.34 kiB
Shape,"(300,)","(300,)"
Dask graph,1 chunks in 2 graph layers,1 chunks in 2 graph layers
Data type,object numpy.ndarray,object numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,15.86 kiB,15.62 kiB
Shape,"(1015, 2)","(1000, 2)"
Dask graph,2 chunks in 2 graph layers,2 chunks in 2 graph layers
Data type,object numpy.ndarray,object numpy.ndarray
"Array Chunk Bytes 15.86 kiB 15.62 kiB Shape (1015, 2) (1000, 2) Dask graph 2 chunks in 2 graph layers Data type object numpy.ndarray",2  1015,

Unnamed: 0,Array,Chunk
Bytes,15.86 kiB,15.62 kiB
Shape,"(1015, 2)","(1000, 2)"
Dask graph,2 chunks in 2 graph layers,2 chunks in 2 graph layers
Data type,object numpy.ndarray,object numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,0.99 kiB,0.98 kiB
Shape,"(1015,)","(1000,)"
Dask graph,2 chunks in 2 graph layers,2 chunks in 2 graph layers
Data type,int8 numpy.ndarray,int8 numpy.ndarray
"Array Chunk Bytes 0.99 kiB 0.98 kiB Shape (1015,) (1000,) Dask graph 2 chunks in 2 graph layers Data type int8 numpy.ndarray",1015  1,

Unnamed: 0,Array,Chunk
Bytes,0.99 kiB,0.98 kiB
Shape,"(1015,)","(1000,)"
Dask graph,2 chunks in 2 graph layers,2 chunks in 2 graph layers
Data type,int8 numpy.ndarray,int8 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,7.93 kiB,7.81 kiB
Shape,"(1015,)","(1000,)"
Dask graph,2 chunks in 2 graph layers,2 chunks in 2 graph layers
Data type,object numpy.ndarray,object numpy.ndarray
"Array Chunk Bytes 7.93 kiB 7.81 kiB Shape (1015,) (1000,) Dask graph 2 chunks in 2 graph layers Data type object numpy.ndarray",1015  1,

Unnamed: 0,Array,Chunk
Bytes,7.93 kiB,7.81 kiB
Shape,"(1015,)","(1000,)"
Dask graph,2 chunks in 2 graph layers,2 chunks in 2 graph layers
Data type,object numpy.ndarray,object numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,0.99 kiB,0.98 kiB
Shape,"(1015,)","(1000,)"
Dask graph,2 chunks in 2 graph layers,2 chunks in 2 graph layers
Data type,bool numpy.ndarray,bool numpy.ndarray
"Array Chunk Bytes 0.99 kiB 0.98 kiB Shape (1015,) (1000,) Dask graph 2 chunks in 2 graph layers Data type bool numpy.ndarray",1015  1,

Unnamed: 0,Array,Chunk
Bytes,0.99 kiB,0.98 kiB
Shape,"(1015,)","(1000,)"
Dask graph,2 chunks in 2 graph layers,2 chunks in 2 graph layers
Data type,bool numpy.ndarray,bool numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,0.99 kiB,0.98 kiB
Shape,"(1015,)","(1000,)"
Dask graph,2 chunks in 2 graph layers,2 chunks in 2 graph layers
Data type,int8 numpy.ndarray,int8 numpy.ndarray
"Array Chunk Bytes 0.99 kiB 0.98 kiB Shape (1015,) (1000,) Dask graph 2 chunks in 2 graph layers Data type int8 numpy.ndarray",1015  1,

Unnamed: 0,Array,Chunk
Bytes,0.99 kiB,0.98 kiB
Shape,"(1015,)","(1000,)"
Dask graph,2 chunks in 2 graph layers,2 chunks in 2 graph layers
Data type,int8 numpy.ndarray,int8 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,3.96 kiB,3.91 kiB
Shape,"(1015,)","(1000,)"
Dask graph,2 chunks in 2 graph layers,2 chunks in 2 graph layers
Data type,int32 numpy.ndarray,int32 numpy.ndarray
"Array Chunk Bytes 3.96 kiB 3.91 kiB Shape (1015,) (1000,) Dask graph 2 chunks in 2 graph layers Data type int32 numpy.ndarray",1015  1,

Unnamed: 0,Array,Chunk
Bytes,3.96 kiB,3.91 kiB
Shape,"(1015,)","(1000,)"
Dask graph,2 chunks in 2 graph layers,2 chunks in 2 graph layers
Data type,int32 numpy.ndarray,int32 numpy.ndarray
