In [1]:
# Import required packages (will fail if not available)
import xarray as xr
import pysnptools

print(f"✓ xarray {xr.__version__}")
print(f"✓ pysnptools {type(pysnptools)}")

✓ xarray 2025.7.1
✓ pysnptools <class 'module'>


In [2]:
# Load sample BED file using PySnpTools
from pysnptools.snpreader import Bed
from pysnptools.util import example_file

bed_file = example_file("tests/datasets/all_chr.maf0.001.N300.*")
snp_reader = Bed(bed_file)
print(f"Shape: {snp_reader.shape} (individuals × SNPs)")

Shape: (300, 1015) (individuals × SNPs)




In [3]:
# Read data and convert to xarray with metadata
import numpy as np
import pandas as pd

# Read the actual genotype data
snp_data = snp_reader.read()
print(f"Data loaded: {snp_data.val.shape}")

# Extract all metadata
# Individual metadata
fid = [fid for fid, _iid in snp_data.iid]
iid = [iid for _fid, iid in snp_data.iid]

# SNP metadata (SNP IDs and positions)
sid = snp_data.sid
chromosome = snp_data.pos[:, 0]
cm_position = snp_data.pos[:, 1]
bp_position = snp_data.pos[:, 2]

# Create xarray DataArray with full metadata
genotypes = xr.DataArray(
    snp_data.val,
    dims=["individual", "snp"],
    coords={
        "individual": pd.MultiIndex.from_tuples(snp_data.iid, names=["fid", "iid"]),
        "snp": sid,
        "fid": (["individual"], fid),
        "iid": (["individual"], iid),
        "chromosome": (["snp"], chromosome),
        "cm_position": (["snp"], cm_position),
        "bp_position": (["snp"], bp_position),
    },
    attrs={
        "description": "Genotype data from PySnpTools BED file",
        "encoding": "0=homozygous ref, 1=heterozygous, 2=homozygous alt, NaN=missing",
        "source": bed_file,
    }
)

print(f"\nxarray DataArray created:")
print(genotypes)

Data loaded: (300, 1015)


ValueError: Buffer has wrong number of dimensions (expected 1, got 2)