In [1]:
import os
import glob
import pickle
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import scipy.stats as stats
from collections.abc import Iterable
from tqdm.auto import tqdm
import muon as mu
import scanpy as sc
import decoupler as dc
import snapatac2 as snap

%matplotlib inline

The history saving thread hit an unexpected error (DatabaseError('database disk image is malformed')).History will not be written to the database.


  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# Input and output locations. All of them live under one dataset directory, so
# the shared prefix is written once and the individual paths are derived from it.
dataset_dir = "/cellar/users/aklie/data/datasets/igvf_sc-islet_10X-Multiome"

# Tab-separated sample sheet.
sample_metadata_path = f"{dataset_dir}/metadata/sample_metadata.tsv"
# Glob pattern (note the "*"), used later as the root for CellRanger output lookups.
cellranger_path = f"{dataset_dir}/processed/*/cellranger/"
# Root directory searched later for the per-sample annotation results.
cellcommander_path = f"{dataset_dir}/results/sample_annotation"

# Destination of the assembled per-sample table.
outpath = f"{cellcommander_path}/sample_annotation_metadata_2024_07_20.tsv"

# Metadata

In [3]:
# Read the tab-separated sample sheet; its first column becomes the row index
# that every later cell uses to look samples up by ID.
sample_metadata_df = pd.read_csv(
    filepath_or_buffer=sample_metadata_path,
    sep="\t",
    index_col=0,
)
sample_metadata_df.head()

Unnamed: 0_level_0,condition,timepoint,rep,harmonized_sample_id,year,cell_line,differentiation_batch,sequencing_batch,multiome_stage,multiome_qc_status,notes
sample_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
mo1,control,0,fail,,1.0,H1,DM023,,annotated,fail,not using H1 samples
mo3,control,0,fail,,1.0,H1,DM023,,annotated,fail,not using H1 samples
dm0b,control,0,fail,,2.0,A2,DM041,,annotated,fail,"clustered separately in integration, still has..."
0-2,control,0,1,control_0_1,3.0,A2,JE002,,annotated,pass,0-1 wasn't sequenced due to not enough cells
0-3,control,0,2,control_0_2,3.0,A2,JE002,,not generated,,


# Add initial number of barcodes detected

In [4]:
# Collect every file named "per_barcode_metrics.csv" found in an "outs" folder
# under the CellRanger pattern; sorted so the processing order is stable.
bc_metrics_pattern = os.path.join(cellranger_path, "*", "outs", "per_barcode_metrics.csv")
bc_metrics_paths = sorted(glob.glob(bc_metrics_pattern))
len(bc_metrics_paths)

38

In [5]:
# For every per-barcode metrics file, count its data rows and record that count
# as the number of barcodes CellRanger initially detected for the sample.
initial_bcs = []
for bc_metrics_path in tqdm(bc_metrics_paths):
    # The sample ID is the second "_"-separated token in the name of the
    # directory that contains "outs".
    sample_id = bc_metrics_path.split("/")[-3].split("_")[1]
    # Only the row count is used, so parse a single column rather than the whole
    # table; the number of rows is unchanged and the read is much cheaper.
    bc_metrics_df = pd.read_csv(bc_metrics_path, usecols=[0])
    n_initial_bcs = bc_metrics_df.shape[0]
    initial_bcs.append(n_initial_bcs)
    if sample_id in sample_metadata_df.index:
        sample_metadata_df.loc[sample_id, "cellranger:initial_bcs_detected"] = n_initial_bcs
    else:
        # Samples absent from the sample sheet are reported and skipped.
        print(f"Sample {sample_id} not in metadata")

  0%|          | 0/38 [00:00<?, ?it/s]

  0%|          | 0/38 [00:01<?, ?it/s]


KeyboardInterrupt: 

# Add expected cell number from CellRanger

In [6]:
# Locate every "summary.csv" in an "outs" folder under the CellRanger pattern,
# sorted so the processing order is stable.
summary_pattern = os.path.join(cellranger_path, "*", "outs", "summary.csv")
sample_metrics_paths = sorted(glob.glob(summary_pattern))
len(sample_metrics_paths)

38

In [7]:
# For every summary file, take the first value of its "Estimated number of cells"
# column and store it on the matching row of the sample table.
expected_cells = []
for sample_metrics_path in sample_metrics_paths:
    # Sample ID: second "_"-separated token of the directory that contains "outs".
    sample_id = sample_metrics_path.split("/")[-3].split("_")[1]
    sample_metrics_df = pd.read_csv(sample_metrics_path)
    n_estimated_cells = sample_metrics_df["Estimated number of cells"].values[0]
    expected_cells.append(n_estimated_cells)
    if sample_id not in sample_metadata_df.index:
        print(f"Sample {sample_id} not in metadata")
        continue
    sample_metadata_df.loc[sample_id, "cellranger:expected_cells_cellranger"] = n_estimated_cells

# RNA

## Barcodes filtered by RNA thresholds

In [8]:
# Per-sample lists of barcodes removed by the RNA QC thresholds, in sorted order.
filtered_bc_pattern = os.path.join(cellcommander_path, "*", "rna/threshold_qc", "filtered_barcodes.txt")
filtered_bc_paths = sorted(glob.glob(filtered_bc_pattern))
len(filtered_bc_paths)

38

In [9]:
# Count the rows of each headerless barcode list and store the count as the
# sample's "rna:filtered_bcs".
filtered_bcs = []
for filtered_bc_path in filtered_bc_paths:
    # The sample ID is the directory three levels above the file.
    sample_id = filtered_bc_path.split("/")[-4]
    filtered_bcs_df = pd.read_csv(filtered_bc_path, header=None)
    n_filtered_bcs = filtered_bcs_df.shape[0]
    filtered_bcs.append(n_filtered_bcs)
    if sample_id not in sample_metadata_df.index:
        print(f"Sample {sample_id} not in metadata")
        continue
    sample_metadata_df.loc[sample_id, "rna:filtered_bcs"] = n_filtered_bcs

## Remaining RNA barcodes

In [10]:
# Per-sample h5ad files written by the RNA threshold QC step, in sorted order.
threshold_h5ad_pattern = os.path.join(cellcommander_path, "*", "rna/threshold_qc", "threshold_qc.h5ad")
threshold_h5ad_paths = sorted(glob.glob(threshold_h5ad_pattern))
len(threshold_h5ad_paths)

38

In [11]:
# Barcodes left after RNA threshold filtering: the first dimension of each
# sample's threshold_qc.h5ad, stored as "rna:remaining_bcs".
remaining_bcs = []
for threshold_h5ad_path in tqdm(threshold_h5ad_paths):
    # The sample ID is the directory three levels above the file.
    sample_id = threshold_h5ad_path.split("/")[-4]
    threshold_adata = sc.read_h5ad(threshold_h5ad_path)
    n_remaining_bcs = threshold_adata.shape[0]
    remaining_bcs.append(n_remaining_bcs)
    if sample_id not in sample_metadata_df.index:
        print(f"Sample {sample_id} not in metadata")
        continue
    sample_metadata_df.loc[sample_id, "rna:remaining_bcs"] = n_remaining_bcs

100%|██████████| 38/38 [00:52<00:00,  1.37s/it]


## RNA initial filtered barcodes (before thresholds, less than 20 genes detected)

In [12]:
# Barcodes removed before the thresholds were applied: whatever CellRanger
# detected that is neither threshold-filtered nor still remaining.
sample_metadata_df["rna:initial_filtered_bcs"] = (
    sample_metadata_df["cellranger:initial_bcs_detected"]
    - sample_metadata_df["rna:filtered_bcs"]
    - sample_metadata_df["rna:remaining_bcs"]
)

## Metrics post QC (pre soupx)

In [13]:
# Per-sample metric summaries written by the RNA threshold QC step, in sorted
# order. NOTE: this rebinds `sample_metrics_paths`, which earlier held the
# CellRanger summary.csv paths.
threshold_metrics_pattern = os.path.join(cellcommander_path, "*", "rna/threshold_qc", "sample_metrics.tsv")
sample_metrics_paths = sorted(glob.glob(threshold_metrics_pattern))
len(sample_metrics_paths)

38

In [14]:
# Copy each sample's threshold-QC metrics into the sample table, one
# "rna:<metric>_pre_soupx" column per metric.
for sample_metrics_path in sample_metrics_paths:
    # The sample ID is the directory three levels above the file.
    sample_id = sample_metrics_path.split("/")[-4]
    # Read with the first column as index, then transpose so that the former
    # index entries become the column names.
    sample_metrics_df = pd.read_csv(sample_metrics_path, sep="\t", index_col=0).T
    if sample_id not in sample_metadata_df.index:
        print(f"Sample {sample_id} not in metadata")
        continue
    cols = sample_metrics_df.columns
    new_cols = [f"rna:{col}_pre_soupx" for col in cols]
    # Only the first row of the transposed table is used.
    sample_metadata_df.loc[sample_id, new_cols] = sample_metrics_df[cols].values[0]

## SoupX

In [15]:
# Per-sample pickled SoupX statistics from the background-removal step, in sorted order.
soupx_stats_pattern = os.path.join(cellcommander_path, "*", "rna/remove_background", "soupx_stats.pickle")
soupx_stats_paths = sorted(glob.glob(soupx_stats_pattern))
len(soupx_stats_paths)

38

In [16]:
# Gather the per-sample SoupX statistics into one table: one row per sample,
# one "rna:soupx#<key>" column per key of the pickled dict.
soupx_stats = {}
for soupx_stats_path in soupx_stats_paths:
    # The sample ID is the directory three levels above the file.
    sample_id = soupx_stats_path.split("/")[-4]
    # NOTE: pickle.load can execute arbitrary code; only point this at files
    # produced by a trusted pipeline run.
    with open(soupx_stats_path, "rb") as f:
        soupx_stats_dict = pickle.load(f)
    # Flatten collection-valued entries into comma-separated strings so each one
    # fits in a single table cell. str and bytes are Iterable too, but must be
    # left untouched: joining them would put ", " between every character.
    for key, value in soupx_stats_dict.items():
        if isinstance(value, Iterable) and not isinstance(value, (str, bytes)):
            soupx_stats_dict[key] = ", ".join(map(str, value))
    soupx_stats[sample_id] = soupx_stats_dict
soupx_stats_df = pd.DataFrame.from_dict(soupx_stats, orient="index")
soupx_stats_df.columns = [f"rna:soupx#{col}" for col in soupx_stats_df.columns]
soupx_stats_df.head()

Unnamed: 0,rna:soupx#gene_markers_used,rna:soupx#cluster_groups_for_markers,rna:soupx#soup_contamination_fraction,rna:soupx#soup_total_counts_removed,rna:soupx#top_10_genes
0-2,"INS, GCG, SST, TPH1","SC-Beta, SC-Alpha, SC-Delta, SC-EC",7.49,2679628.0,"MALAT1, INS, TTR, CHGA, CCSER1, PDE4D, GCG, CA..."
11-1,"INS, GCG, SST, TPH1","SC-Beta, SC-Alpha, SC-Delta, SC-EC",8.4,588586.0,"TTR, INS, MALAT1, CHGA, FTL, IAPP, MT-ND4, MT-..."
12-1,"INS, GCG, SST, TPH1","SC-Beta, SC-Alpha, SC-Delta, SC-EC",25.41,3462642.0,"MALAT1, INS, TTR, MT-ND4, MT-CO2, MT-ATP6, MT-..."
14-2,"INS, GCG, SST, TPH1","SC-Beta, SC-Alpha, SC-Delta, SC-EC",9.15,3009841.0,"MALAT1, TTR, INS, PDE4D, HS6ST3, CCSER1, CHGA,..."
21-1,"INS, GCG, SST, TPH1","SC-Beta, SC-Alpha, SC-Delta, SC-EC",9.74,925455.0,"TTR, INS, MALAT1, CHGA, FTL, MT-ND4, MT-CO2, B..."


In [17]:
# Attach the SoupX columns by sample ID (index-on-index). This is an inner join
# (the pandas default, spelled out here), so samples without SoupX statistics
# are dropped from the table at this point.
sample_metadata_df = sample_metadata_df.merge(
    soupx_stats_df,
    how="inner",
    left_index=True,
    right_index=True,
)
sample_metadata_df.head()

Unnamed: 0,condition,timepoint,rep,harmonized_sample_id,year,cell_line,differentiation_batch,sequencing_batch,multiome_stage,multiome_qc_status,...,rna:initial_filtered_bcs,rna:median_n_genes_by_counts_pre_soupx,rna:median_total_counts_pre_soupx,rna:median_pct_counts_mt_pre_soupx,rna:median_pct_counts_ribo_pre_soupx,rna:soupx#gene_markers_used,rna:soupx#cluster_groups_for_markers,rna:soupx#soup_contamination_fraction,rna:soupx#soup_total_counts_removed,rna:soupx#top_10_genes
0-2,control,0,1,control_0_1,3.0,A2,JE002,,annotated,pass,...,694779.0,1550.0,2485.0,0.11611,0.431897,"INS, GCG, SST, TPH1","SC-Beta, SC-Alpha, SC-Delta, SC-EC",7.49,2679628.0,"MALAT1, INS, TTR, CHGA, CCSER1, PDE4D, GCG, CA..."
11-1,3-cyt,6,2,3-cyt_6_2,3.0,A2,JE002,,annotated,pass,...,580889.0,1902.0,3314.5,0.059462,0.482934,"INS, GCG, SST, TPH1","SC-Beta, SC-Alpha, SC-Delta, SC-EC",8.4,588586.0,"TTR, INS, MALAT1, CHGA, FTL, IAPP, MT-ND4, MT-..."
12-1,IFNg,6,2,IFNg_6_2,3.0,A2,JE002,,annotated,flagged,...,608351.0,800.0,975.0,0.803682,0.458015,"INS, GCG, SST, TPH1","SC-Beta, SC-Alpha, SC-Delta, SC-EC",25.41,3462642.0,"MALAT1, INS, TTR, MT-ND4, MT-CO2, MT-ATP6, MT-..."
14-2,palmitate,6,2,palmitate_6_2,3.0,A2,JE002,,annotated,pass,...,681390.0,1722.0,3070.0,0.053562,0.368098,"INS, GCG, SST, TPH1","SC-Beta, SC-Alpha, SC-Delta, SC-EC",9.15,3009841.0,"MALAT1, TTR, INS, PDE4D, HS6ST3, CCSER1, CHGA,..."
21-1,3-cyt,24,2,3-cyt_24_2,3.0,A2,JE002,,annotated,pass,...,639741.0,1503.0,2359.0,0.121888,0.492967,"INS, GCG, SST, TPH1","SC-Beta, SC-Alpha, SC-Delta, SC-EC",9.74,925455.0,"TTR, INS, MALAT1, CHGA, FTL, MT-ND4, MT-CO2, B..."


## Doublets

In [18]:
# Per-sample lists of barcodes called as doublets in the RNA doublet-detection
# step, in sorted order.
doublet_bc_pattern = os.path.join(cellcommander_path, "*", "rna/detect_doublets", "doublet_barcodes.txt")
doublet_bc_paths = sorted(glob.glob(doublet_bc_pattern))
len(doublet_bc_paths)

38

In [19]:
# Count the rows of each headerless doublet list and store the count as the
# sample's "rna:consensus#doublet_bcs".
doublet_bcs = []
for doublet_bc_path in doublet_bc_paths:
    # The sample ID is the directory three levels above the file.
    sample_id = doublet_bc_path.split("/")[-4]
    doublet_bcs_df = pd.read_csv(doublet_bc_path, header=None)
    n_doublet_bcs = doublet_bcs_df.shape[0]
    doublet_bcs.append(n_doublet_bcs)
    if sample_id not in sample_metadata_df.index:
        print(f"Sample {sample_id} not in metadata")
        continue
    sample_metadata_df.loc[sample_id, "rna:consensus#doublet_bcs"] = n_doublet_bcs

## Remaining RNA barcodes after doublet removal

In [20]:
# Per-sample cell metadata tables from the RNA dimensionality-reduction step,
# in sorted order.
rna_metadata_pattern = os.path.join(cellcommander_path, "*", "rna/reduce_dimensions/cell_metadata.tsv")
rna_metadata_paths = sorted(glob.glob(rna_metadata_pattern))
len(rna_metadata_paths)

38

In [21]:
# For each sample, record the number of rows in its RNA cell metadata table
# ("rna:final_bcs") and the number of distinct "leiden_1" labels
# ("rna:n_clusters_leiden_1").
for rna_metadata_path in tqdm(rna_metadata_paths):
    # The sample ID is the directory three levels above the file.
    sample_id = rna_metadata_path.split("/")[-4]
    rna_metadata_df = pd.read_csv(rna_metadata_path, sep="\t")
    if sample_id not in sample_metadata_df.index:
        print(f"Sample {sample_id} not in metadata")
        continue
    num_bcs_remaining = rna_metadata_df.shape[0]
    num_clusters = rna_metadata_df["leiden_1"].nunique()
    sample_metadata_df.loc[sample_id, "rna:final_bcs"] = num_bcs_remaining
    sample_metadata_df.loc[sample_id, "rna:n_clusters_leiden_1"] = num_clusters

100%|██████████| 38/38 [00:01<00:00, 24.59it/s]


# ATAC

## AMULET doublets

In [22]:
# Location of the text file listing the AMULET doublet barcodes.
amulet_bcs_path = (
    "/cellar/users/aklie/data/datasets/igvf_sc-islet_10X-Multiome"
    "/results/sample_annotation/amulet_bcs.txt"
)

In [23]:
def _sample_id_from_barcode(barcode):
    """Return the part of `barcode` that precedes its first "#"."""
    return barcode.split("#")[0]


# Count AMULET doublet barcodes per sample. The file is read as a single
# headerless column of barcodes; the sample ID is the text before the "#".
amulet_bcs_df = pd.read_csv(amulet_bcs_path, header=None)
amulet_bcs_df.columns = ["barcode"]
amulet_bcs_df["sample_id"] = amulet_bcs_df["barcode"].apply(_sample_id_from_barcode)
amulet_bcs_counts = amulet_bcs_df.groupby("sample_id").size()
# The Series name becomes the column name when it is merged into the sample table.
amulet_bcs_counts.name = "atac:amulet#doublet_bcs"
amulet_bcs_counts.head()

sample_id
0-2     939
11-1    235
12-1    426
14-2    482
21-1    330
Name: atac:amulet#doublet_bcs, dtype: int64

In [24]:
# Attach the AMULET doublet counts by sample ID (index-on-index). Inner join
# (the pandas default, spelled out here): samples without a count are dropped.
sample_metadata_df = sample_metadata_df.merge(
    amulet_bcs_counts,
    how="inner",
    left_index=True,
    right_index=True,
)
sample_metadata_df.head()

Unnamed: 0,condition,timepoint,rep,harmonized_sample_id,year,cell_line,differentiation_batch,sequencing_batch,multiome_stage,multiome_qc_status,...,rna:median_pct_counts_ribo_pre_soupx,rna:soupx#gene_markers_used,rna:soupx#cluster_groups_for_markers,rna:soupx#soup_contamination_fraction,rna:soupx#soup_total_counts_removed,rna:soupx#top_10_genes,rna:consensus#doublet_bcs,rna:final_bcs,rna:n_clusters_leiden_1,atac:amulet#doublet_bcs
0-2,control,0,1,control_0_1,3.0,A2,JE002,,annotated,pass,...,0.431897,"INS, GCG, SST, TPH1","SC-Beta, SC-Alpha, SC-Delta, SC-EC",7.49,2679628.0,"MALAT1, INS, TTR, CHGA, CCSER1, PDE4D, GCG, CA...",1681.0,8119.0,17.0,939
11-1,3-cyt,6,2,3-cyt_6_2,3.0,A2,JE002,,annotated,pass,...,0.482934,"INS, GCG, SST, TPH1","SC-Beta, SC-Alpha, SC-Delta, SC-EC",8.4,588586.0,"TTR, INS, MALAT1, CHGA, FTL, IAPP, MT-ND4, MT-...",67.0,1319.0,10.0,235
12-1,IFNg,6,2,IFNg_6_2,3.0,A2,JE002,,annotated,flagged,...,0.458015,"INS, GCG, SST, TPH1","SC-Beta, SC-Alpha, SC-Delta, SC-EC",25.41,3462642.0,"MALAT1, INS, TTR, MT-ND4, MT-CO2, MT-ATP6, MT-...",1036.0,7862.0,16.0,426
14-2,palmitate,6,2,palmitate_6_2,3.0,A2,JE002,,annotated,pass,...,0.368098,"INS, GCG, SST, TPH1","SC-Beta, SC-Alpha, SC-Delta, SC-EC",9.15,3009841.0,"MALAT1, TTR, INS, PDE4D, HS6ST3, CCSER1, CHGA,...",953.0,6520.0,15.0,482
21-1,3-cyt,24,2,3-cyt_24_2,3.0,A2,JE002,,annotated,pass,...,0.492967,"INS, GCG, SST, TPH1","SC-Beta, SC-Alpha, SC-Delta, SC-EC",9.74,925455.0,"TTR, INS, MALAT1, CHGA, FTL, MT-ND4, MT-CO2, B...",184.0,2463.0,13.0,330


In [25]:
# Per-sample ATAC cell metadata tables (cells remaining in SnapATAC2), in sorted order.
atac_metadata_pattern = os.path.join(cellcommander_path, "*", "atac", "cell_metadata.tsv")
snpatac2_metadata_paths = sorted(glob.glob(atac_metadata_pattern))
len(snpatac2_metadata_paths)

38

In [26]:
# Summarise each sample's ATAC cell metadata: the median of each column in
# `cols`, the number of distinct "leiden_1" labels, and the number of rows.
snapatac2_metadata_dict = {}
cols = ('n_fragment', 'frac_dup', 'tsse')
for snapatac2_metadata_path in tqdm(snpatac2_metadata_paths):
    # Here the sample ID is the directory two levels above the file.
    sample_id = snapatac2_metadata_path.split("/")[-3]
    atac_metadata_df = pd.read_csv(snapatac2_metadata_path, sep="\t")
    if sample_id not in sample_metadata_df.index:
        print(f"Sample {sample_id} not in metadata")
        continue
    # Key order determines the column order of the table built from this dict.
    sample_atac_dict = {f"atac:median_{col}": atac_metadata_df[col].median() for col in cols}
    sample_atac_dict["atac:n_clusters_leiden_1"] = atac_metadata_df["leiden_1"].nunique()
    sample_atac_dict["atac:final_bcs"] = atac_metadata_df.shape[0]
    snapatac2_metadata_dict[sample_id] = sample_atac_dict

100%|██████████| 38/38 [00:00<00:00, 64.78it/s]


In [27]:
# Build a table from the nested dict: outer keys (sample IDs) become the row
# index, inner keys become the columns.
snapatac2_metadata_df = pd.DataFrame.from_dict(
    data=snapatac2_metadata_dict,
    orient="index",
)
snapatac2_metadata_df.head()

Unnamed: 0,atac:median_n_fragment,atac:median_frac_dup,atac:median_tsse,atac:n_clusters_leiden_1,atac:final_bcs
0-2,18976.0,0.283877,22.446043,9,10715
11-1,4780.0,0.324126,21.525601,8,2959
12-1,2788.0,0.259631,20.329277,6,37447
14-2,17277.0,0.335348,23.835802,9,6557
21-1,15661.0,0.249948,22.216874,10,3584


In [28]:
# Attach the ATAC summary columns by sample ID (index-on-index). Inner join
# (the pandas default, spelled out here): samples without ATAC metadata are dropped.
sample_metadata_df = sample_metadata_df.merge(
    snapatac2_metadata_df,
    how="inner",
    left_index=True,
    right_index=True,
)
sample_metadata_df.head()

Unnamed: 0,condition,timepoint,rep,harmonized_sample_id,year,cell_line,differentiation_batch,sequencing_batch,multiome_stage,multiome_qc_status,...,rna:soupx#top_10_genes,rna:consensus#doublet_bcs,rna:final_bcs,rna:n_clusters_leiden_1,atac:amulet#doublet_bcs,atac:median_n_fragment,atac:median_frac_dup,atac:median_tsse,atac:n_clusters_leiden_1,atac:final_bcs
0-2,control,0,1,control_0_1,3.0,A2,JE002,,annotated,pass,...,"MALAT1, INS, TTR, CHGA, CCSER1, PDE4D, GCG, CA...",1681.0,8119.0,17.0,939,18976.0,0.283877,22.446043,9,10715
11-1,3-cyt,6,2,3-cyt_6_2,3.0,A2,JE002,,annotated,pass,...,"TTR, INS, MALAT1, CHGA, FTL, IAPP, MT-ND4, MT-...",67.0,1319.0,10.0,235,4780.0,0.324126,21.525601,8,2959
12-1,IFNg,6,2,IFNg_6_2,3.0,A2,JE002,,annotated,flagged,...,"MALAT1, INS, TTR, MT-ND4, MT-CO2, MT-ATP6, MT-...",1036.0,7862.0,16.0,426,2788.0,0.259631,20.329277,6,37447
14-2,palmitate,6,2,palmitate_6_2,3.0,A2,JE002,,annotated,pass,...,"MALAT1, TTR, INS, PDE4D, HS6ST3, CCSER1, CHGA,...",953.0,6520.0,15.0,482,17277.0,0.335348,23.835802,9,6557
21-1,3-cyt,24,2,3-cyt_24_2,3.0,A2,JE002,,annotated,pass,...,"TTR, INS, MALAT1, CHGA, FTL, MT-ND4, MT-CO2, B...",184.0,2463.0,13.0,330,15661.0,0.249948,22.216874,10,3584


## Filtered ATAC barcodes in QC

In [29]:
# Barcodes removed by ATAC QC: whatever CellRanger detected that is neither an
# AMULET doublet nor present in the final ATAC cell set.
sample_metadata_df["atac:filtered_bcs"] = (
    sample_metadata_df["cellranger:initial_bcs_detected"]
    - sample_metadata_df["atac:amulet#doublet_bcs"]
    - sample_metadata_df["atac:final_bcs"]
)

# Annotations

In [30]:
# Cell-type labels to tally: the unique values of the marker-gene table's second
# column (loaded as the index), followed by the extra label 'other'.
marker_genes_df = pd.read_csv(
    "/cellar/users/aklie/data/datasets/igvf_sc-islet_10X-Multiome/ref/SC.islet.marker_genes.csv",
    index_col=1,
)
annotation_cats = [*marker_genes_df.index.unique().tolist(), 'other']

In [31]:
# Create one "joint:n_<label>" count column per label, set to 0 for every
# sample, so labels that never occur in a sample keep a count of 0.
for annotation_cat in annotation_cats:
    count_column = f"joint:n_{annotation_cat}"
    sample_metadata_df[count_column] = 0

In [32]:
# Per-sample joint annotation tables, in sorted order. The file name really is
# spelled "annotate_metdata.tsv" on disk, so the pattern keeps that spelling.
annotate_metadata_pattern = os.path.join(cellcommander_path, "*", "joint/annotate/annotate_metdata.tsv")
annotate_metadata_paths = sorted(glob.glob(annotate_metadata_pattern))
len(annotate_metadata_paths)

38

In [33]:
# For each sample, summarise the joint annotation table: row count, number of
# distinct "wnn_leiden_1" labels, median WNN modality weights, and the number of
# cells carrying each label in `annotation_cats`.
for annotate_metadata_path in tqdm(annotate_metadata_paths):
    # The sample ID is the directory three levels above the file.
    sample_id = annotate_metadata_path.split("/")[-4]
    annotate_metadata_df = pd.read_csv(annotate_metadata_path, sep="\t")

    if sample_id not in sample_metadata_df.index:
        print(f"Sample {sample_id} not in metadata")
        continue

    # Compute every summary value first, then write them in this fixed order.
    joint_summary = {
        "joint:final_bcs": annotate_metadata_df.shape[0],
        "joint:n_clusters_wnn_leiden_1": annotate_metadata_df["wnn_leiden_1"].nunique(),
        "joint:median_wnn_RNA_weight": annotate_metadata_df["wnn_RNA_weight"].median(),
        "joint:median_wnn_ATAC_weight": annotate_metadata_df["wnn_ATAC_weight"].median(),
    }
    for summary_column, summary_value in joint_summary.items():
        sample_metadata_df.loc[sample_id, summary_column] = summary_value

    # Add annotation counts; labels missing from this sample keep their current value.
    annotation_counts = annotate_metadata_df["manual_annotation"].value_counts()
    for annotation_cat in annotation_cats:
        if annotation_cat not in annotation_counts:
            continue
        sample_metadata_df.loc[sample_id, f"joint:n_{annotation_cat}"] = annotation_counts[annotation_cat]

100%|██████████| 38/38 [00:01<00:00, 23.58it/s]


# Save

In [34]:
# Turn the sample IDs from the row index into an ordinary column. reset_index()
# labels that column "index" when the index has no name, so it is renamed to
# "sample_id" afterwards.
sample_metadata_df = sample_metadata_df.reset_index()
sample_metadata_df = sample_metadata_df.rename(columns={"index": "sample_id"})
sample_metadata_df.head()

Unnamed: 0,sample_id,condition,timepoint,rep,harmonized_sample_id,year,cell_line,differentiation_batch,sequencing_batch,multiome_stage,...,joint:n_SC.delta,joint:n_SC.EC,joint:n_endocrine_progenitor,joint:n_pancreatic_progenitor,joint:n_proliferating_alpha,joint:n_other,joint:final_bcs,joint:n_clusters_wnn_leiden_1,joint:median_wnn_RNA_weight,joint:median_wnn_ATAC_weight
0,0-2,control,0,1,control_0_1,3.0,A2,JE002,,annotated,...,64,1550,232,0,0,0,6423.0,18.0,0.567157,0.432843
1,11-1,3-cyt,6,2,3-cyt_6_2,3.0,A2,JE002,,annotated,...,0,214,49,0,0,0,890.0,8.0,0.48822,0.51178
2,12-1,IFNg,6,2,IFNg_6_2,3.0,A2,JE002,,annotated,...,49,320,31,0,4,659,3106.0,29.0,0.530606,0.469394
3,14-2,palmitate,6,2,palmitate_6_2,3.0,A2,JE002,,annotated,...,38,1479,175,0,0,0,5262.0,13.0,0.508066,0.491934
4,21-1,3-cyt,24,2,3-cyt_24_2,3.0,A2,JE002,,annotated,...,13,461,64,0,0,0,1838.0,10.0,0.466227,0.533773


In [35]:
# Write the assembled per-sample table as a tab-separated file. The row index is
# not written; the sample IDs are already an ordinary column.
sample_metadata_df.to_csv(path_or_buf=outpath, sep="\t", index=False)

Error in atexit._run_exitfuncs:
Traceback (most recent call last):
  File "/cellar/users/aklie/opt/miniconda3/envs/scverse-lite-py39/lib/python3.9/site-packages/IPython/core/history.py", line 844, in writeout_cache
    self._writeout_input_cache(conn)
  File "/cellar/users/aklie/opt/miniconda3/envs/scverse-lite-py39/lib/python3.9/site-packages/IPython/core/history.py", line 827, in _writeout_input_cache
    conn.execute("INSERT INTO history VALUES (?, ?, ?, ?)",
sqlite3.DatabaseError: database disk image is malformed


# DONE!

---