# This notebook offers a general template for accessing a dataset, modifying it, and then uploading the updated dataset using the new version of the pydsdb package

## 1. A little bit of set-up

#### Please insert your inputs

In [1]:
# User inputs: edit these values before running the rest of the notebook.
DSID = 'DS000015647'    # dataset ID you received when you submitted your dataset
Version = '1'   # version of the dataset to fetch
DEV = False    # set to True if the dataset was saved to the dev server
your_experiment = "raw_qc" # key of the experiment you want to work on; a later cell may replace it with a key that exists in the fetched dataset
New_experiment = "new" # key (name) under which the updated data is to be saved

In [24]:
import pydsdb
import scanpy as sc
import multiassayexperiment as mae
import singlecellexperiment as sce
import pandas as pd
from gpauth import GPAuth

In [3]:
# Set GP_DISABLE_SSL_VERIFICATION so that SSL errors are ignored.
# NOTE(review): presumably read by the GP/pydsdb client libraries — confirm.
# Disabling certificate verification weakens transport security; only do this
# on a network you trust.
from os import environ
environ["GP_DISABLE_SSL_VERIFICATION"] = "True"

In [4]:
# Load IPython's autoreload extension; mode 2 reloads imported modules
# automatically, so edits to local .py files are picked up without a restart.
%load_ext autoreload
%autoreload 2

#### Check that the installed package versions are compatible with what we expect

In [5]:
# Installed pydsdb version (the recorded output for this cell is '1.3.4').
pydsdb.__version__

'1.3.4'

In [6]:
# Installed multiassayexperiment version (the recorded output for this cell is '0.2.4').
mae.__version__

'0.2.4'

## 2. Access and collect Dataset

In [28]:
# Fetch the dataset identified by DSID/Version (from the dev server when DEV is True).
dm = pydsdb.get_dataset(
    DSID,
    version=Version,
    dev=DEV,
)


08:52:11 -> Collating dataset: 'DS000015647' metadata.
08:52:11 -> Retrieving all project: 'DS000015647@1' metadata.

08:52:11 -> Reading: 'DS000015647'.

08:52:11 -> Reading: 'DS000015647 -> Experiment: feature'.


  for group, rows in agroups:


#### The output is a MultiAssayExperiment object

In [29]:
# print() renders the object's text summary: experiments, assays, features and sample data.
print(dm)

Class MultiAssayExperiment with 1 experiments and 97 samples 
  experiments:  
    feature: Class SummarizedExperiment with 58307 features and 97 samples 
  assays: ['counts'] 
  features: Index([], dtype='object') 
  sample data: Index([], dtype='object')


#### `dm.experiments` is a dictionary mapping experiment names to experiment objects (here a SummarizedExperiment)

In [30]:
# Show the mapping of experiment name -> experiment object.
print(dm.experiments)

{'feature': <summarizedexperiment.SummarizedExperiment.SummarizedExperiment object at 0x2aab09b7a940>}


#### You can list all experiment names and select one of them

In [31]:
# Names of every experiment stored in the dataset.
[*dm.experiments.keys()]

['feature']

In [32]:
# Name of the first experiment in the dataset.
[*dm.experiments.keys()][0]

'feature'

In [33]:
# Keep the experiment key chosen in the set-up cell when the dataset actually
# contains it; otherwise fall back to the first available experiment.
# (Previously the configured key was always overwritten, so the user input
# was silently ignored.)
if your_experiment not in dm.experiments:
    your_experiment = list(dm.experiments.keys())[0]

In [34]:
# Text summary of the selected experiment (assays, features, sample data).
print(dm.experiments[your_experiment])

Class SummarizedExperiment with 58307 features and 97 samples 
  assays: ['counts'] 
  features: Index([], dtype='object') 
  sample data: Index([], dtype='object')


#### Convert the selected experiment (here a SummarizedExperiment) to an AnnData object

In [35]:
# Look up the selected experiment, then convert it to an AnnData object.
selected_experiment = dm.experiments[your_experiment]
adata = selected_experiment.toAnnData()

In [36]:
# Display the AnnData summary (dimensions, uns and layers).
adata

AnnData object with n_obs × n_vars = 97 × 58307
    uns: '.internal'
    layers: 'counts'

In [39]:
# Inspect the "counts" layer (the recorded output is an int32 array).
adata.layers["counts"]

array([[ 25,   0,  24, ...,   0,   0,   0],
       [ 27,   0,  26, ...,   0,   0,   0],
       [ 49,   0,  60, ...,   0,   0,   0],
       ...,
       [ 23,   0,  54, ...,   0,   0,   0],
       [ 13,   0,  29, ...,   0,   0,   0],
       [194,   0, 151, ...,   0,   0,   0]], dtype=int32)

In [25]:
# Save the AnnData object to an .h5ad file on disk.
# NOTE(review): hard-coded absolute path — adjust it to a location you can write to.
adata.write("/gstore/project/crc_recursion_2/NGS5425/quantseq_pool.h5ad")

In [26]:
# Read the saved .h5ad file back into a new AnnData object.
# NOTE(review): the same hard-coded absolute path is used by the write step.
bdata = sc.read("/gstore/project/crc_recursion_2/NGS5425/quantseq_pool.h5ad")

In [27]:
# Display the summary of the re-loaded AnnData object.
bdata

AnnData object with n_obs × n_vars = 97 × 58307
    uns: '.internal'
    layers: 'counts'

In [43]:
# Per-observation annotations; the recorded output lists the sample names in the index.
bdata.obs

A01-CX1-NCE1-5uM
A02-DLD1-NCE1-5uM
A03-RCM1-NCE1-5uM
A04-SKCO1-NCE1-5uM
A05-SW837-NCE1-5uM
...
H09-RCM1-NCE3-1uM
H10-SKCO1-NCE3-1uM
H11-SW837-NCE3-1uM
H12-SW1417-NCE3-1uM
undetermined
