# Welcome to Polly Python3 Notebook.

In [None]:
!sudo pip3 install https://elucidatainc.github.io/PublicAssets/builds/polly-python/tests/polly/polly_python-0.5.0_lib561-py3-none-any.whl

In [2]:
import os
from polly.auth import Polly
from polly import analyze
from polly import omixatlas
# Authenticate with the refresh token read from the environment (never hardcoded).
token = os.environ["POLLY_REFRESH_TOKEN"]
Polly.auth(token)

# Client objects created once after authentication.
# `analysis` is the object every case cell below calls run_meta_analysis on.
analysis = analyze.Analyze()
oa = omixatlas.OmixAtlas()

Case 1 - Repo other than 'geo_transcriptomics_omixatlas' (here 'bulkrnaseq_staging_oa')

In [3]:
# Negative case: the repo is not 'geo_transcriptomics_omixatlas'.
# The recorded run of this cell raised paramException.
repo = "bulkrnaseq_staging_oa"
ws_id = 14164
analysis_name = "test"

# dataset_id -> [control filter, perturbation filter]
designformula = {
    "GSE144269_GPL24676_raw": [
        {"tumor_non_tumor": "non-tumor"},
        {"tumor_non_tumor": "tumor"},
    ],
    "GSE77314_GPL9052_raw": [
        {"curated_control": "0"},
        {"curated_control": "1"},
    ],
}

analysis.run_meta_analysis(repo, ws_id, analysis_name, designformula)

paramException: paramException (parameter error): Only the 'geo_transcriptomics_omixatlas' omixatlas is supported currently.

Case 2 - Dataset ID not present in the OmixAtlas (TODO: the error message raised from get_metadata needs to be improved)

In [4]:
# Negative case: 'GSE123456_GPL12345_raw' is a dataset id that is not in the repo.
# The recorded run of this cell raised a generic Exception about the index.
repo = "geo_transcriptomics_omixatlas"
ws_id = 14164
analysis_name = "test"

# dataset_id -> [control filter, perturbation filter]
designformula = {
    "GSE123456_GPL12345_raw": [
        {"tumor_non_tumor": "non-tumor"},
        {"tumor_non_tumor": "tumor"},
    ],
    "GSE77314_GPL9052_raw": [
        {"curated_control": "0"},
        {"curated_control": "1"},
    ],
}

analysis.run_meta_analysis(repo, ws_id, analysis_name, designformula)

Exception: The index provided by you is not applicable for this dataset. For gct files, please use samples and for h5ad files, please use samples_singlecell. Please ensure that the dataset_id mentioned is present in the repo_key mentioned in the function parameters. If any issue persists, please contact polly.support@elucidata.io

Case 3 - Design formula contains only 1 dataset

In [3]:
# Negative case: the design formula lists only one dataset.
# The recorded run of this cell raised paramException.
repo = "geo_transcriptomics_omixatlas"
ws_id = 14164
analysis_name = "test"

# dataset_id -> [control filter, perturbation filter]
designformula = {
    "GSE144269_GPL24676_raw": [
        {"tumor_non_tumor": "non-tumor"},
        {"tumor_non_tumor": "tumor"},
    ],
}

analysis.run_meta_analysis(repo, ws_id, analysis_name, designformula)

paramException: paramException (parameter error): Design formula should contain atleast two datasets.

Case 4 - The list for a dataset contains only 1 dict

In [5]:
# Negative case: the first dataset's list holds a single dict instead of two.
# The recorded run of this cell raised paramException.
repo = "geo_transcriptomics_omixatlas"
ws_id = 14164
analysis_name = "test"

designformula = {
    # Only one filter supplied here (the second one is missing on purpose).
    "GSE144269_GPL24676_raw": [
        {"tumor_non_tumor": "non-tumor"},
    ],
    "GSE77314_GPL9052_raw": [
        {"curated_control": "0"},
        {"curated_control": "1"},
    ],
}

analysis.run_meta_analysis(repo, ws_id, analysis_name, designformula)

paramException: paramException (parameter error): Design formula list for dataset_id 'GSE144269_GPL24676_raw' should have length of two with dictionaries for control and perturbation respectively.

Case 5 - The list for a dataset contains more than 2 dicts

In [6]:
# Negative case: the first dataset's list holds three dicts instead of two.
# The recorded run of this cell raised paramException.
repo = "geo_transcriptomics_omixatlas"
ws_id = 14164
analysis_name = "test"

designformula = {
    # One filter too many on purpose.
    "GSE144269_GPL24676_raw": [
        {"tumor_non_tumor": "non-tumor"},
        {"tumor_non_tumor": "tumor"},
        {"tumor_non_tumor": "none"},
    ],
    "GSE77314_GPL9052_raw": [
        {"curated_control": "0"},
        {"curated_control": "1"},
    ],
}

analysis.run_meta_analysis(repo, ws_id, analysis_name, designformula)

paramException: paramException (parameter error): Design formula list for dataset_id 'GSE144269_GPL24676_raw' should have length of two with dictionaries for control and perturbation respectively.

Case 6 - The column in the design formula is not present in the sample level metadata

In [7]:
# Negative case: 'some_random_column' is used as a column name in the first filter.
# The recorded run of this cell raised paramException (column does not exist).
repo = "geo_transcriptomics_omixatlas"
ws_id = 14164
analysis_name = "test"

# dataset_id -> [control filter, perturbation filter]
designformula = {
    "GSE144269_GPL24676_raw": [
        {"some_random_column": "non-tumor"},
        {"tumor_non_tumor": "tumor"},
    ],
    "GSE77314_GPL9052_raw": [
        {"curated_control": "0"},
        {"curated_control": "1"},
    ],
}

analysis.run_meta_analysis(repo, ws_id, analysis_name, designformula)

paramException: paramException (parameter error): Column 'some_random_column' does not exist in the DataFrame.

Case 7 - The column is present but the value given is not present in the column

In [8]:
# Negative case: the column exists, but the value is misspelled --
# 'non tumor' (with a space) is passed where 'non-tumor' is the expected value.
# The recorded run of this cell raised paramException (value does not exist in column).
repo = "geo_transcriptomics_omixatlas"
ws_id = 14164
analysis_name = "test"

# dataset_id -> [control filter, perturbation filter]
designformula = {
    "GSE144269_GPL24676_raw": [
        {"tumor_non_tumor": "non tumor"},
        {"tumor_non_tumor": "tumor"},
    ],
    "GSE77314_GPL9052_raw": [
        {"curated_control": "0"},
        {"curated_control": "1"},
    ],
}

analysis.run_meta_analysis(repo, ws_id, analysis_name, designformula)

paramException: paramException (parameter error): Value 'non tumor' does not exist in column 'tumor_non_tumor'

Case 8 - Using curated columns with list type
* Pass the value as a single string wrapped in square brackets, e.g. '[string_value]'.
* If the list holds several comma-separated strings, pass them as '[Carcinoma, Hepatocellular]', with no quotes inside the square brackets.

Case 8.1 - Inputting curated column value without square brackets ('Normal')

In [10]:
# Negative case: the 'curated_disease' value is given as 'Normal', without square brackets.
# The recorded run of this cell raised paramException (value does not exist in column).
repo = "geo_transcriptomics_omixatlas"
ws_id = 14164
analysis_name = "test"

# dataset_id -> [control filter, perturbation filter]
designformula = {
    "GSE144269_GPL24676_raw": [
        {"tumor_non_tumor": "non-tumor"},
        {"tumor_non_tumor": "tumor"},
    ],
    "GSE114564_GPL11154_raw": [
        {"curated_disease": "Normal"},
        {"curated_disease": "[Carcinoma, Hepatocellular]"},
    ],
    "GSE77314_GPL9052_raw": [
        {"curated_control": "0"},
        {"curated_control": "1"},
    ],
}

analysis.run_meta_analysis(repo, ws_id, analysis_name, designformula)

paramException: paramException (parameter error): Value 'Normal' does not exist in column 'curated_disease'

Case 8.2 - Inputting curated column value as a Python list, i.e. with quotes inside the square brackets and none outside (['Normal'])

In [12]:
# Negative case: the 'curated_disease' value is given as a Python list (["Normal"])
# rather than as a bracketed string.
# The recorded run of this cell raised paramException (value does not exist in column).
repo = "geo_transcriptomics_omixatlas"
ws_id = 14164
analysis_name = "test"

# dataset_id -> [control filter, perturbation filter]
designformula = {
    "GSE144269_GPL24676_raw": [
        {"tumor_non_tumor": "non-tumor"},
        {"tumor_non_tumor": "tumor"},
    ],
    "GSE114564_GPL11154_raw": [
        {"curated_disease": ["Normal"]},
        {"curated_disease": "[Carcinoma, Hepatocellular]"},
    ],
    "GSE77314_GPL9052_raw": [
        {"curated_control": "0"},
        {"curated_control": "1"},
    ],
}

analysis.run_meta_analysis(repo, ws_id, analysis_name, designformula)

paramException: paramException (parameter error): Value '['Normal']' does not exist in column 'curated_disease'

Case 8.3 - Inputting curated column value with quotes inside and outside square brackets ('['Normal']')

In [13]:
# Negative case: the 'curated_disease' value is a *string* that has quotes both around
# and inside the square brackets, i.e. "['Normal']".
# Previously this cell passed the Python list ['Normal'], which made it an exact
# duplicate of case 8.2 and left the quoted-string variant untested.
repo = "geo_transcriptomics_omixatlas"
ws_id = 14164
analysis_name = "test"

# dataset_id -> [control filter, perturbation filter]
designformula = {
    "GSE144269_GPL24676_raw": [
        {"tumor_non_tumor": "non-tumor"},
        {"tumor_non_tumor": "tumor"},
    ],
    "GSE114564_GPL11154_raw": [
        # Outer double quotes make this a str; the inner single quotes are part of the value.
        {"curated_disease": "['Normal']"},
        {"curated_disease": "[Carcinoma, Hepatocellular]"},
    ],
    "GSE77314_GPL9052_raw": [
        {"curated_control": "0"},
        {"curated_control": "1"},
    ],
}

analysis.run_meta_analysis(repo, ws_id, analysis_name, designformula)

paramException: paramException (parameter error): Value '['Normal']' does not exist in column 'curated_disease'

Case 9 - Workspace does not exist or is not owned by user

In [15]:
# Negative case: workspace id 12345 is used instead of 14164.
# The recorded run printed "Cohort csv file created from the design formulae."
# and then raised RequestException ('Invalid Project Ownership').
repo = "geo_transcriptomics_omixatlas"
ws_id = 12345
analysis_name = "test"

# dataset_id -> [control filter, perturbation filter]
designformula = {
    "GSE144269_GPL24676_raw": [
        {"tumor_non_tumor": "non-tumor"},
        {"tumor_non_tumor": "tumor"},
    ],
    "GSE114564_GPL11154_raw": [
        {"curated_disease": "[Normal]"},
        {"curated_disease": "[Carcinoma, Hepatocellular]"},
    ],
    "GSE77314_GPL9052_raw": [
        {"curated_control": "0"},
        {"curated_control": "1"},
    ],
}

analysis.run_meta_analysis(repo, ws_id, analysis_name, designformula)

Cohort csv file created from the design formulae.


RequestException: ('Invalid Project Ownership', 'Not enough permissions over the Project id 12345')

Case 10 - Workspace id is given as string instead of int

In [16]:
# Negative case: the workspace id is passed as the string "14164" instead of an int.
# The recorded run printed "Cohort csv file created from the design formulae."
# and then raised InvalidParameterException for workspace_id.
repo = "geo_transcriptomics_omixatlas"
ws_id = "14164"
analysis_name = "test"

# dataset_id -> [control filter, perturbation filter]
designformula = {
    "GSE144269_GPL24676_raw": [
        {"tumor_non_tumor": "non-tumor"},
        {"tumor_non_tumor": "tumor"},
    ],
    "GSE114564_GPL11154_raw": [
        {"curated_disease": "[Normal]"},
        {"curated_disease": "[Carcinoma, Hepatocellular]"},
    ],
    "GSE77314_GPL9052_raw": [
        {"curated_control": "0"},
        {"curated_control": "1"},
    ],
}

analysis.run_meta_analysis(repo, ws_id, analysis_name, designformula)

Cohort csv file created from the design formulae.


InvalidParameterException: Empty or Invalid Parameters = workspace_id.