Skip to content

Commit

Permalink
bump version from 0.3.1 -> 0.3.2
Browse files Browse the repository at this point in the history
major code changes and logical improvements in analysis steps
  • Loading branch information
abearab committed May 19, 2024
2 parents 067035d + 3ec6610 commit 3e7fbef
Show file tree
Hide file tree
Showing 6 changed files with 205 additions and 170 deletions.
2 changes: 1 addition & 1 deletion screenpro/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,6 @@
from .ngs import Counter
from .assays import PooledScreens, GImaps

__version__ = "0.3.1"
__version__ = "0.3.2"
__author__ = "Abe Arab"
__email__ = 'abea@arcinstitute.org' # "abarbiology@gmail.com"
55 changes: 42 additions & 13 deletions screenpro/assays.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,9 @@
import numpy as np
import pandas as pd
import anndata as ad
import scanpy as sc

from pydeseq2 import preprocessing
from .phenoscore import runPhenoScore, runPhenoScoreForReplicate
from .utils import ann_score_df
from copy import copy
Expand All @@ -16,16 +18,16 @@ class PooledScreens(object):
pooledScreens class for processing CRISPR screen datasets
"""

def __init__(self, adata, transformation='log2(x+1)', test='ttest', n_reps=3):
def __init__(self, adata, fc_transformation='log2(x+1)', test='ttest', n_reps=3):
"""
Args:
adata (AnnData): AnnData object with adata.X as a matrix of sgRNA counts
transformation (str): transformation to apply to the data before calculating phenotype scores
fc_transformation (str): fold change transformation to apply for calculating phenotype scores
test (str): statistical test to use for calculating phenotype scores
"""
self.adata = adata
self.pdata = None
self.transformation = transformation
self.fc_transformation = fc_transformation
self.test = test
self.n_reps = n_reps
self.phenotypes = {}
Expand All @@ -41,7 +43,7 @@ def __init__(self, adata, transformation='log2(x+1)', test='ttest', n_reps=3):

def copy(self):
return copy(self)

def _add_phenotype_results(self, phenotype_name):
if phenotype_name in self.phenotype_names:
raise ValueError(f"Phenotype '{phenotype_name}' already exists in self.phenotype_names!")
Expand Down Expand Up @@ -75,7 +77,20 @@ def _calculateGrowthFactor(self, untreated, treated, db_rate_col):

return out

def calculateDrugScreen(self, t0, untreated, treated, db_untreated, db_treated, score_level, db_rate_col='pop_doublings', run_name=None):
def countNormalization(self):
"""
Normalize the counts data in adata.X
"""
self.adata.layers['raw_counts'] = self.adata.X.copy()

# normalize counts by sequencing depth
norm_counts, size_factors = preprocessing.deseq2_norm(self.adata.X)
# update adata object
self.adata.obs['size_factors'] = size_factors
self.adata.layers['seq_depth_norm'] = norm_counts
self.adata.X = self.adata.layers['seq_depth_norm']

def calculateDrugScreen(self, t0, untreated, treated, db_untreated, db_treated, score_level, db_rate_col='pop_doublings', run_name=None, **kwargs):
"""
Calculate `gamma`, `rho`, and `tau` phenotype scores for a drug screen dataset in a given `score_level`.
To normalize by growth rate, the doubling rate of the untreated and treated conditions are required.
Expand All @@ -89,23 +104,27 @@ def calculateDrugScreen(self, t0, untreated, treated, db_untreated, db_treated,
score_level (str): name of the score level
db_rate_col (str): column name for the doubling rate, default is 'pop_doublings'
run_name (str): name for the phenotype calculation run
**kwargs: additional arguments to pass to runPhenoScore
"""
# calculate phenotype scores: gamma, tau, rho
gamma_name, gamma = runPhenoScore(
self.adata, cond1=t0, cond2=untreated, growth_rate=db_untreated,
n_reps=self.n_reps,
transformation=self.transformation, test=self.test, score_level=score_level
transformation=self.fc_transformation, test=self.test, score_level=score_level,
**kwargs
)
tau_name, tau = runPhenoScore(
self.adata, cond1=t0, cond2=treated, growth_rate=db_treated,
n_reps=self.n_reps,
transformation=self.transformation, test=self.test, score_level=score_level
transformation=self.fc_transformation, test=self.test, score_level=score_level,
**kwargs
)
# TO-DO: warning / error if db_untreated and db_treated are too close, i.e. growth_rate ~= 0.
rho_name, rho = runPhenoScore(
self.adata, cond1=untreated, cond2=treated, growth_rate=np.abs(db_untreated - db_treated),
n_reps=self.n_reps,
transformation=self.transformation, test=self.test, score_level=score_level
transformation=self.fc_transformation, test=self.test, score_level=score_level,
**kwargs
)

if not run_name: run_name = score_level
Expand All @@ -125,9 +144,17 @@ def calculateDrugScreen(self, t0, untreated, treated, db_untreated, db_treated,

# get replicate level phenotype scores
pdata_df = pd.concat([
runPhenoScoreForReplicate(self,'T0', untreated,'gamma',growth_factor_table).add_prefix('gamma_'),
runPhenoScoreForReplicate(self,'T0', treated,'tau',growth_factor_table).add_prefix('tau_'),
runPhenoScoreForReplicate(self ,untreated,treated,'rho',growth_factor_table).add_prefix('rho_')
runPhenoScoreForReplicate(
self.adata, x_label = x_label, y_label = y_label, score = score_label,
transformation=self.fc_transformation,
growth_factor_table=growth_factor_table
).add_prefix(f'{score_label}_')

for x_label, y_label, score_label in [
('T0', untreated, 'gamma'),
('T0', treated, 'tau'),
(untreated, treated, 'rho')
]
],axis=1).T
# add .pdata
self.pdata = ad.AnnData(
Expand All @@ -136,7 +163,7 @@ def calculateDrugScreen(self, t0, untreated, treated, db_untreated, db_treated,
var=self.adata.var
)

def calculateFlowBasedScreen(self, low_bin, high_bin, score_level, run_name=None):
def calculateFlowBasedScreen(self, low_bin, high_bin, score_level, run_name=None, **kwargs):
"""
Calculate phenotype scores for a flow-based screen dataset.
Expand All @@ -145,11 +172,13 @@ def calculateFlowBasedScreen(self, low_bin, high_bin, score_level, run_name=None
high_bin (str): name of the high bin condition
score_level (str): name of the score level
run_name (str): name for the phenotype calculation run
**kwargs: additional arguments to pass to runPhenoScore
"""
# calculate phenotype scores
delta_name, delta = runPhenoScore(
self.adata, cond1=low_bin, cond2=high_bin, n_reps=self.n_reps,
transformation=self.transformation, test=self.test, score_level=score_level
transformation=self.fc_transformation, test=self.test, score_level=score_level,
**kwargs
)

if not run_name: run_name = score_level
Expand Down
8 changes: 8 additions & 0 deletions screenpro/ngs/counter.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,14 @@ def load_library(self, library_path, sep='\t', index_col=0, protospacer_length=1
elif self.cas_type == 'cas12':
raise NotImplementedError("Cas12 library is not yet implemented.")

# Check if the library has duplicate sequences and remove them
if library.duplicated('sequence').any():
shape_before_dedup = library.shape[0]
library = library.drop_duplicates(subset='sequence', keep='first')
shape_after_dedup = library.shape[0]
if verbose:
print(f"Warning: {shape_before_dedup - shape_after_dedup} duplicate sgRNA sequences found and removed.")

# covert to polar DataFrame
library = pl.from_pandas(library)

Expand Down
Loading

0 comments on commit 3e7fbef

Please sign in to comment.