Skip to content

Commit

Permalink
* Added additional parameters to 'Hierarchical clustergram plot' (Cou…
Browse files Browse the repository at this point in the history
…ntFilter.clustergram).

* The 'Hierarchical clustergram plot' function should now run faster on large datasets.
  • Loading branch information
GuyTeichman committed May 28, 2024
1 parent b99aec7 commit 627c45d
Show file tree
Hide file tree
Showing 3 changed files with 35 additions and 3 deletions.
7 changes: 7 additions & 0 deletions HISTORY.rst
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,13 @@ History
3.13.0 (2024-06-??)
-------------------

Added
******
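* Added new parameters to 'Hierarchical clustergram plot' (CountFilter.clustergram): 'cluster_columns', 'log_transform', and 'z_score_rows'. The 'colormap_label' parameter now also accepts 'auto'.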

Changed
*******
* The 'Hierarchical clustergram plot' function should now run faster on large datasets.

Fixed
******
Expand Down
1 change: 1 addition & 0 deletions requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -27,3 +27,4 @@ aiolimiter>=1.0.0
tenacity>=8.2.3
mslex>=1.1.0
nest-asyncio>=1.6.0
fastcluster>=1.2.6
30 changes: 27 additions & 3 deletions rnalysis/filtering.py
Original file line number Diff line number Diff line change
Expand Up @@ -4963,10 +4963,26 @@ def clustergram(self, sample_names: Union[param_typing.ColumnNames, Literal['all
linkage: Literal['Single', 'Average', 'Complete', 'Ward', 'Weighted', 'Centroid', 'Median'
] = 'Average', title: Union[str, Literal['auto']] = 'auto', title_fontsize: float = 20,
tick_fontsize: float = 12, colormap: ColorMap = 'inferno',
colormap_label: str = r"$\log_2$(Normalized reads + 1)") -> plt.Figure:
colormap_label: Union[Literal['auto'], str] = 'auto', cluster_columns: bool = True,
log_transform:bool=True, z_score_rows: bool = False
) -> plt.Figure:
"""
Performs hierarchical clustering on a given set of samples and plots a clustergram. \
By default, the data is log2-transformed (log2(x + 1)) before clustering.
:param z_score_rows: if True, the rows will be z-scored before clustering. \
This will normalize the rows to have a mean of 0 and a standard deviation of 1, such that \
genes will be clustered based on the similarity of their expression pattern instead of \
absolute expression levels.
:type z_score_rows: bool (default=False)
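:param colormap_label: label for the colorbar. If 'auto', the label is chosen automatically \
based on the 'log_transform' and 'z_score_rows' parameters.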
:type colormap_label: str or 'auto' (default='auto')
:param cluster_columns: if True, both rows and columns will be clustered. Otherwise, \
only the rows will be clustered, and columns will maintain their original order.
:type cluster_columns: bool (default=True)
:param colormap: the colormap to use in the clustergram.
:type colormap: str
:param log_transform: if True, will apply a log2(x + 1) transform to the data before clustering.
:type log_transform: bool (default=True)
:type sample_names: 'all' or list.
:param sample_names: the names of the relevant samples in a list. \
Example input: ["condition1_rep1", "condition1_rep2", "condition1_rep3", \
Expand Down Expand Up @@ -5006,11 +5022,19 @@ def clustergram(self, sample_names: Union[param_typing.ColumnNames, Literal['all

if sample_names == 'all':
sample_names = list(self.columns)
if colormap_label == 'auto':
colormap_label = r"$\log_2$(Normalized reads + 1)" if log_transform else "Normalized reads"
if z_score_rows:
colormap_label+="\nZ-score"

data = np.log2(self.df[sample_names] + 1) if log_transform else self.df[sample_names]

print('Calculating clustergram...')
with pd.option_context("mode.copy_on_write", False):
clustergram = sns.clustermap(np.log2(self.df[sample_names] + 1), method=linkage, metric=metric,
clustergram = sns.clustermap(data, method=linkage, metric=metric,
cmap=sns.color_palette(colormap, as_cmap=True), yticklabels=False,
cbar_kws=dict(label=colormap_label))
cbar_kws=dict(label=colormap_label), col_cluster=cluster_columns,
z_score = 0 if z_score_rows else None)

# set colored borders for colorbar and heatmap
cbar = clustergram.ax_cbar.get_children()[-1]
Expand Down

0 comments on commit 627c45d

Please sign in to comment.