- Author: Peter Riesebos
- Purpose: Script used to inspect summary stats, expression data, genotype data and count matrices from my 1000IBD data and their original publication data
- Input: 1000IBD summary stat files, expression data, count files and 1000IBD genotype file from own data and from the Hu et al., 2021 paper.
- Output: correlations, scatter plots, and other graphs

## Imports

In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np
from scipy.stats import pearsonr, spearmanr

In [None]:
# Summary statistics from GTEx and the 1000IBD cohort, the combined expression matrix of
# my own three datasets (1000IBD Werna, pub rna and GTEx) and the paper/own overlap table.

# Paper summary stats (Excel); header=1 takes the column names from the second row
paper_eqtls = pd.read_excel(
    "/groups/umcg-fg/tmp04/projects/gut-bulk/ongoing/2024-02-07-GutPublicRNASeq/datasets/Werna/qtl/eqtl/werna_paper_summary_stats.xlsx",
    header=1,
)

# Top effects from my own eQTL run
own_eqtls = pd.read_csv(
    "/groups/umcg-fg/tmp04/projects/gut-bulk/ongoing/2024-02-07-GutPublicRNASeq/datasets/Werna/qtl/eqtl/output_no_ver/merged_topeffects_final.txt",
    sep='\t',
)

# GTEx v8 significant variant-gene pairs, colon transverse
gtex_fastqtl = pd.read_csv(
    "/groups/umcg-fg/tmp04/projects/gut-bulk/ongoing/2024-02-07-GutPublicRNASeq/datasets/GTEx/fastqtl_eqtls/Colon_Transverse.v8.signif_variant_gene_pairs.txt.gz",
    sep='\t',
)

# Combined expression matrix
exp = pd.read_csv(
    "/groups/umcg-fg/tmp04/projects/gut-bulk/ongoing/2024-02-07-GutPublicRNASeq/datasets/combined_expression_matrix_protein_coding_filtered_no_zeros.txt.gz",
    sep='\t',
)

# Precomputed overlap between the paper effects and my own effects
top_1000IBD_own_1000IBD_all = pd.read_csv(
    "/groups/umcg-fg/tmp04/projects/gut-bulk/ongoing/2024-02-07-GutPublicRNASeq/datasets/intersect/1000IBD_paper_overlap.txt",
    sep='\t',
)

In [None]:
# Own 1000IBD genotype data (VCF read as a tab-separated table).
# header=45 takes the column names from line index 45 of the file.
# NOTE(review): this offset is tied to the number of lines preceding the column header in
# this particular file - re-check it whenever the VCF is regenerated.
own_geno = pd.read_csv(
    "/groups/umcg-fg/tmp04/projects/gut-bulk/ongoing/2024-02-07-GutPublicRNASeq/datasets/Werna/genotype/werna_merged_filtered_chrs.vcf.gz",
    sep='\t',
    header=45,
)

In [None]:
# Genotypes on GRCh38, chromosome 1 only (VCF read as a tab-separated table).
# header=218 takes the column names from line index 218 of the file.
# NOTE(review): this offset is specific to this file - re-check it if the VCF changes.
own_gen_chr1_38 = pd.read_csv(
    "/groups/umcg-fg/tmp04/projects/gut-bulk/ongoing/2024-02-07-GutPublicRNASeq/datasets/Werna/paper_genotype/european_GRCh38_lifted_maf0001_chr1.vcf.gz",
    sep='\t',
    header=218,
)

In [None]:
# Count matrices of the Werna 1000IBD cohort: the one from the original paper and the ones
# produced by Joost Bakker's expression QC pipeline.

# Gene counts as published with the paper
paper_count = pd.read_csv(
    "/groups/umcg-fg/tmp04/projects/gut-bulk/ongoing/2024-02-07-GutPublicRNASeq/datasets/Werna/gene_counts/Merged.GeneCount.txt",
    sep="\t",
)

# Gene counts from the QC pipeline
own_count = pd.read_csv(
    "/groups/umcg-fg/tmp04/projects/gut-bulk/ongoing/2024-02-07-GutPublicRNASeq/datasets/Werna/rna/qc/output/3_create_gene_counts_matrix/1000IBD_gene_counts.txt.gz",
    sep='\t',
)

# Output of the covariate-correction step of the QC pipeline
own_normalized_counts = pd.read_csv(
    "/groups/umcg-fg/tmp04/projects/gut-bulk/ongoing/2024-02-07-GutPublicRNASeq/datasets/Werna/rna/qc/output/9_covariate_correction/1000IBD_gene_counts-TMM.SampleSelection.ProbesWithZeroVarianceRemoved.Log2Transformed.forcenormal.covariatecorrected.txt.gz.CovariatesRemovedOLS.txt.gz",
    sep="\t",
)

In [None]:
# Werna g2e2d linkfile; the column names are supplied here via `names`
linkfile = pd.read_csv(
    "/groups/umcg-fg/tmp04/projects/gut-bulk/ongoing/2024-02-07-GutPublicRNASeq/datasets/Werna/qtl/eqtl/annotations/g2e2d_1000IBD.tsv",
    sep='\t',
    names=['vcf_sample', 'exp_sample', 'dataset'],
)

## Data overview

In [None]:
# # temp code to export tweaked paper_eqtls file
# paper_eqtls = paper_eqtls.rename(columns={'FlippedAlleles':'SNPAlleles', 'ExpressionGene':'Gene','Chr':'GeneChr','Z-score':'MetaPZ', 'rsID':'SNP'})
# paper_eqtls.to_csv("/groups/umcg-fg/tmp04/projects/gut-bulk/ongoing/2024-02-07-GutPublicRNASeq/datasets/Werna/qtl/eqtl/paper_eqtls_tweaked.tsv", index=False, sep='\t')

In [None]:
paper_eqtls

In [None]:
own_eqtls

In [None]:
top_1000IBD_own_1000IBD_all

In [None]:
gtex_fastqtl

In [None]:
exp

In [None]:
paper_count

In [None]:
own_count

In [None]:
linkfile

## Data manipulation

In [None]:
# Left-join the paper eQTLs onto the paper/own overlap table. indicator=True adds a
# '_merge' column that records whether a paper row found a partner in the overlap table.
# NOTE(review): other cells address paper_eqtls with 'ExpressionGene' / 'Chr' - confirm
# that the 'Gene' and 'GeneChr' columns exist in paper_eqtls when this cell runs.
merged_df = paper_eqtls.merge(
    top_1000IBD_own_1000IBD_all,
    how='left',
    on=['Gene', 'GeneChr'],
    indicator=True,
)

# Keep only the paper rows WITHOUT a match in the overlap table ('_merge' == 'left_only')
unmatched_rows = merged_df['_merge'] == 'left_only'
not_in_subset = merged_df[unmatched_rows].drop(columns='_merge')

# Display the unmatched paper rows
not_in_subset

In [None]:
# Drop the rows whose 'AllelFre' cell contains the literal text 'AllelFre'
# (presumably header rows repeated inside the sheet - TODO confirm), then make the
# column numeric.
# The explicit .copy() turns the filtered result into an independent frame, so the column
# assignment below does not write into a slice of the original (SettingWithCopyWarning).
paper_eqtls = paper_eqtls[paper_eqtls['AllelFre'] != 'AllelFre'].copy()
paper_eqtls["AllelFre"] = paper_eqtls["AllelFre"].astype(float)

In [None]:
# Z-score per paper eQTL: effect size divided by its standard error
paper_eqtls['Z-score'] = paper_eqtls['Beta'].div(paper_eqtls['SE'])

In [None]:
paper_eqtls

In [None]:
# Work on a copy so the table as loaded from disk stays untouched
gtex = gtex_fastqtl.copy()

# Unique gene IDs exactly as they appear in the file (still carrying the version suffix)
unique_gene_ids = gtex['gene_id'].unique()

# One-column frame of version-less gene IDs. Stripping everything from the first '.' can
# map several versioned IDs onto the same value, so duplicates are removed AFTER stripping
# to keep this frame truly unique.
gtex_unique_genes = pd.DataFrame(unique_gene_ids, columns=['gene_id'])
gtex_unique_genes['gene_id'] = gtex_unique_genes['gene_id'].str.split('.').str[0]
gtex_unique_genes = (
    gtex_unique_genes
    .dropna(subset=['gene_id'])
    .drop_duplicates(subset='gene_id', keep='first')
)

# Same clean-up on the full table: strip the version, drop rows without a gene ID and keep
# the first row per gene. This is the explicit form of assigning a de-duplicated Series back
# to the column and dropping the NaN rows that the index alignment leaves behind.
gtex['gene_id'] = gtex['gene_id'].str.split('.').str[0]
gtex = gtex.dropna(subset=['gene_id']).drop_duplicates(subset='gene_id', keep='first')

In [None]:
gtex

In [None]:
# Cut every column name at its first '.'
paper_count.columns = [col.split('.')[0] for col in paper_count.columns]

# Remove the last five rows of the table
# (presumably summary rows rather than genes - TODO confirm against the input file)
paper_count = paper_count.drop(index=paper_count.tail(5).index)

# 'ID' first, then the remaining columns in alphabetical order
sorted_columns = ['ID', *sorted(paper_count.columns[1:])]

# Reorder the columns, sort the rows by ID, strip the version suffix from the IDs and
# rename the ID column to 'Gene'
paper_count = (
    paper_count[sorted_columns]
    .sort_values(by='ID')
    .assign(ID=lambda frame: frame['ID'].str.split('.').str[0])
    .rename(columns={'ID': 'Gene'})
)

In [None]:
paper_count

In [None]:
# paper_count.to_csv("/groups/umcg-fg/tmp04/projects/gut-bulk/ongoing/2024-02-07-GutPublicRNASeq/datasets/Werna/gene_counts/Merged.GeneCount.Adjusted.txt", sep='\t', index=False)

In [None]:
# 'Gene' first, then the remaining columns in alphabetical order
sorted_columns = ['Gene', *sorted(own_count.columns[1:])]

# Reorder the columns, sort the rows by gene, strip the version suffix from the gene IDs
# and renumber the rows from zero
own_count = (
    own_count[sorted_columns]
    .sort_values(by='Gene')
    .assign(Gene=lambda frame: frame['Gene'].str.split('.').str[0])
    .reset_index(drop=True)
)

In [None]:
own_count

In [None]:
# The gene column is called '-' in the file; rename it to 'Gene' and sort the rows by it
own_normalized_counts = (
    own_normalized_counts
    .rename(columns={'-': 'Gene'})
    .sort_values(by='Gene')
)

In [None]:
# First eight columns of the genotype table only
# (for a VCF these should be #CHROM ... INFO, i.e. no per-sample columns - TODO confirm)
own_geno_copy = own_geno.iloc[:, :8]

# Columns identifying a variant on the genotype side
geno_keys = ["#CHROM", "ID", "REF", "ALT"]

# First merge: REF matches Allele1 and ALT matches Allele0
geno_sum_overlap_normal = pd.merge(
    own_geno_copy,
    paper_eqtls,
    how='inner',
    left_on=geno_keys,
    right_on=["Chr", "rsID", "Allele1", "Allele0"],
)

# Second merge: REF matches Allele0 and ALT matches Allele1
geno_sum_overlap_flipped = pd.merge(
    own_geno_copy,
    paper_eqtls,
    how='inner',
    left_on=geno_keys,
    right_on=["Chr", "rsID", "Allele0", "Allele1"],
)

# Stack both orientations. Each merge result is numbered from 0, so a plain concat would
# repeat row labels; ignore_index=True gives every row its own label, which keeps later
# label-based operations on this frame unambiguous.
geno_sum_overlap = pd.concat(
    [geno_sum_overlap_normal, geno_sum_overlap_flipped],
    ignore_index=True,
)

# Drop rows that are identical in every column, in case a SNP matched in both orientations
geno_sum_overlap = geno_sum_overlap.drop_duplicates()

# Display the result
geno_sum_overlap.head()

In [None]:
# Function to split the column into key-value pairs
def split_info_column(row):
    """Parse one ';'-separated INFO string into a pandas Series.

    Each segment of the form ``key=value`` becomes an entry ``key -> value`` (the value
    stays a string). A segment without ``=`` is treated as a flag and stored as ``True``.

    Only the first ``=`` of a segment separates key and value, so values that themselves
    contain ``=`` are kept intact. Empty segments (e.g. from a trailing ``;``) are skipped.

    Parameters
    ----------
    row : str
        The INFO string of a single record. Any non-string value (e.g. ``None`` or NaN)
        is treated as "no information".

    Returns
    -------
    pandas.Series
        One entry per key found in ``row``; empty when ``row`` holds no information.
    """
    result = {}

    # Missing values carry no fields; return an empty object Series instead of crashing
    if not isinstance(row, str):
        return pd.Series(result, dtype=object)

    for pair in row.split(';'):
        if not pair:
            # Skip empty segments so they do not turn into a bogus '' flag
            continue
        # partition splits on the first '=' only; `separator` is '' for flags
        key, separator, value = pair.partition('=')
        result[key] = value if separator else True

    if not result:
        return pd.Series(result, dtype=object)
    return pd.Series(result)

# Parse the INFO string of every row; because the function returns a Series, apply()
# yields a frame with one column per INFO key
df_split = geno_sum_overlap['INFO'].apply(split_info_column)

# Place the parsed INFO fields next to the original columns
geno_sum_overlap_extended = pd.concat((geno_sum_overlap, df_split), axis='columns')

In [None]:
geno_sum_overlap_extended

In [None]:
# First eight columns of the chr1 genotype table only
# (for a VCF these should be #CHROM ... INFO, i.e. no per-sample columns - TODO confirm)
own_gen_chr1_38_copy = own_gen_chr1_38.iloc[:, :8]

# Columns identifying a variant on the genotype side
geno_chr1_keys = ["#CHROM", "ID", "REF", "ALT"]

# First merge: REF matches Allele1 and ALT matches Allele0
geno_chr1_overlap_normal = pd.merge(
    own_gen_chr1_38_copy,
    paper_eqtls,
    how='inner',
    left_on=geno_chr1_keys,
    right_on=["Chr", "rsID", "Allele1", "Allele0"],
)

# Second merge: REF matches Allele0 and ALT matches Allele1
geno_chr1_overlap_flipped = pd.merge(
    own_gen_chr1_38_copy,
    paper_eqtls,
    how='inner',
    left_on=geno_chr1_keys,
    right_on=["Chr", "rsID", "Allele0", "Allele1"],
)

# Stack both orientations. Each merge result is numbered from 0, so a plain concat would
# repeat row labels; ignore_index=True gives every row its own label, which keeps later
# label-based operations on this frame unambiguous.
geno_chr1_overlap = pd.concat(
    [geno_chr1_overlap_normal, geno_chr1_overlap_flipped],
    ignore_index=True,
)

# Drop rows that are identical in every column, in case a SNP matched in both orientations
geno_chr1_overlap = geno_chr1_overlap.drop_duplicates()

# Display the result
geno_chr1_overlap

In [None]:
# Parse the INFO string of every chr1 row into one column per INFO key
df_chr1_split = geno_chr1_overlap['INFO'].apply(split_info_column)

# Place the parsed INFO fields next to the original columns
geno_chr1_overlap_extended = pd.concat((geno_chr1_overlap, df_chr1_split), axis='columns')

In [None]:
geno_chr1_overlap_extended

## Data inspection

In [None]:
# Genes that have an effect both in my own eQTL results and in the paper
intersect_paper_own = set(own_eqtls["Gene"]) & set(paper_eqtls["ExpressionGene"])
print(len(intersect_paper_own))

# Number of distinct genes reported by the paper
print(len(paper_eqtls["ExpressionGene"].unique()))

# Paper genes that are missing from my own results: unique paper genes minus the shared
# genes, the same definition the other overlap cells use. (Subtracting the number of
# unique own genes from the paper's row count mixed rows with unique genes.)
print("difference =", (len(paper_eqtls["ExpressionGene"].unique()) - len(intersect_paper_own)))

In [None]:
# Genes shared by the combined expression matrix and the GTEx eQTL table
intersect_exp_gtex = set(exp["gene"]).intersection(set(gtex["gene_id"]))
n_gtex_genes = len(gtex["gene_id"].unique())

print(len(intersect_exp_gtex))
print(n_gtex_genes)
# GTEx genes that do not occur in the expression matrix
print("difference =", (n_gtex_genes - len(intersect_exp_gtex)))

In [None]:
# Genes shared by the combined expression matrix and the paper eQTLs
intersect_exp_werna = set(exp["gene"]).intersection(set(paper_eqtls["ExpressionGene"]))
n_paper_genes = len(paper_eqtls["ExpressionGene"].unique())

print(len(intersect_exp_werna))
print(n_paper_genes)
# Paper genes that do not occur in the expression matrix
print("difference =", (n_paper_genes - len(intersect_exp_werna)))

In [None]:
paper_eqtls["AllelFre"].sort_values(ascending=True)

In [None]:
# Gene IDs that occur in both count tables
# NOTE(review): the 'Gene' columns are stripped of their version suffix in the data
# manipulation cells, so the wording of the label below may be outdated - confirm.
intersect = set(own_count["Gene"]).intersection(set(paper_count["Gene"]))
print("Overlap genes with version numbering: ",len(intersect))

In [None]:
# Sample names are all column names after the first (gene) column
own_sample_names = own_count.columns[1:]
paper_sample_names = paper_count.columns[1:]

print("Amount of overlapping samples:",own_sample_names.isin(paper_sample_names).sum())
print("Amount of unique samples own_count:", len(own_sample_names.unique()))
print("Amount of unique samples paper_count:", len(paper_sample_names.unique()))

In [None]:
paper_eqtls.head(1)

In [None]:
geno_sum_overlap_extended[["Chr","ID","rsID","AllelFre", "AF"]]

In [None]:
geno_sum_overlap_extended[geno_sum_overlap_extended["#CHROM"] == 1][["#CHROM", "POS", "ID"]]

In [None]:
geno_chr1_overlap_extended[["#CHROM", "POS", "ID"]]

## Correlations

In [None]:
# Allele frequency from the paper summary stats ('AllelFre') next to the 'AF' value parsed
# from the INFO column, both as floats
allele_freqs = geno_sum_overlap_extended[["AllelFre", "AF"]].astype(float)

# Off-diagonal element of the 2x2 matrix = correlation between the two columns
correlation = allele_freqs.corr().iloc[0, 1]
# Display the Pearson correlation
print("Pearson correlation:\n",correlation)

spearman_correlation = allele_freqs.corr(method='spearman').iloc[0, 1]
# Display the Spearman correlation
print("\nSpearman correlation:\n",spearman_correlation)

In [None]:
paper_eqtls

In [None]:
# Invert the sign of 'MetaPZ' for every row flagged as flipped.
# NOTE(review): this modifies the table in place, so running the cell a second time
# flips the sign back - reload the table before re-running.
flipped_rows = top_1000IBD_own_1000IBD_all['Flipped'] == True
top_1000IBD_own_1000IBD_all.loc[flipped_rows, 'MetaPZ'] *= -1

# The two Z-score columns that are compared
zscore_pair = (
    top_1000IBD_own_1000IBD_all['MetaPZ'],
    top_1000IBD_own_1000IBD_all['MetaPZ_chrom'],
)

# Pearson (linear) and Spearman (rank) correlation, each with its p-value
pearson_corr, pearson_pval = pearsonr(*zscore_pair)
spearman_corr, spearman_pval = spearmanr(*zscore_pair)

# Output the results
print(f"\nPearson correlation: {pearson_corr}, p-value: {pearson_pval}")
print(f"Spearman correlation: {spearman_corr}, p-value: {spearman_pval}")

In [None]:
plt.figure(figsize=(8, 6), facecolor='white')

# Z-scores on the x axis ('MetaPZ') and on the y axis ('MetaPZ_chrom')
z_x = top_1000IBD_own_1000IBD_all["MetaPZ"]
z_y = top_1000IBD_own_1000IBD_all["MetaPZ_chrom"]

# Add trendline
z = np.polyfit(z_x, z_y, 1)  # Fit linear regression (1st-degree polynomial)
p = np.poly1d(z)
plt.plot(z_x, p(z_x), "r--")  # Plot the trendline in red dashed style

plt.scatter(z_x, z_y, alpha=0.5)

# Compute the correlation shown in the title from the plotted data, so the title cannot go
# stale when the input table changes (it used to be a hard-coded 0.81)
title_pearson = pearsonr(z_x, z_y)[0]
plt.title(
    "Z-scores from overlapping 1000IBD paper summary stat top effects in 1000IBD summary stats all effects "
    f"(Pearson correlation: {title_pearson:.2f})"
)
plt.xlabel("paper eQTLs - Z-score")
plt.ylabel("own eQTLs - Z-score")
# Number of plotted effects in the top-left corner of the axes
plt.text(0.05, 0.95, f'N={top_1000IBD_own_1000IBD_all["Gene"].size}', ha='left', va='top', transform=plt.gca().transAxes, fontsize=12, color='black')
plt.grid()
plt.show()


In [None]:
# Find common genes
common_genes = list(set(own_count['Gene']).intersection(set(paper_count['Gene'])))

# Subset both dataframes to keep only the common genes
own_subset = own_count[own_count['Gene'].isin(common_genes)].set_index('Gene')
paper_subset = paper_count[paper_count['Gene'].isin(common_genes)].set_index('Gene')

# Ensure that the dataframes have the genes in the same order
own_subset = own_subset.loc[common_genes]
paper_subset = paper_subset.loc[common_genes]

# Only samples present in BOTH tables can be compared; looking up an own-only sample in
# the paper table would raise a KeyError
shared_samples = own_subset.columns.intersection(paper_subset.columns)
own_subset = own_subset[shared_samples]
paper_subset = paper_subset[shared_samples]

# Pearson and Spearman correlation per sample: own counts vs paper counts over all common
# genes ([0] takes the coefficient and discards the p-value)
pearson_corr = {}
spearman_corr = {}

for column in shared_samples:
    pearson_corr[column] = pearsonr(own_subset[column], paper_subset[column])[0]
    spearman_corr[column] = spearmanr(own_subset[column], paper_subset[column])[0]

# Convert results to DataFrame for better readability (one row per sample)
pearson_corr_df = pd.DataFrame.from_dict(pearson_corr, orient='index', columns=['Pearson'])
spearman_corr_df = pd.DataFrame.from_dict(spearman_corr, orient='index', columns=['Spearman'])

In [None]:
plt.plot(pearson_corr_df)
plt.xticks([])
plt.show()

In [None]:
plt.plot(spearman_corr_df)
plt.xticks([])
plt.show()

In [None]:
pearson_corr_df.describe()

In [None]:
spearman_corr_df.describe()

In [None]:
# Gene-wise correlation. Builds on own_subset / paper_subset (genes as rows, samples as
# columns) as prepared in the per-sample correlation cell.
# The transposes get their own names instead of overwriting own_subset / paper_subset:
# rebinding those would flip their orientation again every time this cell is re-run.
own_by_sample = own_subset.T  # Transpose: now genes are columns
paper_by_sample = paper_subset.T  # Transpose: now genes are columns

# Pair the rows by sample name rather than by position, so each own sample is compared
# with the paper sample of the same name
shared_samples = own_by_sample.index.intersection(paper_by_sample.index)
own_by_sample = own_by_sample.loc[shared_samples]
paper_by_sample = paper_by_sample.loc[shared_samples]

# Compute Pearson and Spearman correlations for each gene (which are now columns);
# [0] takes the coefficient and discards the p-value
pearson_corr = {}
spearman_corr = {}

for gene in own_by_sample.columns:
    pearson_corr[gene] = pearsonr(own_by_sample[gene], paper_by_sample[gene])[0]
    spearman_corr[gene] = spearmanr(own_by_sample[gene], paper_by_sample[gene])[0]

# Convert results to DataFrame for better readability (one row per gene)
pearson_row_wise_corr_df = pd.DataFrame.from_dict(pearson_corr, orient='index', columns=['Pearson'])
spearman_row_wise_corr_df = pd.DataFrame.from_dict(spearman_corr, orient='index', columns=['Spearman'])


In [None]:
pearson_row_wise_corr_df.describe()

In [None]:
spearman_row_wise_corr_df.describe()

In [None]:
# Genes present in both count tables
common_genes = list(set(own_count['Gene']).intersection(set(paper_count['Gene'])))

# Gene-indexed tables restricted to the common genes, rows in the same gene order
own_subset = (
    own_count[own_count['Gene'].isin(common_genes)]
    .set_index('Gene')
    .loc[common_genes]
)
paper_subset = (
    paper_count[paper_count['Gene'].isin(common_genes)]
    .set_index('Gene')
    .loc[common_genes]
)

# Samples as rows, genes as columns
own_transposed = own_subset.T
paper_transposed = paper_subset.T

# One call computes the correlations between all rows of the stacked (own + paper) input
correlation_matrix = np.corrcoef(own_transposed, paper_transposed, rowvar=True)

# The upper-right quadrant holds own samples (rows) against paper samples (columns)
n_own_samples = len(own_transposed)
pearson_corr_matrix = correlation_matrix[:n_own_samples, n_own_samples:]

# Label rows with the own sample names and columns with the paper sample names
pearson_corr_df = pd.DataFrame(
    pearson_corr_matrix,
    index=own_transposed.index,
    columns=paper_transposed.index,
)

In [None]:
pearson_corr_df

In [None]:
test_df = pearson_corr_df.copy()

In [None]:
# For every row, the column label holding the row's highest correlation
test_df["max"] = test_df.idxmax(axis="columns")

# A row "matches" when that column label equals the row's own label
matching_results = test_df["max"] == test_df.index

count_yes = matching_results.sum()
count_no = len(matching_results) - count_yes

# Output the counts
print(f"Count of 'Yes' (matching): {count_yes}")
print(f"Count of 'No' (not matching): {count_no}")

In [None]:
test_df2 = pearson_corr_df.copy()

# For every column, the row label holding the column's highest correlation.
# The result is indexed by the COLUMN labels of pearson_corr_df.
best_row_per_column = pearson_corr_df.idxmax(axis=0)

# Kept for inspection; pandas aligns the values on the row labels of test_df2
test_df2["max"] = best_row_per_column

# A column "matches" when its best row carries the same label as the column itself.
# The comparison is done on the column-indexed result directly instead of relying on the
# label alignment of the assignment above, so every column is counted exactly once.
matching_results = best_row_per_column == best_row_per_column.index

count_yes = matching_results.sum()
count_no = len(matching_results) - count_yes

# Output the counts
print(f"Count of 'Yes' (matching): {count_yes}")
print(f"Count of 'No' (not matching): {count_no}")

In [None]:
# Heatmap for Pearson correlations
plt.figure(figsize=(20, 16))
sns.heatmap(pearson_corr_df, cmap="coolwarm", center=0, annot=False)
plt.title("Pearson Correlation Heatmap (Own vs. Paper Samples)")
# The heatmap draws the frame's columns along x and its rows along y; the correlation
# frame has paper samples as columns and own samples as rows
plt.xlabel("Paper Samples")
plt.ylabel("Own Samples")
# Hide the tick labels on both axes
plt.xticks([])
plt.yticks([])
plt.show()

In [None]:
# Genes present in both the normalized own counts and the paper counts
common_genes2 = list(set(own_normalized_counts['Gene']).intersection(set(paper_count['Gene'])))

# Gene-indexed tables restricted to the common genes, rows in the same gene order
own_subset2 = (
    own_normalized_counts[own_normalized_counts['Gene'].isin(common_genes2)]
    .set_index('Gene')
    .loc[common_genes2]
)
paper_subset2 = (
    paper_count[paper_count['Gene'].isin(common_genes2)]
    .set_index('Gene')
    .loc[common_genes2]
)

# Samples as rows, genes as columns
own_transposed2 = own_subset2.T
paper_transposed2 = paper_subset2.T

# One call computes the correlations between all rows of the stacked (own + paper) input
correlation_matrix2 = np.corrcoef(own_transposed2, paper_transposed2, rowvar=True)

# The upper-right quadrant holds own samples (rows) against paper samples (columns)
n_own_samples2 = len(own_transposed2)
pearson_corr_matrix2 = correlation_matrix2[:n_own_samples2, n_own_samples2:]

# Label rows with the own sample names and columns with the paper sample names
pearson_corr_df_normalized = pd.DataFrame(
    pearson_corr_matrix2,
    index=own_transposed2.index,
    columns=paper_transposed2.index,
)

In [None]:
# Heatmap for Pearson correlations (normalized own counts vs. paper counts)
plt.figure(figsize=(20, 16))
sns.heatmap(pearson_corr_df_normalized, cmap="coolwarm", center=0, annot=False)
plt.title("Pearson Correlation Heatmap (Own vs. Paper Samples)")
# The heatmap draws the frame's columns along x and its rows along y; the correlation
# frame has paper samples as columns and own samples as rows
plt.xlabel("Paper Samples")
plt.ylabel("Own Samples")
# Hide the tick labels on both axes
plt.xticks([])
plt.yticks([])
plt.show()

## Confusion matrix

In [None]:
linkfile

In [None]:
# Gene IDs are in the first column of both count tables
own_gene_ids = own_count.iloc[:, 0]
paper_gene_ids = paper_count.iloc[:, 0]
overlapping_genes = own_gene_ids[own_gene_ids.isin(paper_gene_ids)]

# Keep only the genes present in both tables, sorted by gene ID
own_count_filtered = own_count[own_count.iloc[:, 0].isin(overlapping_genes)]
paper_count_filtered = paper_count[paper_count.iloc[:, 0].isin(overlapping_genes)]

own_count_filtered = own_count_filtered.sort_values(by=own_count_filtered.columns[0])
paper_count_filtered = paper_count_filtered.sort_values(by=paper_count_filtered.columns[0])

# Index the sample columns by gene ID. Series.corr() pairs values by index LABEL, not by
# position; with the tables' unrelated row numbers as labels it would correlate counts of
# different genes. With the gene ID as label every gene is paired with itself.
own_samples = own_count_filtered.set_index(own_count_filtered.columns[0])
paper_samples = paper_count_filtered.set_index(paper_count_filtered.columns[0])

# A gene ID occurring more than once would make that pairing ambiguous; keep its first row
own_samples = own_samples[~own_samples.index.duplicated(keep='first')]
paper_samples = paper_samples[~paper_samples.index.duplicated(keep='first')]

# Samples that exist under the same name in both tables
common_samples = own_samples.columns.intersection(paper_samples.columns)

# Pearson correlation of each sample with its namesake in the other table
correlations = [
    own_samples[sample].corr(paper_samples[sample])
    for sample in common_samples
]

correlation_df = pd.DataFrame({'Sample': common_samples, 'Correlation': correlations})

plt.figure(figsize=(12, 6))
sns.barplot(x='Sample', y='Correlation', data=correlation_df, palette="coolwarm")
plt.title('Pearson Correlation Between Samples with Same Name (own_count vs paper_count)')
plt.ylabel('Pearson Correlation')
plt.xlabel('Sample')
plt.ylim(-1, 1)  # Pearson correlation ranges from -1 to 1
plt.xticks([])  # Hide the sample names on the x axis
plt.show()

In [None]:
# Gene IDs are in the first column of both count tables
own_gene_ids = own_count.iloc[:, 0]
paper_gene_ids = paper_count.iloc[:, 0]
overlapping_genes = own_gene_ids[own_gene_ids.isin(paper_gene_ids)]

# Keep only the genes present in both tables, sorted by gene ID
own_count_filtered = own_count[own_count.iloc[:, 0].isin(overlapping_genes)]
paper_count_filtered = paper_count[paper_count.iloc[:, 0].isin(overlapping_genes)]

own_count_filtered = own_count_filtered.sort_values(by=own_count_filtered.columns[0])
paper_count_filtered = paper_count_filtered.sort_values(by=paper_count_filtered.columns[0])

# Index the sample columns by gene ID. Series.corr() pairs values by index LABEL, not by
# position; with the tables' unrelated row numbers as labels it would correlate counts of
# different genes. With the gene ID as label every gene is paired with itself.
own_samples = own_count_filtered.set_index(own_count_filtered.columns[0])
paper_samples = paper_count_filtered.set_index(paper_count_filtered.columns[0])

# A gene ID occurring more than once would make that pairing ambiguous; keep its first row
own_samples = own_samples[~own_samples.index.duplicated(keep='first')]
paper_samples = paper_samples[~paper_samples.index.duplicated(keep='first')]

# Samples that exist under the same name in both tables
common_samples = own_samples.columns.intersection(paper_samples.columns)

# Spearman correlation of each sample with its namesake in the other table
spearman_correlations = [
    own_samples[sample].corr(paper_samples[sample], method='spearman')
    for sample in common_samples
]

spearman_correlation_df = pd.DataFrame({'Sample': common_samples, 'Spearman Correlation': spearman_correlations})

plt.figure(figsize=(12, 6))
sns.barplot(x='Sample', y='Spearman Correlation', data=spearman_correlation_df, palette="coolwarm")
plt.title('Spearman Correlation Between Samples with Same Name (own_count vs paper_count)')
plt.ylabel('Spearman Correlation')
plt.xlabel('Sample')
plt.xticks([])  # Hide the sample names on the x axis
plt.ylim(-1, 1)  # Spearman correlation ranges from -1 to 1
plt.show()