In [None]:
# For working with tables
import pandas as pd
# For plotting
import matplotlib.pyplot as plt  
# Improves the heatmap visual
import seaborn as sns           

# Load the CSV file: one row per gene, metadata columns plus one
# log-ratio column per condition.
file_path = "fit_logratios_good.csv"  # path to your file
df = pd.read_csv(file_path)

# Drop metadata columns, keeps just the log-ratio values
logratio_data = df.drop(columns=['locusId', 'sysName', 'desc', 'comb'])

# Variance of each gene (row) across all conditions (columns)
variances = logratio_data.var(axis=1)

# Number of most-variable genes to plot; kept in one place so the
# selection and the title cannot drift apart.
top_n = 20

# Pick the rows with the largest variance (fewer than top_n if the
# table itself is shorter).
top_indices = variances.nlargest(top_n).index
top_genes = logratio_data.loc[top_indices]

# Use sysName as the y-axis gene label, falling back to locusId where
# sysName is missing so that no row ends up labelled "nan".
sys_names = df.loc[top_indices, 'sysName'].astype(object)
gene_labels = sys_names.where(sys_names.notna(), df.loc[top_indices, 'locusId'])

# Set up the figure with automatic layout adjustment so long tick
# labels are not clipped.
plt.figure(figsize=(24, 12), constrained_layout=True)

# Draw the heatmap on the current axes
sns.heatmap(
    top_genes,
    yticklabels=gene_labels,      # One label per plotted gene
    cmap="RdBu_r",                # Diverging colormap: blue = low, red = high
    center=0,                     # Centers the color scale at zero
    cbar_kws={'label': 'Log2 Fold Change'}  # Shows the colorbar label
)

# Title reports the number of genes actually plotted
plt.title(f"Heatmap of Top {len(top_genes)} Most Variable Genes Across Conditions")
plt.xlabel("Conditions")
plt.ylabel("Genes")

# Rotates and sizes the axis tick labels for readability
plt.xticks(rotation=45, ha='right', fontsize=6)
plt.yticks(fontsize=9)

# Displays the plot
plt.show()
