# Using a Heatmap to Visualize Protective/Resilient Variants

* **Project:** ADRD Genetic Diversity in Biobanks
* **Version:** Python/3.10
* **Last Updated:** 29-August-2024

## Notebook Overview
This notebook visualizes the results for protective/resilient variants as heatmaps, one per cohort.

## Variables used 
`${COHORT}` = AD, Dementia, Controls (the cohort label substituted into the input file name `Heatmap_protective_${COHORT}.csv`)

In [None]:
# Preview the first rows of one cohort's heatmap table.
# pandas is imported here so that this cell runs on its own when the notebook
# is executed top to bottom (it is the first cell that uses `pd`).
import pandas as pd

# `${COHORT}` is a placeholder (see "Variables used"); replace it with the
# cohort label before running.
df_z = pd.read_csv("Heatmap_protective_${COHORT}.csv")
df_z.head()

In [None]:
import pandas as pd

# Read the cohort table and keep only the rows that carry a variant label.
# `${COHORT}` is a placeholder (see "Variables used").
df_z = pd.read_csv('Heatmap_protective_${COHORT}.csv').dropna(subset=['Variants'])

# Every column after the first is treated as a value column: take the
# per-column maxima first, then the largest of those.
value_columns = df_z.iloc[:, 1:]
column_maxima = value_columns.max()
max_value = column_maxima.max()

# Report the overall maximum.
print("The maximum value in the dataset is:", max_value)


In [None]:
import seaborn as sns
import matplotlib.pyplot as plt
from matplotlib.colors import Normalize
import pandas as pd
import matplotlib as mpl
from matplotlib import gridspec

# Draw one heatmap per cohort side by side (one row, three columns) and save
# the combined figure as 'combined_heatmap.png'.
fig, axs = plt.subplots(1, 3, figsize=(30, 12))  # Adjust width as needed

# Per-cohort inputs; the four lists below are consumed in lockstep, so they
# must stay in the same order and have the same length as the number of axes.
file_names = ['Heatmap_protective_AD.csv', 'Heatmap_protective_Dementia.csv', 'Heatmap_protective_Controls.csv']
titles = ['AD_e4/e4', 'Related Dementia_e4/e4', 'Controls_e4/e4']
# Fixed colour-scale range for each panel.
# NOTE(review): the upper bounds are presumably the per-file maxima reported
# by the max-value cell -- confirm, and refresh them if the CSV files change.
norms = [Normalize(vmin=0, vmax=27.27), Normalize(vmin=0, vmax=6.76), Normalize(vmin=0, vmax=2.84)]
# x position of each title, expressed in the colorbar's axes coordinates.
text_positions = [0.5, 2.3, 1.2]  # Text positions for AD, Related Dementia, Controls

# The continuous colormap is the same for every panel, so build it once.
cmap = sns.color_palette("Spectral_r", as_cmap=True)

for ax, file_name, title, norm, text_position in zip(axs, file_names, titles, norms, text_positions):
    # Load the data and drop rows without a variant label.
    df_z = pd.read_csv(file_name)
    df_z = df_z[df_z['Variants'].notna()]

    # Ancestry columns: every column except the first one.
    cols = list(df_z.columns)[1:]

    # Heatmap with the continuous palette and this cohort's colour scale.
    sns.heatmap(df_z[cols], cmap=cmap, norm=norm, cbar_kws={"shrink": 0.8}, ax=ax)

    # Axis labels.
    ax.set_ylabel('SNPs', fontsize=13, fontweight='bold')
    ax.set_xlabel('Ancestry', fontsize=13, fontweight='bold')

    # Heatmap row k spans y in [k, k + 1], so its centre is k + 0.5. Placing
    # the ticks there keeps each variant label aligned with its own row
    # instead of sitting on the boundary between two rows.
    ax.set_yticks([row + 0.5 for row in range(len(df_z))])
    ax.set_yticklabels(df_z['Variants'], rotation=0, fontsize=13, ha='right')

    # Rotate x-axis labels for better readability.
    ax.set_xticklabels(ax.get_xticklabels(), rotation=45, ha='right', fontsize=11)

    # The colorbar created by sns.heatmap is attached to the first collection.
    colorbar = ax.collections[0].colorbar
    colorbar.ax.tick_params(labelsize=13)

    # Panel title placed just above the colorbar (y = 1.05 in axes coordinates).
    colorbar.ax.text(text_position, 1.05, title, ha='center', va='center', fontsize=13, fontweight='bold', transform=colorbar.ax.transAxes)

    # Label the colorbar to indicate that the values are percentages.
    colorbar.ax.set_ylabel('Percentage', fontsize=12)

# Adjust layout to prevent overlap
plt.tight_layout()

# Save the combined plot, then display it.
plt.savefig('combined_heatmap.png', dpi=300, bbox_inches='tight')
plt.show()
