## Plot pangenome heatmaps
- Compare the k-mers in pigs vs. humans (100 metaG vs. 100 metaG).
- What kmers are in both, neither, or just one?
- Do we see patterns in heatmaps?

In [None]:
# imports
import os
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import glob
from sklearn.preprocessing import StandardScaler

In [None]:
# set filepaths
# dmp_folder: directory that is scanned for the input ".tsv" tables.
# plot_folder: directory the generated ".png" plots are written into.
dmp_folder = "../compare_pig_human/100_v100/dmp/"
plot_folder = "../compare_pig_human/100_v100/plots"

In [None]:
# Plot every comparison table found in dmp_folder twice: once with all hashes
# (density_plot) and once with the hashes seen only a single time removed
# (density_plot_no0).
# NOTE: both plotting functions are defined in a later cell of this notebook;
# that cell has to be executed before this one.

# savefig does not create missing directories, so make sure the target exists.
os.makedirs(plot_folder, exist_ok=True)

# A single pass over the (sorted, hence reproducible) directory listing
# replaces the two identical loops that differed only in the plot function.
for filename in sorted(os.listdir(dmp_folder)):
    if not filename.endswith(".tsv"):
        continue

    file_path = os.path.join(dmp_folder, filename)
    base_name = os.path.splitext(filename)[0]
    plot_title = base_name

    # Density plot over all hashes
    output_path = os.path.join(plot_folder, f"{base_name}.density.png")
    density_plot(file_path, output_path, plot_title)

    # Density plot without the hashes that were found only once
    output_path = os.path.join(plot_folder, f"{base_name}.density_no0.png")
    density_plot_no0(file_path, output_path, plot_title)

In [None]:
def density_plot(infile, outfile, plot_title):
    """Save a 2-D kernel density plot of human vs. pig hash counts.

    Args:
        infile: Path to a whitespace-delimited table without a header row;
            its three columns are read as ``hash``, ``human`` and ``pig``.
        outfile: Path the figure is written to.
        plot_title: Title drawn above the plot.
    """
    # Raw string: "\s" inside a plain literal is an invalid escape sequence.
    df = pd.read_csv(infile, sep=r"\s", engine="python",
                     names=["hash", "human", "pig"])

    fig = plt.figure(figsize=(10, 6))
    try:
        sns.kdeplot(x=df["human"], y=df["pig"], fill=True, cmap="viridis", cbar=True)
        plt.title(plot_title)
        plt.xlabel("Human")
        plt.ylabel("Pig")
        # Write to the path that was passed in rather than to a global
        # variable that happens to be set by the calling notebook cell.
        plt.savefig(outfile)
    finally:
        # Always release the figure so a failing file does not leak it.
        plt.close(fig)

def density_plot_no0(infile, outfile, plot_title):
    """Save a 2-D kernel density plot of human vs. pig hash counts,
    omitting the hashes that were found only once.

    A hash counts as "found only once" when it has a count of 1 in one host
    and 0 in the other; those rows are removed before plotting.

    Args:
        infile: Path to a whitespace-delimited table without a header row;
            its three columns are read as ``hash``, ``human`` and ``pig``.
        outfile: Path the figure is written to.
        plot_title: Title drawn above the plot.
    """
    # Raw string: "\s" inside a plain literal is an invalid escape sequence.
    df = pd.read_csv(infile, sep=r"\s", engine="python",
                     names=["hash", "human", "pig"])

    # Rows where the hash was seen exactly once in total.
    only_once_in_human = (df["human"] == 1) & (df["pig"] == 0)
    only_once_in_pig = (df["human"] == 0) & (df["pig"] == 1)
    df = df[~(only_once_in_human | only_once_in_pig)]

    fig = plt.figure(figsize=(10, 6))
    try:
        sns.kdeplot(x=df["human"], y=df["pig"], fill=True, cmap="viridis", cbar=True)
        plt.title(plot_title)
        plt.xlabel("Human")
        plt.ylabel("Pig")
        # Write to the path that was passed in rather than to a global
        # variable that happens to be set by the calling notebook cell.
        plt.savefig(outfile)
    finally:
        # Always release the figure so a failing file does not leak it.
        plt.close(fig)