In [None]:
# Calculate the average fraction of the genome shared between each pair of votus
import pandas as pd
import numpy as np

# Read the tab-separated BLAST comparison table into a DataFrame.
file_path = "/Users/jferrare/Documents/Good Lab Work/Gut Phage/February_2025/BLAST_f95_f99_votus_w_100mem_diff_votu.txt"
df = pd.read_csv(file_path, sep='\t')

# Normalise f95 by the shorter of the two lengths in each row; the ratio is
# stored on df as 'new_col' so later cells can reuse it.
shorter_len = df[['len1', 'len2']].min(axis=1)
df['new_col'] = df['f95'].div(shorter_len)

# Collapse to one row per (s1_votu, s2_votu) pair, carrying the mean and
# standard deviation of new_col over that pair's rows.
per_pair = df.groupby(['s1_votu', 's2_votu'])['new_col']
grouped = per_pair.agg(['mean', 'std']).reset_index()

# Write the per-pair table, tab-separated and without the index, into the
# same directory the input was read from.
output_dir = "/Users/jferrare/Documents/Good Lab Work/Gut Phage/February_2025/"
output_file = output_dir + "votu_pairwise_stats_f95_100mem.txt"
grouped.to_csv(output_file, sep='\t', index=False)

print(f"Pairwise statistics saved to {output_file}")


In [None]:
#plot heatmap
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
from scipy.cluster.hierarchy import linkage, leaves_list
from matplotlib.colors import LogNorm

# Build a square votu-by-votu matrix of the per-pair means. This relies on
# `df` and `grouped` from the previous cell. Pairs that are absent from
# `grouped`, and any non-finite means, are set to 0.
# NOTE(review): the matrix is symmetric only if the input table lists both
# (a, b) and (b, a) for every pair; the clustering below runs on rows, so
# confirm that this holds for the input.
unique_votus = sorted(set(df['s1_votu']).union(set(df['s2_votu'])))
mean_df = grouped.pivot(index='s1_votu', columns='s2_votu', values='mean').reindex(index=unique_votus, columns=unique_votus, fill_value=0)
mean_df = mean_df.fillna(0).replace([np.inf, -np.inf], 0)

# Ward hierarchical clustering of the rows; the resulting leaf order is
# applied to both rows and columns so related votus sit next to each other.
linkage_matrix = linkage(mean_df, method='ward')
ordered_indices = leaves_list(linkage_matrix)
ordered_votus = [unique_votus[i] for i in ordered_indices]
mean_df = mean_df.reindex(index=ordered_votus, columns=ordered_votus)

# Keep only rows / columns that contain at least one value at or above the
# display threshold (rows and columns are filtered independently).
MIN_DISPLAY_MEAN = 0.1
above_threshold = mean_df >= MIN_DISPLAY_MEAN
mean_df = mean_df.loc[above_threshold.any(axis=1), above_threshold.any(axis=0)]

# Draw the heatmap on a linear colour scale fixed to [0, 1]. Tick labels are
# switched off, so no tick rotation is needed.
fig, ax = plt.subplots(figsize=(14, 10), dpi=450)
sns.heatmap(mean_df, cmap='Reds', xticklabels=False, yticklabels=False, vmin=0, vmax=1, ax=ax)

# Title and axis labels
ax.set_title("Hierarchical Clustering Heatmap of Mean Values")
ax.set_xlabel("s2_votu")
ax.set_ylabel("s1_votu")

# Label the colorbar so the figure can be read on its own.
cbar = ax.collections[0].colorbar
cbar.set_label("Mean f95 / min(len1, len2)")

# Save the figure.
# NOTE(review): the file name mentions 'f99' and 'log_transformed', but the
# plotted means are derived from the f95 column and the colour scale is
# linear; confirm the intended name before sharing the output.
fig.savefig('/Users/jferrare/Documents/Good Lab Work/Gut Phage/February_2025/mean_heatmap_f99_log_transformed_100_mem_votus.png')

plt.show()