# Visualizations: k-mer size comparison

In [1]:
import os

import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns
from qiime2 import Visualization

%matplotlib inline

In [None]:
# Base directory for data files, given relative to the directory the notebook
# is run from.
data_dir = "../data"

### Shannon Plot 

In [3]:
# Compare Shannon entropy across k-mer sizes: load one exported alpha-diversity
# table per k, join them on the sample ID, save the combined table and draw a
# box plot with one box per k-mer size.
shannon_kmer_sizes = (12, 14, 16)

# importing exported files (paths are relative to the notebook's working directory)
shannon_frames = []
for k in shannon_kmer_sizes:
    raw = pd.read_csv(f"shannon_{k}/alpha-diversity.tsv", sep="\t")
    # rename for clarity: the sample IDs sit in the first column. pandas names a
    # blank header "Unnamed: 0"; renaming by position also works if the export
    # carries an explicit header label. rename() returns a new frame, so
    # re-running the cell never operates on an already-renamed table.
    shannon_frames.append(
        raw.rename(columns={raw.columns[0]: "SampleID", "shannon_entropy": f"shannon_k{k}"})
    )

# keep the per-k tables available under their original names
shannon12, shannon14, shannon16 = shannon_frames

# merging via SampleID (inner join: only samples present for every k are kept)
merged = shannon_frames[0]
for frame in shannon_frames[1:]:
    merged = merged.merge(frame, on="SampleID")

# save as new table
merged.to_csv("shannon-combined.tsv", sep="\t", index=False)

# plot: label the Axes returned by pandas instead of relying on pyplot's
# implicit "current axes"
shannon_columns = [f"shannon_k{k}" for k in shannon_kmer_sizes]
ax = merged.set_index("SampleID")[shannon_columns].plot(kind="box")
ax.set_ylabel("Shannon Diversity")
ax.set_title("Comparison of Shannon values for different k-mer sizes")
plt.show()


FileNotFoundError: [Errno 2] No such file or directory: 'shannon-12/alpha-diversity.tsv'

### Plot Pielou

In [3]:
# Compare Pielou's evenness across k-mer sizes: load one exported
# alpha-diversity table per k, join them on the sample ID, save the combined
# table and draw a box plot with one box per k-mer size.
pielou_kmer_sizes = (12, 14, 16)

# importing exported files (paths are relative to the notebook's working directory)
pielou_frames = []
for k in pielou_kmer_sizes:
    raw = pd.read_csv(f"pielou_{k}/alpha-diversity.tsv", sep="\t")
    # rename for clarity: the sample IDs sit in the first column. pandas names a
    # blank header "Unnamed: 0"; renaming by position also works if the export
    # carries an explicit header label. rename() returns a new frame, so
    # re-running the cell never operates on an already-renamed table.
    pielou_frames.append(
        raw.rename(columns={raw.columns[0]: "SampleID", "pielou_evenness": f"pielou_k{k}"})
    )

# keep the per-k tables available under their original names
pielou12, pielou14, pielou16 = pielou_frames

# merging via SampleID (inner join: only samples present for every k are kept)
merged = pielou_frames[0]
for frame in pielou_frames[1:]:
    merged = merged.merge(frame, on="SampleID")

# save as new table
merged.to_csv("pielou-combined.tsv", sep="\t", index=False)

# plot: label the Axes returned by pandas instead of relying on pyplot's
# implicit "current axes"
pielou_columns = [f"pielou_k{k}" for k in pielou_kmer_sizes]
ax = merged.set_index("SampleID")[pielou_columns].plot(kind="box")
ax.set_ylabel("Pielou's evenness index")
ax.set_title("Comparison of Pielou's evenness index for different k-mer sizes")
plt.show()

### Plot Shannon and Pielou together

In [None]:
# Side-by-side figure: Shannon diversity (left) and Pielou evenness (right),
# one box per k-mer size. The cell reloads the exported tables itself so it
# does not depend on variables left behind by the single-metric cells.
# pandas, matplotlib, seaborn and os come from the import cell at the top.
kmer_sizes = (12, 14, 16)

# directory/column prefix -> name of the value column in the exported TSV
metric_columns = {"shannon": "shannon_entropy", "pielou": "pielou_evenness"}

frames_by_metric = {}
merged_by_metric = {}
for prefix, value_column in metric_columns.items():
    per_k = []
    for k in kmer_sizes:
        raw = pd.read_csv(f"{prefix}_{k}/alpha-diversity.tsv", sep="\t")
        # The sample IDs sit in the first column. pandas names a blank header
        # "Unnamed: 0"; renaming by position also works if the export carries
        # an explicit header label. rename() returns a new frame (no inplace).
        per_k.append(
            raw.rename(columns={raw.columns[0]: "SampleID", value_column: f"{prefix}_k{k}"})
        )
    frames_by_metric[prefix] = per_k

    # Inner join on SampleID: only samples present for every k are kept.
    combined = per_k[0]
    for frame in per_k[1:]:
        combined = combined.merge(frame, on="SampleID")
    merged_by_metric[prefix] = combined

# Keep the tables available under their original names.
shannon12, shannon14, shannon16 = frames_by_metric["shannon"]
pielou12, pielou14, pielou16 = frames_by_metric["pielou"]
merged_shannon = merged_by_metric["shannon"]
merged_pielou = merged_by_metric["pielou"]

# Visualization
sns.set(style="whitegrid", context="notebook", font_scale=1.2)

fig, axes = plt.subplots(1, 2, figsize=(12, 6))

box_colors = dict(boxes="skyblue", whiskers="gray", medians="red")
panels = [
    (merged_shannon, "shannon", "Shannon Diversity", "Shannon diversity across k-mer sizes"),
    (merged_pielou, "pielou", "Pielou's Evenness Index", "Pielou evenness across k-mer sizes"),
]
for ax, (table, prefix, y_label, title) in zip(axes, panels):
    value_columns = [f"{prefix}_k{k}" for k in kmer_sizes]
    table.set_index("SampleID")[value_columns].plot(kind="box", ax=ax, color=box_colors)
    ax.set_ylabel(y_label)
    ax.set_title(title)

fig.tight_layout()

# Save as png
# NOTE(review): this replaces a hard-coded absolute path under /home/jovyan/;
# it assumes data_dir points at that same data/ folder -- confirm.
figure_path = os.path.join(data_dir, "processed", "figure_diversity_comparison.png")
os.makedirs(os.path.dirname(figure_path), exist_ok=True)  # savefig does not create directories
fig.savefig(figure_path, dpi=300)

plt.show()
