In [6]:
%matplotlib inline

import json
import os
import re

import matplotlib.pyplot as plt
import numpy as np
import numpy.typing as npt
import pandas as pd
from PIL import Image

In [7]:
# Single-letter base codes processed by this notebook, in alphabetical order
# (presumably the IUPAC nucleotide ambiguity codes -- confirm against the data).
ambiguous_bases = list("BDHKMNRSVWY")

In [8]:
# Load one JSON document per base code from the working directory
# ("<code>.json") and keep the parsed content keyed by that code.
data = {}
for base in ambiguous_bases:
    json_file = f"{base}.json"
    # Decode explicitly as UTF-8: without `encoding`, open() falls back to the
    # platform locale (e.g. cp1252 on Windows), which is not what JSON uses.
    with open(json_file, "r", encoding="utf-8") as fh:
        data[base] = json.load(fh)


In [9]:
# Root output folder for every generated figure. It is created on demand and
# an already-existing folder is reused without error.
graphs_dir = 'graphs'
os.makedirs(name=graphs_dir, exist_ok=True)

In [10]:
def _natural_key(text):
    """Return a sort key that orders embedded digit runs numerically.

    With this key '..._2_graph.png' sorts before '..._10_graph.png', which a
    plain string sort gets wrong.
    """
    # re.split with a capturing group alternates text / digits / text / ...,
    # so odd positions are always pure digit runs and keys stay comparable.
    parts = re.split(r'(\d+)', text)
    return [int(part) if idx % 2 else part for idx, part in enumerate(parts)]


def _save_chromosome_chart(base, chromosome, chromosome_data, out_dir):
    """Draw one horizontal bar chart and return the path of the saved PNG.

    Parameters
    ----------
    base : str
        Base code; used in the output file name.
    chromosome : str
        Chromosome identifier; used in the title and the output file name.
    chromosome_data : dict
        Mapping of label -> occurrence count. Labels must be integer-like
        strings because they are ordered with ``int(label)``.
    out_dir : str
        Existing directory the PNG is written to.
    """
    ordered = dict(sorted(chromosome_data.items(), key=lambda item: int(item[0])))

    # Figure height grows with the number of bars so labels stay readable.
    fig, ax = plt.subplots(figsize=(10, max(6, len(ordered) * 0.2)))
    try:
        bars = ax.barh(list(ordered.keys()), list(ordered.values()), color='blue', alpha=0.7)
        ax.set_xlabel('# of occurrences')
        ax.set_ylabel('k-N')
        ax.set_title(f'Chromosome {chromosome} Bar Graph')
        ax.bar_label(bars, padding=3, fontsize=10)
        fig.tight_layout()

        chart_path = os.path.join(out_dir, f'chromosome_{base}_{chromosome}_graph.png')
        fig.savefig(chart_path, dpi=300)
    finally:
        # Close this specific figure even if drawing/saving fails, so figures
        # do not pile up in memory across the loop.
        plt.close(fig)
    return chart_path


def _build_collage(image_paths, collage_path, cols=3):
    """Tile the given images on a white canvas, ``cols`` per row, and save it.

    Every grid cell is as large as the largest image, so charts of different
    heights neither overlap nor get clipped. ``image_paths`` must be non-empty.
    """
    # First pass: only read the dimensions (files are closed right away).
    sizes = []
    for path in image_paths:
        with Image.open(path) as img:
            sizes.append(img.size)
    cell_width = max(width for width, _ in sizes)
    cell_height = max(height for _, height in sizes)

    rows = (len(image_paths) + cols - 1) // cols  # ceiling division
    collage = Image.new('RGB', (cols * cell_width, rows * cell_height), (255, 255, 255))

    # Second pass: paste each chart into its cell, one open file at a time.
    for idx, path in enumerate(image_paths):
        with Image.open(path) as img:
            collage.paste(img, ((idx % cols) * cell_width, (idx // cols) * cell_height))

    collage.save(collage_path)


# For every base: one bar chart per chromosome, then a collage of those charts.
for base, counts in data.items():
    base_dir = os.path.join(graphs_dir, base)
    os.makedirs(base_dir, exist_ok=True)

    # Track exactly the charts written in this run instead of listing the
    # folder, so stale PNGs (or a previous collage) never end up in the collage.
    chart_paths = [
        _save_chromosome_chart(base, chromosome, chromosome_data, base_dir)
        for chromosome, chromosome_data in counts.items()
    ]
    if not chart_paths:
        print(f'No chromosome data for {base}; collage skipped')
        continue

    # Sort by filename, with numbers compared numerically.
    chart_paths.sort(key=lambda path: _natural_key(os.path.basename(path)))

    collage_path = os.path.join(base_dir, f'all_{base}.png')
    _build_collage(chart_paths, collage_path)

    print(f'Collage saved at {collage_path}')


Collage saved at graphs\B\all_B.png
Collage saved at graphs\D\all_D.png
Collage saved at graphs\H\all_H.png
Collage saved at graphs\K\all_K.png
Collage saved at graphs\M\all_M.png
Collage saved at graphs\N\all_N.png
Collage saved at graphs\R\all_R.png
Collage saved at graphs\S\all_S.png
Collage saved at graphs\V\all_V.png
Collage saved at graphs\W\all_W.png
Collage saved at graphs\Y\all_Y.png
