In [None]:
import os
from PIL import Image, ImageDraw
from IPython.display import display

In [None]:
# Directory whose files are listed and grouped by the cells below.
path_visualizations = "data/keepa/generated/plots/categories-per-domain"
# The combined grids are written to a "combined" sub-directory of the input directory.
path_output_root = f"{path_visualizations}/combined"

In [None]:
# Collect the full path of every regular file directly inside the plots
# directory; sub-directories are skipped by the isfile() check.
filepaths = []
for entry_name in os.listdir(path_visualizations):
	candidate_path = os.path.join(path_visualizations, entry_name)
	if os.path.isfile(candidate_path):
		filepaths.append(candidate_path)
# Quick sanity output: one example path and the total count.
print(filepaths[0])
print(f"Number of files: {len(filepaths)}")


In [None]:
# Bucket the plot files by category. The bucket key is the file stem (no
# directory, no extension) with its last two "-"-separated tokens removed.
categories = {}
for filepath in filepaths:
	filename = os.path.splitext(os.path.basename(filepath))[0]
	filename_no_domain = "-".join(filename.split("-")[:-2])
	categories.setdefault(filename_no_domain, []).append(filepath)
# Rebuild the dict so that iteration follows the sorted order of the category names
categories = {category: categories[category] for category in sorted(categories)}
# Order the files inside every category and print the resulting grouping
for category, paths in categories.items():
	paths.sort()
	filenames = list(map(os.path.basename, paths))
	print(f"{category}: {filenames}")


In [None]:
# 2x2 grid of PNG images
def combine_images(
	paths: list,
	legend_width: int = 580,
	legend_mask_overhang: int = 800,
	legend_mask_inset: int = 20,
) -> "Image.Image":
	"""Tile the first four images in ``paths`` into a single 2x2 grid.

	Every tile is pasted uncropped at a horizontal pitch of
	``tile_width - legend_width``. The right-hand ``legend_width`` pixels of a
	left-column tile are therefore overwritten by its right-hand neighbour,
	while the right-column tiles keep that strip (the legend) in the extra
	``legend_width`` columns of the canvas. The lower part of that strip is
	finally painted white so that only the upper legend stays visible.

	Args:
		paths: Image file paths in the order top-left, top-right, bottom-left,
			bottom-right. Entries after the fourth are ignored.
		legend_width: Width in pixels of the legend strip on the right-hand
			side of every tile.
		legend_mask_overhang: Number of pixels above the boundary between the
			two rows at which the white mask starts.
		legend_mask_inset: Horizontal offset of the mask's left edge, measured
			from the right edge of the 2x2 plot area.

	Returns:
		A new RGB image of size
		``(2 * (tile_width - legend_width) + legend_width, 2 * tile_height)``,
		where the tile size is read from the first image.

	Raises:
		IndexError: If fewer than four paths are given.
	"""
	if len(paths) < 4:
		raise IndexError(
			f"combine_images expected at least 4 image paths, got {len(paths)}"
		)

	canvas = None
	cell_width = cell_height = 0
	for index, path in enumerate(paths[:4]):
		# Use the context manager so each tile is released deterministically,
		# even when a later tile fails to open.
		with Image.open(path) as tile:
			if canvas is None:
				# The first tile defines the grid geometry; all tiles are
				# expected to have the same size.
				tile_width, cell_height = tile.size
				cell_width = tile_width - legend_width
				canvas = Image.new(
					"RGB",
					(2 * cell_width + legend_width, 2 * cell_height),
					(255, 255, 255),
				)
			row, column = divmod(index, 2)
			# Paste order matters: a right-column tile must land after its
			# left neighbour so that it covers the neighbour's legend strip.
			canvas.paste(tile, (column * cell_width, row * cell_height))

	# White-out the lower part of the legend strip (the bottom-right tile's
	# legend and the area just above the row boundary).
	plot_right_edge = 2 * cell_width
	mask_top_left = (plot_right_edge + legend_mask_inset, cell_height - legend_mask_overhang)
	mask_bottom_right = (plot_right_edge + legend_width, 2 * cell_height)
	ImageDraw.Draw(canvas).rectangle([mask_top_left, mask_bottom_right], fill=(255, 255, 255))

	return canvas

def scale_image(image: "Image.Image", scale: float) -> "Image.Image":
	"""Return a resized version of ``image`` using Lanczos resampling.

	Args:
		image: Source image; only its ``size`` is read and ``resize`` is called on it.
		scale: Multiplier applied to both the width and the height. Must be
			greater than zero.

	Returns:
		The image returned by ``image.resize`` for the target size
		``(int(width * scale), int(height * scale))``, with each side clamped
		to at least one pixel.

	Raises:
		ValueError: If ``scale`` is not positive.
	"""
	if scale <= 0:
		raise ValueError(f"scale must be positive, got {scale!r}")
	width, height = image.size
	# int() truncates towards zero, so a small image or scale could yield a
	# 0-pixel side, which resize() does not accept; clamp each side to 1.
	new_size = (max(1, int(width * scale)), max(1, int(height * scale)))
	return image.resize(new_size, Image.LANCZOS)

# Preview: build the grid for the last key of `categories` and show it at 1/8 size.
# The full-resolution grid is too big to display inline, and Image.show()
# does not work in a remote SSH Jupyter notebook.
sample_image = scale_image(combine_images(categories[list(categories)[-1]]), 1.0 / 8.0) # type: ignore
display(sample_image)

In [None]:
# Save one combined image per category
# exist_ok=True creates the output directory when it is missing without a
# separate existence check that could race with another process.
os.makedirs(path_output_root, exist_ok=True)
for category, paths in categories.items():
	combined_image = combine_images(paths)
	combined_image.save(os.path.join(path_output_root, f"{category}.png")) # type: ignore
	print(f"Saved '{category}.png'")

In [None]:
# Combine the plots of all four domains into a single image
path_domains_root = "data/keepa/generated/plots/domains-all-categories"
path_domains_output = os.path.join(path_domains_root, "combined")
# exist_ok=True creates the output directory when it is missing without a
# separate existence check that could race with another process.
os.makedirs(path_domains_output, exist_ok=True)
# Only regular files are used (the "combined" sub-directory is skipped);
# the sorted order decides which plot lands in which grid cell.
filepaths_domains = sorted(
	os.path.join(path_domains_root, entry_name)
	for entry_name in os.listdir(path_domains_root)
	if os.path.isfile(os.path.join(path_domains_root, entry_name))
)
combined_image = combine_images(filepaths_domains)
combined_image.save(os.path.join(path_domains_output, "domains-combined.png")) # type: ignore