In [98]:
import os
import numpy as np
import PIL.Image as Image

In [99]:
def convert_txt_to_bytes(filepath: str) -> bytes:
	'''
	Reads a text file and returns its contents as UTF-8 encoded bytes.

	The file is decoded explicitly as UTF-8 instead of with the platform's default
	encoding, so the returned bytes do not depend on the locale of the machine running
	the notebook. The file is opened in text mode, so line endings are normalised by
	Python's universal-newline handling before the text is re-encoded.

	Args:
		filepath: Path of the text file to read.

	Returns:
		The file's text, encoded as UTF-8.

	Raises:
		OSError: If the file cannot be opened.
		UnicodeDecodeError: If the file is not valid UTF-8.
	'''
	with open(filepath, 'r', encoding='utf-8') as f:
		data = f.read()
	return data.encode('utf-8')

def convert_bytes_to_image(data: bytes) -> Image.Image:
	'''
	Converts a bytes object to a square image in grayscale ('L') format.

	Every input byte becomes one pixel. The image side length is the square root of the
	number of bytes, rounded up, and the unused tail of the square is filled with null
	bytes (value 0).

	Args:
		data: The raw bytes to visualise. The argument is never modified, even when a
			mutable buffer such as a bytearray is passed.

	Returns:
		A square PIL image in mode 'L'.
	'''
	# We would like a square image, so the side length is the square root of the number of
	# bytes (one byte per pixel), rounded up to the next integer.
	side_len: int = int(np.ceil(np.sqrt(len(data))))

	# Pad with null bytes to fill the square. Build a new object instead of using `+=`:
	# in-place concatenation would extend a caller-supplied bytearray as a side effect.
	padded_data = bytes(data) + b'\0' * (side_len ** 2 - len(data))

	# Create an image from the data.
	return Image.frombytes('L', (side_len, side_len), padded_data)

def convert_bytes_to_color_image(data: bytes) -> Image.Image:
	'''
	Converts a bytes object to a square image in RGB format.

	Every three consecutive input bytes become the red, green and blue channels of one
	pixel. The image side length is the square root of the pixel count (len(data) / 3),
	rounded up, and the unused tail of the square is filled with null bytes (value 0).

	Args:
		data: The raw bytes to visualise. The argument is not modified.

	Returns:
		A square PIL image in mode 'RGB'.
	'''

	# We would like a square image. Each pixel consumes three bytes, so the side length is
	# the square root of a third of the data length, rounded up to the next integer.
	side_len: int = int(np.ceil(np.sqrt(len(data) / 3)))

	# Clone the input data array so we don't modify the original.
	padded_data = bytearray(data)

	# Pad the data with null bytes so it fills side_len * side_len pixels of 3 bytes each.
	padded_data += b'\0' * (side_len ** 2 * 3 - len(data))

	# Create an image from the data.
	return Image.frombytes('RGB', (side_len, side_len), padded_data)

def convert_multiple_bytearrays_to_color_image(
		data_arrays: list[bytes],
		spacer_rows: int = 4) -> Image.Image:
	'''
	Converts a list of bytes objects to a single square image in RGB format.

	The arrays are concatenated in list order, with a run of solid white pixels inserted
	between each consecutive pair. The combined buffer is then laid out three bytes per
	pixel and the unused tail of the square is filled with null bytes (value 0).

	The spacer size is only an estimate: it is derived from the side length the image
	would have without any spacers, so in the final (slightly larger) image each spacer
	covers somewhat fewer than `spacer_rows` full rows. Arrays are joined byte-for-byte,
	so a spacer does not necessarily start on a pixel or row boundary.

	Args:
		data_arrays: The byte arrays to combine, in the order they should appear.
		spacer_rows: Approximate height, in pixel rows, of the white gap between arrays.

	Returns:
		A square PIL image in mode 'RGB'.
	'''

	# To accommodate clear spacing between the different data arrays, we calculate the side
	# length the image would have without any spacing and use it to guesstimate the length
	# of the spacers. Only the total length is needed, so sum the lengths instead of
	# building a throwaway concatenation.
	unspaced_len = sum(len(array) for array in data_arrays)
	side_len_estimate = np.sqrt(unspaced_len / 3)

	# Calculate the number of white pixels each spacer consists of.
	pix_per_spacer = int(side_len_estimate * spacer_rows)

	# Combine the byte arrays into a single bytes object, with each array separated by a
	# spacer of solid white RGB pixels (three 0xff bytes per pixel).
	combined_array = (b'\xff\xff\xff' * pix_per_spacer).join(data_arrays)

	# Three bytes per pixel; round the side length up so everything fits in the square.
	side_len = int(np.ceil(np.sqrt(len(combined_array) / 3)))

	# Pad the data with null bytes to make it a square.
	combined_array += b'\0' * (side_len ** 2 * 3 - len(combined_array))

	# Create an image from the data.
	return Image.frombytes('RGB', (side_len, side_len), combined_array)


In [100]:
# Collect the names of all entries in the data directory whose name contains ".txt".
# This is a substring match, so names such as "book.txt~" are included as well.
filename_list = list(filter(lambda entry: ".txt" in entry, os.listdir('data')))

In [101]:
# Render every input file as its own RGB image. The dictionary key is the file name with
# every occurrence of ".txt~" removed; it is used later as the output image name.
file_images = {
	filename.replace(".txt~", ""): convert_bytes_to_color_image(
		convert_txt_to_bytes(f"data/{filename}")
	)
	for filename in filename_list
}

In [103]:
# Make sure the output directory exists so this cell also works on a fresh checkout;
# saving into a missing directory would raise FileNotFoundError.
os.makedirs("out", exist_ok=True)

# Write each per-file image to out/<name>.png.
for name, img in file_images.items():
	img.save(f"out/{name}.png", "PNG")

In [107]:
# Create a single image from all input files.
# Sort the file names so the files appear in the combined image in a deterministic order
# (os.listdir returns entries in arbitrary order).
sorted_filelist = sorted(filename_list)
print(sorted_filelist)
total_image = convert_multiple_bytearrays_to_color_image(
	[convert_txt_to_bytes(f"data/{f}") for f in sorted_filelist])

# Make sure the output directory exists so this cell also works on a fresh checkout;
# saving into a missing directory would raise FileNotFoundError.
os.makedirs("out", exist_ok=True)
total_image.save("out/total.png", "PNG")

["1_Harry Potter and the Sorcerer's Stone.txt~", '2_Harry Potter and the Chamber of Secrets.txt~', '3_Harry Potter and the Prisoner of Azkaban.txt~', '4_Harry Potter and the Goblet of Fire.txt~', '5_Harry Potter and the Order of the Phoenix.txt~', '6_Harry Potter and The Half-Blood Prince.txt~', '7_Harry Potter and the Deathly Hallows.txt~']
