In [37]:
import re
# Input text file read by filter_lines below; the path is relative, so it is
# resolved against the kernel's current working directory.
filename = '../audio/data/in.txt'

def filter_lines(filename, min_length=10, encoding=None):
    """
    Read a text file and keep only the lines that carry actual content.

    Each line is stripped of surrounding whitespace and then dropped when it
    - is empty,
    - is a speaker label: an ASCII capital letter followed by ASCII letters
      and a trailing colon, with nothing else on the line (e.g. ``EMMA:``),
    - consists solely of ``-`` and/or ``=`` characters (e.g. ``=====``), or
    - is not strictly longer than ``min_length`` characters.

    Args:
        filename: Path of the text file to read.
        min_length: A line must be strictly longer than this many characters
            (after stripping) to be kept. Defaults to 10.
        encoding: Encoding passed to ``open``. ``None`` (the default) lets
            Python pick the platform default; pass e.g. ``'utf-8'`` to make
            reading non-ASCII text independent of the platform.

    Returns:
        list of str: the kept lines, stripped, in file order.

    Raises:
        OSError: if the file cannot be opened.
    """
    # Speaker labels such as "EMMA:" or "Nico:" standing alone on a line.
    name_pattern = re.compile(r'^[A-Z][A-Za-z]*:$')
    # Separator rules made only of dashes and/or equals signs.
    separator_pattern = re.compile(r'[-=]+')

    filtered_lines = []
    with open(filename, 'r', encoding=encoding) as file:
        # Stream the file line by line instead of loading it all up front.
        for line in file:
            stripped_line = line.strip()
            if not stripped_line:
                continue
            if len(stripped_line) <= min_length:
                continue
            if name_pattern.match(stripped_line):
                continue
            if separator_pattern.fullmatch(stripped_line):
                continue
            filtered_lines.append(stripped_line)

    return filtered_lines

# Content lines of the input file (speaker labels, separators and short lines
# removed); passed to translate_text below, which uses source language 'de'.
text_de = filter_lines(filename)

from tqdm import tqdm
from deep_translator import GoogleTranslator

def translate_text(text_list, source='de', target='en'):
    """
    Translate every string in ``text_list`` with ``GoogleTranslator``.

    Items are translated one at a time while ``tqdm`` reports progress.

    Args:
        text_list: Iterable of strings to translate.
        source: Source language code handed to ``GoogleTranslator``.
            Defaults to ``'de'``.
        target: Target language code handed to ``GoogleTranslator``.
            Defaults to ``'en'``.

    Returns:
        list: one translation result per input item, in input order.
    """
    # The language pair is the same for every item, so build the translator
    # once instead of constructing a new object on each iteration.
    translator = GoogleTranslator(source=source, target=target)
    return [translator.translate(text) for text in tqdm(text_list)]
# Translations of text_de, one entry per input line and in the same order.
text_en = translate_text(text_de)

100%|██████████| 185/185 [00:52<00:00,  3.55it/s]


In [38]:
from reportlab.lib.pagesizes import A4
from reportlab.pdfgen import canvas
from textwrap import wrap

def split_into_chunks(text, words_per_line):
    """
    Break ``text`` into consecutive groups of at most ``words_per_line``
    whitespace-separated words.

    Returns a list of strings; the words of each group are joined by a single
    space. The last group may hold fewer words than the others.
    """
    tokens = text.split()
    chunks = []
    for start in range(0, len(tokens), words_per_line):
        group = tokens[start:start + words_per_line]
        chunks.append(' '.join(group))
    return chunks

def create_pdf_from_list(text_list, file_name, words_per_line=6,
                         font_name="Helvetica", font_size=12, line_height=14):
    """
    Write ``text_list`` to an A4 PDF as a 4x4 grid of cells, 16 items per page.

    Items fill each page row by row, starting in the top-left cell and moving
    right. Every item is split into lines of ``words_per_line`` words and
    drawn rotated by 90 degrees, centred on its cell. No width check is done,
    so long lines can extend beyond their cell.

    Args:
        text_list: Sequence of strings, one per grid cell.
        file_name: Path of the PDF file to write.
        words_per_line: Maximum number of words per drawn line. Defaults to 6.
        font_name: Font used for the text. Defaults to ``"Helvetica"``.
        font_size: Font size in points. Defaults to 12.
        line_height: Distance in points between consecutive lines of one
            item. Defaults to 14.
    """
    c = canvas.Canvas(file_name, pagesize=A4)
    width, height = A4
    # Kept ahead of the loop as before, so the canvas is set up the same way
    # even when text_list is empty and no page is drawn below.
    c.setFont(font_name, font_size)

    # Grid layout: the cells tile the whole page.
    rows, cols = 4, 4
    cells_per_page = rows * cols
    grid_width = width / cols
    grid_height = height / rows

    for i in range(0, len(text_list), cells_per_page):
        page_elements = text_list[i:i + cells_per_page]

        # showPage() resets the canvas graphics state (font included), so the
        # font has to be applied again at the start of every page.
        c.setFont(font_name, font_size)

        # Grid lines: one more line than there are rows / columns.
        for row in range(rows + 1):
            y = height - row * grid_height
            c.line(0, y, width, y)  # horizontal lines

        for col in range(cols + 1):
            x = col * grid_width
            c.line(x, 0, x, height)  # vertical lines

        for j, element in enumerate(page_elements):
            col = j % cols
            row = j // cols

            # Centre of the target cell (PDF origin is the bottom-left corner).
            x_pos = col * grid_width + grid_width / 2
            y_pos = height - (row * grid_height + grid_height / 2)

            lines = split_into_chunks(element, words_per_line)

            # Draw in a coordinate system centred on the cell and rotated 90
            # degrees; saveState/restoreState keep the transform local.
            c.saveState()
            c.translate(x_pos, y_pos)
            c.rotate(90)

            # Start half the block height above the centre so the stack of
            # lines is centred on the cell.
            y_offset = (len(lines) - 1) * line_height / 2
            for line in lines:
                c.drawCentredString(0, y_offset, line)
                y_offset -= line_height

            c.restoreState()

        c.showPage()  # Finish this page and start the next one

    c.save()


# Render the English translations into en.pdf (written to the current working directory).
# Sample input for a quick manual check of the layout:
# text_list = ["short text", "longer text that might overlap", "this is a very long text that could cause overlap and span multiple lines", "test"]
create_pdf_from_list(text_en, "en.pdf")


In [39]:
def create_pdf_from_list_de(text_list, file_name, words_per_line=6,
                            font_name="Helvetica", font_size=12, line_height=14):
    """
    Write ``text_list`` to an A4 PDF as a 4x4 grid of cells, 16 items per page,
    filling each row from right to left.

    The layout matches ``create_pdf_from_list`` except for the column order:
    the first item of a page goes into the top-right cell and later items move
    left. NOTE(review): presumably this mirrors the other PDF so that both
    line up when printed on the two sides of one sheet; confirm with the
    author.

    Every item is split into lines of ``words_per_line`` words and drawn
    rotated by 90 degrees, centred on its cell. No width check is done, so
    long lines can extend beyond their cell.

    Args:
        text_list: Sequence of strings, one per grid cell.
        file_name: Path of the PDF file to write.
        words_per_line: Maximum number of words per drawn line. Defaults to 6.
        font_name: Font used for the text. Defaults to ``"Helvetica"``.
        font_size: Font size in points. Defaults to 12.
        line_height: Distance in points between consecutive lines of one
            item. Defaults to 14.
    """
    c = canvas.Canvas(file_name, pagesize=A4)
    width, height = A4
    # Kept ahead of the loop as before, so the canvas is set up the same way
    # even when text_list is empty and no page is drawn below.
    c.setFont(font_name, font_size)

    # Grid layout: the cells tile the whole page.
    rows, cols = 4, 4
    cells_per_page = rows * cols
    grid_width = width / cols
    grid_height = height / rows

    for i in range(0, len(text_list), cells_per_page):
        page_elements = text_list[i:i + cells_per_page]

        # showPage() resets the canvas graphics state (font included), so the
        # font has to be applied again at the start of every page.
        c.setFont(font_name, font_size)

        # Grid lines: one more line than there are rows / columns.
        for row in range(rows + 1):
            y = height - row * grid_height
            c.line(0, y, width, y)  # horizontal lines

        for col in range(cols + 1):
            x = col * grid_width
            c.line(x, 0, x, height)  # vertical lines

        for j, element in enumerate(page_elements):
            # Mirrored column order: start in the rightmost column, move left.
            col = (cols - 1) - (j % cols)
            row = j // cols

            # Centre of the target cell (PDF origin is the bottom-left corner).
            x_pos = col * grid_width + grid_width / 2
            y_pos = height - (row * grid_height + grid_height / 2)

            lines = split_into_chunks(element, words_per_line)

            # Draw in a coordinate system centred on the cell and rotated 90
            # degrees; saveState/restoreState keep the transform local.
            c.saveState()
            c.translate(x_pos, y_pos)
            c.rotate(90)

            # Start half the block height above the centre so the stack of
            # lines is centred on the cell.
            y_offset = (len(lines) - 1) * line_height / 2
            for line in lines:
                c.drawCentredString(0, y_offset, line)
                y_offset -= line_height

            c.restoreState()

        c.showPage()  # Finish this page and start the next one

    c.save()


# Render the filtered source lines (text_de) into de.pdf using the right-to-left column order.
# Sample input for a quick manual check of the layout:
# text_de = ["short text", "longer text that might overlap", "this is a very long text that could cause overlap and span multiple lines", "test"]
create_pdf_from_list_de(text_de, "de.pdf")
