In [5]:
import os
import json
from typing import List, Dict, Generator
import sys
sys.path.append(os.path.abspath('..')) 
from utils import find_txt_files

In [6]:
# Directories handed to find_txt_files() to locate the level .txt files.
# NOTE(review): this is an absolute, machine-specific path — adjust it before
# running the notebook on another machine.
INPUT_DIRS: List[str] = [r'/home/pressprexx/Code/AKCITGaming/Paper_LLM_PCG_Geral/VGLC_LLM_Finetunning/TheVGLC/Kid Icarus/Paths']

# Path of the JSON file that receives every generated window.
OUTPUT_FILE: str = 'kid_icarus_paths.json'

# Window size: number of consecutive level rows contained in each sliding window.
WINDOW_HEIGHT: int = 35

In [7]:
def generate_line_windows(filepath: str, level_lines: List[str], window_height: int) -> Generator[Dict, None, None]:
    """
    Generate vertically sliding windows of level rows and their string encodings.

    A window of ``window_height`` consecutive rows is slid over ``level_lines``
    one row at a time, starting at the first row. For every position one dict
    is yielded with these keys:

    - ``source_file``: base name of ``filepath``.
    - ``window``: the rows of the window, in file order.
    - ``str_horizontal_nosplit`` / ``_newline`` / ``_pipe``: the rows joined
      with nothing, a newline, or a pipe character.
    - ``str_vertical_nosplit`` / ``_newline`` / ``_pipe``: the columns, left to
      right, each read from the last row of the window up to the first, joined
      the same three ways.

    Args:
        filepath: Path of the level file; only its base name is used (for
            messages and for the ``source_file`` field).
        level_lines: Rows of the level. All rows must have the same length.
        window_height: Number of rows per window; must be positive.

    Yields:
        One dict per window position (``len(level_lines) - window_height + 1``
        in total). Nothing is yielded, and a message is printed, when the level
        is empty or has fewer rows than ``window_height``.

    Raises:
        ValueError: If ``window_height`` is not positive, or if the rows do not
            all share one width. Because this is a generator, the error
            surfaces on first iteration rather than at call time.
    """
    if window_height <= 0:
        raise ValueError(f"window_height must be a positive integer, got {window_height}")

    source_name = os.path.basename(filepath)
    total_lines = len(level_lines)

    if not level_lines:
        print(f"Skipping empty level content for {source_name}")
        return

    if total_lines < window_height:
        print(f"Level height ({total_lines}) in {source_name} is smaller than window height ({window_height}). Skipping.")
        return

    # The column-wise encodings only make sense on a rectangular grid. Check
    # once up front so a malformed file fails with a clear message before any
    # window is emitted, instead of an IndexError (short row) or silently
    # truncated columns (long row) somewhere in the middle of the level.
    row_widths = {len(row) for row in level_lines}
    if len(row_widths) > 1:
        raise ValueError(
            f"Rows in {source_name} have inconsistent widths {sorted(row_widths)}; "
            "cannot build column-wise representations."
        )

    for start_line in range(total_lines - window_height + 1):
        current_window_lines = level_lines[start_line:start_line + window_height]

        # Transpose the window: reversing the rows first makes every column
        # read from the bottom row of the window up to the top row.
        vertical_cols = ["".join(column) for column in zip(*reversed(current_window_lines))]

        yield {
            "source_file": source_name,
            "window": current_window_lines,
            "str_horizontal_nosplit": "".join(current_window_lines),
            "str_horizontal_newline": "\n".join(current_window_lines),
            "str_horizontal_pipe": "|".join(current_window_lines),
            "str_vertical_nosplit": "".join(vertical_cols),
            "str_vertical_newline": "\n".join(vertical_cols),
            "str_vertical_pipe": "|".join(vertical_cols),
        }


In [8]:

# Driver cell: slide a WINDOW_HEIGHT-row window over every level file found
# under INPUT_DIRS and write all resulting windows to OUTPUT_FILE as one JSON
# array.
print(f"Starting line-window dataset creation (Window Height: {WINDOW_HEIGHT})...")
level_filepaths = find_txt_files(INPUT_DIRS)

all_windows_data = []
file_count = 0
total_windows = 0
processed_file_count = 0

for filepath in level_filepaths:
    file_count += 1
    print(f"\nProcessing file {file_count}/{len(level_filepaths)}: {os.path.basename(filepath)}")

    try:
        with open(filepath, 'r') as f:
            # Strip only the line terminator; any other whitespace is kept as
            # part of the row.
            lines = [line.rstrip('\n') for line in f]
        # Blank rows are dropped before windowing.
        processed_lines = [line for line in lines if line]

        if not processed_lines:
            print("  Skipping empty file content.")
            continue

        # Materialize this file's windows before touching the shared list: if
        # generation fails partway, no partial windows leak into the dataset
        # and the counters below stay consistent with what is written.
        file_windows = list(generate_line_windows(filepath, processed_lines, WINDOW_HEIGHT))
        level_windows = len(file_windows)

        if level_windows > 0:
            all_windows_data.extend(file_windows)
            print(f"  Generated {level_windows} line windows.")
            total_windows += level_windows
            processed_file_count += 1

    except FileNotFoundError:
        print(f"Error: File not found: {filepath}")
    except Exception as e:
        # Best effort: report the failing file and continue with the rest.
        print(f"Error processing file {filepath}: {e}")


print(f"Successfully generated {total_windows} windows from {processed_file_count} out of {file_count} files.")

if total_windows > 0:
    print(f"\nWriting data to {OUTPUT_FILE}...")
    try:
        output_dir = os.path.dirname(OUTPUT_FILE)
        if output_dir and not os.path.exists(output_dir):
            # exist_ok guards against the directory appearing between the
            # existence check and the creation call.
            os.makedirs(output_dir, exist_ok=True)
            print(f"Created output directory: {output_dir}")

        with open(OUTPUT_FILE, 'w') as f:
            json.dump(all_windows_data, f, indent=4)
        print("Successfully created JSON dataset.")
    except IOError as e:
        print(f"Error writing JSON file: {e}")
    except Exception as e:
        print(f"An unexpected error occurred during JSON writing: {e}")
else:
    print("\nNo windows were successfully generated, JSON file will not be created.")


Starting line-window dataset creation (Window Height: 35)...
Searching for .txt files in:
- /home/pressprexx/Code/AKCITGaming/Paper_LLM_PCG_Geral/VGLC_LLM_Finetunning/TheVGLC/Kid Icarus/Paths
Found 6 .txt files.

Processing file 1/6: kidicarus_1_Annotated_Path.txt
  Generated 139 line windows.

Processing file 2/6: kidicarus_2_Annotated_Path.txt
  Generated 169 line windows.

Processing file 3/6: kidicarus_3_Annotated_Path.txt
  Generated 247 line windows.

Processing file 4/6: kidicarus_4_Annotated_Path.txt
  Generated 125 line windows.

Processing file 5/6: kidicarus_5_Annotated_Path.txt
  Generated 172 line windows.

Processing file 6/6: kidicarus_6_Annotated_Path.txt
  Generated 199 line windows.
Successfully generated 1051 windows from 6 out of 6 files.

Writing data to kid_icarus_paths.json...
Successfully created JSON dataset.
