In [21]:
import os
import json
from typing import List, Dict, Generator, Tuple, Optional
import sys
sys.path.append(os.path.abspath('..')) 
from utils import find_txt_files

In [22]:
# Directories that are searched for level .txt files.
# Example: ['data/levels/world1', 'data/levels/world2']
# Alternative input set (the "Paths" directories), currently disabled:
# INPUT_DIRS: List[str] = [r"/home/pressprexx/Code/AKCITGaming/Paper_LLM_PCG_Geral/VGLC_LLM_Finetunning/TheVGLC/Super Mario Bros/Paths", 
#                          r"/home/pressprexx/Code/AKCITGaming/Paper_LLM_PCG_Geral/VGLC_LLM_Finetunning/TheVGLC/Super Mario Bros 2 (Japan)/Paths"]

# NOTE(review): these are absolute, machine-specific paths; adjust them before
# running the notebook on another machine.
INPUT_DIRS: List[str] = [r"/home/pressprexx/Code/AKCITGaming/Paper_LLM_PCG_Geral/VGLC_LLM_Finetunning/TheVGLC/Super Mario Bros/Processed", 
                         r"/home/pressprexx/Code/AKCITGaming/Paper_LLM_PCG_Geral/VGLC_LLM_Finetunning/TheVGLC/Super Mario Bros 2 (Japan)/Processed",
                         r'/home/pressprexx/Code/AKCITGaming/Paper_LLM_PCG_Geral/VGLC_LLM_Finetunning/TheVGLC/Super Mario Land/Processed']

# Path to the output JSON file.
OUTPUT_FILE: str = 'mario1_2_land.json'

# Number of columns in each sliding window.
WINDOW_WIDTH: int = 50

# Level height in rows; levels are cropped or padded to this many lines.
WINDOW_HEIGHT: int = 14

In [24]:
def process_level_file(filepath: str, window_height: int) -> Optional[Tuple[List[str], int]]:
    """
    Read a level file and normalize it to a rectangular grid of
    ``window_height`` rows.

    Blank lines are dropped. If the remaining rows do not all have the same
    length, shorter rows are right-padded with '-' up to the widest row, so
    that every returned row can be indexed up to the returned width. Levels
    taller than ``window_height`` keep only their bottom rows; shorter levels
    get full rows of '-' added on top.

    Args:
        filepath: Path of the level text file to read.
        window_height: Number of rows the returned level must have.

    Returns:
        A ``(lines, width)`` tuple, where ``lines`` holds the processed rows
        and ``width`` is their common length, or ``None`` when the file is
        empty, missing, or cannot be processed (a message is printed).
    """
    try:
        with open(filepath, 'r') as f:
            # Strip the line terminator and drop blank lines in one pass.
            lines = [row for row in (raw.rstrip('\n') for raw in f) if row]

        if not lines:
            print(f"  Skipping empty file: {os.path.basename(filepath)}")
            return None

        # Use the widest row as the level width. Relying on the first row only
        # would make later per-column indexing fail on files with short rows.
        line_width = max(len(row) for row in lines)
        if any(len(row) != line_width for row in lines):
            print(
                f"  Warning: uneven line widths in {os.path.basename(filepath)}; "
                f"padding short lines with '-' to {line_width} columns."
            )
            lines = [row.ljust(line_width, '-') for row in lines]

        missing_rows = window_height - len(lines)
        if missing_rows < 0:
            # Too tall: keep only the bottom ``window_height`` rows.
            processed_lines = lines[-missing_rows:]
        elif missing_rows > 0:
            # Too short: add filler rows above the level.
            processed_lines = ['-' * line_width] * missing_rows + lines
        else:
            processed_lines = lines

        return processed_lines, line_width

    except FileNotFoundError:
        print(f"Error: File not found: {filepath}")
        return None
    except Exception as e:
        # Best-effort: report the problem and let the caller skip this file.
        print(f"Error processing file {filepath}: {e}")
        return None

In [25]:
def generate_windows(filepath: str, level_lines: List[str], line_width: int, window_width: int, window_height: int) -> Generator[Dict, None, None]:
    """
    Slide a ``window_width``-column window across the level, one column at a
    time, and yield one record per window position.

    Each record contains the base name of ``filepath``, the window rows, and
    six string encodings of the window: the rows from top to bottom
    ("horizontal") and the columns from left to right with each column read
    from the bottom row upwards ("vertical"), each joined with nothing, a
    newline, or '|'.

    When the level is narrower than the window, a message is printed and
    nothing is yielded.
    """
    total_cols = line_width
    if total_cols < window_width:
        print(f"Level width ({total_cols}) is smaller than window width ({window_width}). Skipping level.")
        return

    # Row indices from the bottom row up to the top row.
    bottom_up = range(window_height - 1, -1, -1)

    for left in range(total_cols - window_width + 1):
        right = left + window_width
        rows = [row[left:right] for row in level_lines]

        # One string per window column, read bottom-to-top.
        columns = [
            "".join(rows[r][c] for r in bottom_up)
            for c in range(window_width)
        ]

        yield {
            "source_file": os.path.basename(filepath),
            "window": rows,
            "str_horizontal_nosplit": "".join(rows),
            "str_horizontal_newline": "\n".join(rows),
            "str_horizontal_pipe": "|".join(rows),
            "str_vertical_nosplit": "".join(columns),
            "str_vertical_newline": "\n".join(columns),
            "str_vertical_pipe": "|".join(columns),
        }

In [26]:

# Collect every level file found under the configured input directories.
level_filepaths = find_txt_files(INPUT_DIRS)

# Accumulators for the whole run.
all_windows_data = []
file_count = 0
total_windows = 0

for file_count, filepath in enumerate(level_filepaths, start=1):
    processed_data = process_level_file(filepath, WINDOW_HEIGHT)

    # Files that could not be read or were empty contribute no windows.
    if not processed_data:
        continue

    level_lines, line_width = processed_data

    # Count the windows of this level while appending them to the dataset.
    level_windows = 0
    for level_windows, window_data in enumerate(
        generate_windows(filepath, level_lines, line_width, WINDOW_WIDTH, WINDOW_HEIGHT),
        start=1,
    ):
        all_windows_data.append(window_data)

    if level_windows > 0:
        print(f"  Generated {level_windows} windows.")
    total_windows += level_windows


print(f"\nProcessed {file_count} files.")
print(f"{total_windows} windows in total")

if total_windows <= 0:
    print("\nNo windows were generated, JSON file will not be created.")
else:
    try:
        # Create the output file's parent directory if it has one and it is missing.
        output_dir = os.path.dirname(OUTPUT_FILE)
        if output_dir and not os.path.exists(output_dir):
            os.makedirs(output_dir)
            print(f"Created output directory: {output_dir}")

        with open(OUTPUT_FILE, 'w') as f:
            json.dump(all_windows_data, f, indent=4)
        print("Successfully created JSON dataset.")
    except IOError as e:
        print(f"Error writing JSON file: {e}")
    except Exception as e:
        print(f"An unexpected error occurred during JSON writing: {e}")



Searching for .txt files in:
- /home/pressprexx/Code/AKCITGaming/Paper_LLM_PCG_Geral/VGLC_LLM_Finetunning/TheVGLC/Super Mario Bros/Processed
- /home/pressprexx/Code/AKCITGaming/Paper_LLM_PCG_Geral/VGLC_LLM_Finetunning/TheVGLC/Super Mario Bros 2 (Japan)/Processed
- /home/pressprexx/Code/AKCITGaming/Paper_LLM_PCG_Geral/VGLC_LLM_Finetunning/TheVGLC/Super Mario Land/Processed
Found 46 .txt files.
  Generated 134 windows.
  Generated 174 windows.
  Generated 116 windows.
  Generated 177 windows.
  Generated 208 windows.
  Generated 123 windows.
  Generated 131 windows.
  Generated 133 windows.
  Generated 144 windows.
  Generated 152 windows.
  Generated 131 windows.
  Generated 307 windows.
  Generated 122 windows.
  Generated 195 windows.
  Generated 164 windows.
  Generated 160 windows.
  Generated 163 windows.
  Generated 111 windows.
  Generated 142 windows.
  Generated 150 windows.
  Generated 140 windows.
  Generated 144 windows.
  Generated 153 windows.
  Generated 109 windows.
  Ge