In [1]:
import re

def extract_headers_and_convert_to_text(file_path, chunk_size=1024):
    """
    Scans a .win file for known header tags and collects nearby readable strings.

    The file is read in blocks of ``chunk_size`` bytes. For every block in
    which a known header tag ends (including a tag that started in the
    previous block), each run of four or more printable ASCII characters in
    that block is recorded under that header. Strings from all matching
    blocks are accumulated in file order.

    Limitation: a readable string that itself straddles a block boundary is
    seen as two separate pieces (and pieces shorter than four characters are
    dropped).

    Args:
    file_path (str): The path to the .win file to be analyzed.
    chunk_size (int): Number of bytes read per block. Defaults to 1024.

    Returns:
    dict: A dictionary with headers as keys and lists of associated readable
    strings (str) as values. Headers that never occur are absent.

    Raises:
    ValueError: If chunk_size is smaller than 1.
    OSError: If the file cannot be opened or read.
    """
    if chunk_size < 1:
        raise ValueError("chunk_size must be a positive integer")

    # Predefined list of headers typically found in Game Maker .win files
    headers = [
        "FORM",  # Indicates the start of a composite file format
        "GEN8",  # General game information (like settings, version)
        "OPTN",  # Game options
        "EXTN",  # Extensions used in the game
        "SOND",  # Sound resources
        "SPRT",  # Sprite resources
        "BGND",  # Background resources
        "PATH",  # Path resources
        "SCPT",  # Script resources
        "SHDR",  # Shader resources
        "FONT",  # Font resources
        "TMLN",  # Timeline resources
        "OBJT",  # Object resources (potentially including characters)
        "ROOM",  # Room resources (levels/stages in the game)
        "DAFL",  # Data file resources
        "TPAG",  # Texture page or atlas information
        "CODE",  # Code resources
        "VARI",  # Global variables
        "FUNC",  # Functions
        "STRG",  # String resources
        "TXTR",  # Texture resources
        "AUDO",  # Audio resources
        "LANG",  # Language resources
        "GLOB"   # Global settings
    ]

    # Encode the tags once instead of once per block.
    tagged_headers = [(header, header.encode('ascii')) for header in headers]

    # A tag split across two reads needs at most len(tag) - 1 bytes of the
    # previous block to be reassembled.
    overlap = max(len(tag) for _, tag in tagged_headers) - 1

    # Runs of at least four printable ASCII characters (space through tilde).
    printable_run = re.compile(rb'[ -~]{4,}')

    results = {}
    carry = b''

    with open(file_path, 'rb') as file:
        while True:
            chunk = file.read(chunk_size)
            if not chunk:
                break  # End of file reached.

            # Header detection looks at the carried tail plus the new block so
            # that tags straddling the read boundary are not missed. Because
            # the tail is shorter than a tag, every match ends inside `chunk`
            # and is therefore reported exactly once.
            window = carry + chunk

            decoded_strings = None  # Extracted lazily, at most once per block.
            for header, tag in tagged_headers:
                if tag not in window:
                    continue
                if decoded_strings is None:
                    # The pattern only matches ASCII bytes, so decoding cannot fail.
                    decoded_strings = [
                        match.decode('ascii')
                        for match in printable_run.findall(chunk)
                    ]
                # Accumulate instead of overwriting so strings from earlier
                # blocks containing the same header are preserved.
                results.setdefault(header, []).extend(decoded_strings)

            carry = window[-overlap:] if overlap else b''

    return results

In [None]:
# Demo: scan the sample Game Maker data file and print the header -> strings
# mapping that the extractor returns.
file_path = '../data/data.win'
headers_text = extract_headers_and_convert_to_text(file_path=file_path)
print(headers_text)