# Evaluating g4m3 0f l1f3 Attribute Distribution


In [None]:
%pip install pandas altair vega vega_datasets vegafusion vl-convert-python Pillow

## Imports


In [None]:
# All the imports
from vega_datasets import data
import json
import base64
import os
import pandas as pd
import ast
import altair as alt
import re
from IPython.display import SVG, display, HTML, IFrame
import xml.etree.ElementTree as ET
from PIL import Image
import io
import urllib.parse
import numpy as np
alt.data_transformers.disable_max_rows()
# alt.renderers.enable('notebook')

## Functions

Collection of utility functions


In [None]:
def read_json_files(directory):
    """Load every ``*.json`` file in *directory* into one DataFrame.

    Each file is parsed with ``json.load`` and iterated as a sequence of
    dict-like items; every item is tagged with a ``mint_ts`` key holding the
    file name without its ``.json`` extension.

    :param directory: Path of the folder to scan (not recursive).
    :return: DataFrame with one row per item across all files, in file-name
        order. Empty DataFrame if no JSON file is found.
    """
    records = []
    # os.listdir gives no ordering guarantee; sort so row order is reproducible.
    for filename in sorted(os.listdir(directory)):
        if not filename.endswith('.json'):
            continue
        # Strip only the trailing extension. A regex such as ".json" would
        # also match e.g. "ajson" in the middle of a name.
        file_id = os.path.splitext(filename)[0]
        filepath = os.path.join(directory, filename)
        with open(filepath, 'r', encoding='utf-8') as f:
            file_data = json.load(f)
        for item in file_data:
            item['mint_ts'] = file_id
        records.extend(file_data)
    return pd.DataFrame(records)


def flatten_attributes(df):
    """Expand the ``attributes`` column into one column per trait.

    Each cell of ``df['attributes']`` is iterated as a sequence of entries;
    an entry is either a dict or the string repr of one (parsed with
    ``ast.literal_eval``). Every entry must provide ``trait_type`` and
    ``value``; ``trait_type`` becomes the column name.

    :param df: DataFrame containing an ``attributes`` column.
    :return: New DataFrame without ``attributes`` and with the trait columns
        appended. The input frame's index is preserved.
    """
    def to_trait_mapping(entries):
        parsed = (ast.literal_eval(e) if isinstance(e, str) else e
                  for e in entries)
        return {entry['trait_type']: entry['value'] for entry in parsed}

    trait_rows = [to_trait_mapping(entries) for entries in df['attributes']]
    # Reuse the caller's index: concat(axis=1) aligns on index labels, so a
    # fresh RangeIndex here would misalign rows for any non-default index.
    attributes_df = pd.DataFrame(trait_rows, index=df.index)
    return pd.concat([df.drop(columns=['attributes']), attributes_df], axis=1)


def addGameState(df):
    """Derive a ``game_state`` column from the ``image`` column.

    Each image value is URL-unquoted, the part after the first ``;`` is
    handed to ``decodeBase64`` and the resulting SVG text to
    ``extractGameState``. An ``IndexError`` anywhere in that chain is counted
    and yields ``None`` for that row.

    Note that ``game_state`` is written onto the *passed-in* frame; only the
    returned frame has the ``image`` column removed.

    :param df: DataFrame with an ``image`` column.
    :return: Copy of *df* (including ``game_state``) without ``image``.
    """
    tally = {'seen': 0, 'failed': 0}

    def get_game_state(x):
        tally['seen'] += 1
        try:
            unquoted = urllib.parse.unquote(x)
            return extractGameState(decodeBase64(unquoted.split(';')[1]))
        except IndexError:
            tally['failed'] += 1
            return None

    # In-place column assignment: the caller's frame gains 'game_state'.
    df['game_state'] = df['image'].apply(get_game_state)
    without_image = df.drop(columns=['image'])

    no_errors = tally['failed']
    no_obs = tally['seen']
    print(f"errors encountered: {no_errors} on {no_obs} observations")
    return without_image


def decodeBase64(image):
    """Decode the base64 payload of a ``<prefix>,<payload>`` string to text.

    :param image: String whose second comma-separated field is base64 data
        (e.g. ``data:image/svg+xml;base64,....``).
    :return: The payload decoded from base64 and then from UTF-8.
    :raises IndexError: If *image* contains no comma.
    """
    # Field 0 is the prefix; field 1 is the encoded payload.
    payload = image.split(',')[1]
    return base64.b64decode(payload).decode('utf-8')


def extractGrid(imageSVG):
    """Print the tag and attribute dict of every element in an SVG string.

    Debugging aid only: nothing is returned and nothing is modified or saved.

    :param imageSVG: SVG/XML document as text.
    """
    document_root = ET.fromstring(imageSVG)
    # iter() walks the root itself followed by all descendants.
    for node in document_root.iter():
        print(node.tag, node.attrib)


def displayImage(image64):
    """Render one base64-encoded SVG (``<prefix>,<payload>`` form) inline."""
    display(SVG(decodeBase64(image64)))


def displaySVG(svg_content):
    """Decode a base64 SVG string, print its source, then render it as HTML.

    :param svg_content: String in ``<prefix>,<base64 payload>`` form.
    """
    svg_markup = decodeBase64(svg_content)
    print(svg_markup)
    display(HTML(svg_markup))


def displaySvgIFrame(svg_data_url, width=360, height=360):
    """Build an ``IFrame`` showing *svg_data_url*.

    The frame is returned, not displayed; pass it to ``display`` to show it.

    :param svg_data_url: URL (typically a ``data:`` URL) used as the frame source.
    :param width: Frame width passed to ``IFrame``.
    :param height: Frame height passed to ``IFrame``.
    :return: The constructed ``IFrame`` object.
    """
    frame = IFrame(svg_data_url, width=width, height=height)
    return frame


def displayImages(base64Array):
    """Render each base64-encoded SVG in *base64Array* inline, in order."""
    for encoded in base64Array:
        display(SVG(decodeBase64(encoded)))


def extractGameState(svg_content):
    """Recover an 8x8 grid of 0/1 cells from the ``<use>`` tags of an SVG.

    NOTE: this function is defined a second time further down in this file;
    the later definition is the one bound at runtime.

    Every ``<use href="#l" ...>`` or ``<use href="#b" ...>`` tag marks a cell
    as 1 (presumably these symbol ids denote live cells - confirm against the
    SVG generator). Pixel coordinates map to grid indices via
    ``(coord - 22) // 40``.

    :param svg_content: SVG markup as text.
    :return: List of 8 lists of 8 ints (``board[row][col]``).
    :raises IndexError: If a coordinate maps beyond the 8x8 grid.
    """
    board = [[0] * 8 for _ in range(8)]

    live_cell_patterns = (
        r'<use href="#l" x="(\d+)" y="(\d+)"',
        r'<use href="#b" x="(\d+)" y="(\d+)"',
    )

    for pattern in live_cell_patterns:
        for x_text, y_text in re.findall(pattern, svg_content):
            col = (int(x_text) - 22) // 40
            row = (int(y_text) - 22) // 40
            board[row][col] = 1

    return board


def simpleGameStateSVG(game_state, game_state_prev=None):
    """Render an 8x8 game state as a plain SVG and return it as a data URL.

    NOTE: this function is defined a second time further down in this file;
    the later definition is the one bound at runtime.

    Cells are 40x40 squares on a 320x320 canvas. Without a previous state,
    live cells are black and all others white. With a previous state, cells
    that turned on are green, cells that turned off are red, cells that stayed
    on are black and everything else is white.

    :param game_state: ``state[row][col]`` grid, at least 8x8, of 0/1 values.
    :param game_state_prev: Optional grid of the preceding generation.
    :return: ``data:image/svg+xml;base64,...`` string.
    """
    alive_color, dead_color = 'black', 'white'
    born_color, died_color = 'green', 'red'

    def fill_for(row, col):
        now = game_state[row][col]
        if game_state_prev is None:
            return alive_color if now == 1 else dead_color
        if now == 1:
            # Alive now: newly born if it was off before, otherwise a survivor.
            return born_color if game_state_prev[row][col] == 0 else alive_color
        if now == 0 and game_state_prev[row][col] == 1:
            return died_color
        return dead_color

    pieces = ['<svg width="320" height="320" xmlns="http://www.w3.org/2000/svg">']
    for row in range(8):
        for col in range(8):
            color = fill_for(row, col)
            pieces.append(
                f'<rect x="{col*40}" y="{row*40}" width="40" height="40" fill="{color}" />')
    pieces.append('</svg>')

    encoded = base64.b64encode(''.join(pieces).encode('utf-8')).decode('utf-8')
    return "data:image/svg+xml;base64," + encoded


def printEndOfEpoch(df, epoch, noGenerations):
    """Show the original images of the last *noGenerations* rows of an epoch.

    :param df: DataFrame with ``epoch_id`` and ``image`` columns.
    :param epoch: ``epoch_id`` value to select.
    :param noGenerations: How many trailing rows of that epoch to display.
    """
    in_epoch = df[df['epoch_id'] == epoch]
    last_images = in_epoch['image'].tail(noGenerations)

    # Each image value is used directly as the iframe source.
    for image in last_images:
        display(displaySvgIFrame(image))


def printEndOfEpochBW(df, epoch, noGenerations, images_per_row=3):
    """Show the last generations of an epoch as simplified diff-coloured grids.

    Each selected image is decoded, converted to a game state and re-rendered
    with ``simpleGameStateSVG``, passing the previous image's state so that
    changes between consecutive generations are highlighted. The results are
    laid out in an HTML table.

    :param df: DataFrame with ``epoch_id`` and ``image`` columns.
    :param epoch: ``epoch_id`` value to select.
    :param noGenerations: How many trailing rows of that epoch to display.
    :param images_per_row: Number of table cells per row (must be >= 1).
    :raises ValueError: If *images_per_row* is smaller than 1.
    """
    if images_per_row < 1:
        raise ValueError("images_per_row must be at least 1")

    to_display = df[df['epoch_id'] == epoch]['image'].tail(noGenerations)

    cells = []
    game_state_prev = None
    for image in to_display:
        game_state = extractGameState(decodeBase64(image))
        svg_data_url = simpleGameStateSVG(game_state, game_state_prev)
        cells.append(
            f'<td><iframe src="{svg_data_url}" width="360" height="360" frameborder="0"></iframe></td>')
        game_state_prev = game_state

    # Chunk the cells into rows. Wrapping each chunk guarantees the final,
    # possibly partial, row is closed as well.
    rows = (
        ''.join(cells[start:start + images_per_row])
        for start in range(0, len(cells), images_per_row)
    )
    html_content = '<table>' + ''.join(f'<tr>{row}</tr>' for row in rows) + '</table>'

    display(HTML(html_content))


def extractGameState(svg_content):
    """Build an 8x8 grid of 0/1 cells from the ``<use>`` tags in an SVG string.

    NOTE: an identical definition appears earlier in this file; this later
    one replaces it at runtime.

    A cell is set to 1 for every ``<use href="#l" ...>`` or
    ``<use href="#b" ...>`` tag (presumably the symbols for live cells -
    confirm against the SVG generator). The tag's ``x``/``y`` pixel
    coordinates are converted to column/row with ``(coord - 22) // 40``.

    :param svg_content: SVG markup as text.
    :return: ``grid[row][col]`` as a list of 8 lists of 8 ints.
    :raises IndexError: If a coordinate maps beyond the 8x8 grid.
    """
    grid = [[0] * 8 for _ in range(8)]

    for pattern in (r'<use href="#l" x="(\d+)" y="(\d+)"',
                    r'<use href="#b" x="(\d+)" y="(\d+)"'):
        for found in re.finditer(pattern, svg_content):
            x_px = int(found.group(1))
            y_px = int(found.group(2))
            grid[(y_px - 22) // 40][(x_px - 22) // 40] = 1

    return grid


def simpleGameStateSVG(game_state, game_state_prev=None):
    """Draw an 8x8 game state as flat coloured squares; return a data URL.

    NOTE: an identical definition appears earlier in this file; this later
    one replaces it at runtime.

    The canvas is 320x320 with 40x40 cells. Colour rules:

    * no previous state: black for 1, white otherwise;
    * with previous state: green = 0 -> 1, red = 1 -> 0, black = other
      cells currently 1, white = everything else.

    :param game_state: ``state[row][col]`` grid, at least 8x8, of 0/1 values.
    :param game_state_prev: Optional grid of the preceding generation.
    :return: ``data:image/svg+xml;base64,...`` string.
    """
    rect_tags = []
    for i in range(8):
        current_row = game_state[i]
        for j in range(8):
            current = current_row[j]
            if game_state_prev is None:
                color = 'black' if current == 1 else 'white'
            elif current == 1:
                # On now: distinguish a fresh birth from a surviving cell.
                color = 'green' if game_state_prev[i][j] == 0 else 'black'
            elif current == 0 and game_state_prev[i][j] == 1:
                color = 'red'
            else:
                color = 'white'
            rect_tags.append(
                f'<rect x="{j*40}" y="{i*40}" width="40" height="40" fill="{color}" />')

    svg_content = (
        '<svg width="320" height="320" xmlns="http://www.w3.org/2000/svg">'
        + ''.join(rect_tags)
        + '</svg>'
    )

    svg_base64 = base64.b64encode(svg_content.encode('utf-8')).decode('utf-8')
    return "data:image/svg+xml;base64," + svg_base64


def displayImageAndPattern(row):
    """Show a row's simplified grid next to its original image.

    Used to eyeball whether two rows really share the same pattern.

    :param row: Mapping/Series with ``game_state``, ``image``, ``epoch_id``
        and ``generation`` entries.
    """
    pattern = row['game_state']
    original_image = row['image']
    print(f"displaying {row['epoch_id']}/{row['generation']}")

    # Simplified black/white rendering first, then the original artwork.
    for source in (simpleGameStateSVG(pattern), original_image):
        display(displaySvgIFrame(source))


def checkRepeatingPatterns(df, max_repeat_length=512):
    """
    Flag rows whose game state already occurred earlier in the same epoch.

    Rows are processed in frame order. The history of seen states is cleared
    whenever ``epoch_id`` differs from the previous row's, so only consecutive
    rows of one epoch are compared. For every hit the matching earlier
    generation is reported and both rows are displayed via
    ``displayImageAndPattern``. Summary counts are printed at the end.

    :param df: DataFrame with ``game_state``, ``epoch_id`` and ``generation``
        columns (plus whatever ``displayImageAndPattern`` reads)
    :param max_repeat_length: How many of the most recent states to compare against
    :return: List of booleans, one per row, True where the state is a repeat
    """
    flags = []
    repeat_count = 0
    repeating_epochs = set()
    history = []
    active_epoch = None

    for index, row in df.iterrows():
        state = row['game_state']
        epoch = row['epoch_id']
        generation = row['generation']

        # A new epoch starts with a clean history.
        if epoch != active_epoch:
            history = []
            active_epoch = epoch

        earlier_hits = [
            seen for seen in history[-max_repeat_length:] if seen['state'] == state]
        is_repeat = bool(earlier_hits)
        flags.append(is_repeat)

        if is_repeat:
            repeat_count += 1
            repeating_epochs.add(epoch)
            print(">>> NEW REPEATING PATTERN <<<")
            for hit in earlier_hits:
                print(
                    f"Game state from epoch {hit['epoch']} generation {hit['generation']} is repeated in epoch {epoch} generation {generation}")
                print(f"index: {index}")
                same_epoch = df['epoch_id'] == hit['epoch']
                same_generation = df['generation'] == hit['generation']
                matched = df[same_epoch & same_generation]

                if not matched.empty:
                    # Show the current row, then the first earlier row it matches.
                    displayImageAndPattern(row)
                    displayImageAndPattern(matched.iloc[0])

        history.append(
            {'state': state, 'epoch': epoch, 'generation': generation})

    print(f"total repetitions: {repeat_count}/{len(df['generation'])}")
    print(
        f"epochs with repetition: {len(repeating_epochs)}/{len(df['epoch_id'].unique())}")
    return flags


def extractColorsFromSvg(svg_string):
    """Collect the distinct ``#...`` colours used as fill or stroke in an SVG.

    Only the ``fill`` and ``stroke`` XML attributes are inspected, and only
    values starting with ``#`` are kept (named colours, ``none`` and colours
    set through ``style`` are not picked up).

    :param svg_string: SVG/XML document as text.
    :return: Set of colour strings exactly as written in the document.
    """
    found = set()
    # iter() yields the root and every descendant element.
    for node in ET.fromstring(svg_string).iter():
        for attribute in ('fill', 'stroke'):
            value = node.get(attribute)
            if value and value.startswith('#'):
                found.add(value)
    return found

## Read data


In [None]:
# Load all run exports and derive the frames used by the rest of the notebook.
directory = '../runs/'
df = read_json_files(directory)
df_flat = flatten_attributes(df)
# Epoch ids restart in every file, so combine them with the file id
# (assumes both columns are strings, making `+` a concatenation - TODO confirm).
df_flat['epoch_id'] = df_flat['mint_ts'] + df_flat['epoch']
# addGameState writes 'game_state' onto df_flat itself and returns a copy
# without 'image'; df_flat therefore keeps both columns afterwards.
df_flat_gamestate = addGameState(df_flat)
# Alias (not a copy) of df_flat: same object, holding 'image' and 'game_state'.
df_flat_images = df_flat
# df_flat = df_flat.drop("image", axis=1)

## Analysis


In [None]:
def _rows_per(column, count_name):
    """Return one row per distinct *column* value of df_flat with its row count."""
    return df_flat.groupby(column).size().reset_index(name=count_name)


def _attribute_bars(field):
    """Bar chart of row counts per value of *field* in df_flat, x sorted by y."""
    return alt.Chart(df_flat).mark_bar().encode(
        x=alt.X(field, sort="y"),
        y="count()"
    )


def _count_bubbles(x_field, y_field):
    """Bubble chart over df_flat; bubble size is the row count per (x, y) pair."""
    return alt.Chart(df_flat).mark_circle().encode(
        x=x_field,
        y=y_field,
        size="count()"
    )


# --- aggregates -------------------------------------------------------------
epoch_lengths = _rows_per('epoch_id', 'generations')
collection_lengths = _rows_per('mint_ts', 'collection_size')

print("unfiltered data length: ", len(epoch_lengths))
filtered_epoch_lengths = epoch_lengths[epoch_lengths['generations'] <= 512]
print("dataset length: ", len(filtered_epoch_lengths))

# --- attribute distributions ------------------------------------------------
chart_pattern = _attribute_bars("pattern")
chart_shape = _attribute_bars("shape")
chart_speed = _attribute_bars("speed")
chart_times = _attribute_bars("times")

# --- epoch / collection sizes -----------------------------------------------
# Histogram of generations per epoch, all epochs.
chart1 = alt.Chart(epoch_lengths).mark_bar().encode(
    alt.X("generations", bin=alt.Bin(maxbins=30)),
    y='count()',
)

# Same histogram restricted to epochs with at most 512 generations.
chart2 = alt.Chart(filtered_epoch_lengths).mark_bar().encode(
    alt.X("generations", bin=True),
    y='count()',
)

# Rows per source file.
chart3 = alt.Chart(collection_lengths).mark_bar().encode(
    alt.Y("collection_size"),
    x='mint_ts'
)

# Per-epoch generation counts, joined back onto every row of df_flat.
epoch_generations = _rows_per('epoch_id', 'generations')
df_flat_merged = pd.merge(df_flat, epoch_generations, on='epoch_id')

# Epoch vs. file, sized by how many generations the epoch has.
chart4 = alt.Chart(df_flat_merged).mark_circle().encode(
    x='epoch',
    y='mint_ts',
    size='generations'
)

# --- pairwise attribute co-occurrence ---------------------------------------
chart5 = _count_bubbles("shape", "pattern")
chart6 = _count_bubbles("times", "pattern")
chart7 = _count_bubbles("times", "shape")
chart8 = _count_bubbles("times", "epoch")

# 'times' distribution, coloured by source file.
chart9 = alt.Chart(df_flat).mark_bar().encode(
    x="times",
    y="count()",
    color="mint_ts"
)

# --- layout: `|` places charts side by side, `&` stacks them ----------------
attribute_row = chart_pattern | chart_shape | chart_speed | chart_times
combined_chart = (attribute_row) & (chart1 | chart2) & (
    chart3 | chart4) & (chart5) & (chart6) & chart7 & chart8 & chart9

# Last expression of the cell: renders the combined chart.
combined_chart

In [None]:
# Flag rows whose game state repeats within their epoch, comparing each row
# against up to the 512 most recent states; matches are printed and displayed.
repeating_patterns = checkRepeatingPatterns(df_flat_images, 512)

In [None]:
# Dump the element structure of one sample image from the raw frame.
# `[100]` is a lookup by index label on the 'image' column.
image_raw = df['image'][100]
image_decoded = decodeBase64(image_raw)

# Prints tag and attributes of every SVG element; returns nothing.
extractGrid(image_decoded)

In [None]:
# List the distinct '#...' fill/stroke colours used by one sample image.
image_example = decodeBase64(df_flat_images['image'][10])
colors = extractColorsFromSvg(image_example)
# Last expression of the cell: shows the resulting set.
colors

## Checking for correct evolution


In [None]:


def is_valid_game_of_life_progression(old_state, new_state):
    """Check whether *new_state* is the Game of Life successor of *old_state*.

    The successor is computed on a bounded grid: cells outside the array are
    simply not counted (no wrap-around at the edges). A cell equal to 1 stays
    1 with two or three live neighbours; any other cell becomes 1 with exactly
    three live neighbours.

    :param old_state: 2-D grid (nested lists or array) of 0/1 values.
    :param new_state: Grid to compare against the computed successor.
    :return: True if the computed successor equals *new_state*.
    """
    before = np.array(old_state)
    after = np.array(new_state)
    expected = np.zeros_like(before)

    for r in range(before.shape[0]):
        row_from = max(0, r - 1)
        for c in range(before.shape[1]):
            here = before[r, c]
            # 3x3 window clipped at the borders; subtract the centre cell so
            # only the neighbours are counted.
            window = before[row_from:r + 2, max(0, c - 1):c + 2]
            live_neighbours = np.sum(window) - here

            if here == 1:
                lives_on = live_neighbours in (2, 3)
            else:
                lives_on = live_neighbours == 3
            expected[r, c] = 1 if lives_on else 0

    return np.array_equal(expected, after)


# Example usage: two hand-written 8x8 grids (row-major, 1 = live cell) fed to
# the validator.
# NOTE(review): with the bounded-edge rule used above this pair appears to be
# reported as invalid - e.g. cell (7, 7) is dead with only two live neighbours
# in old_state yet alive in new_state. Confirm whether that is the intent.
old_state = [
    [0, 0, 0, 0, 0, 0, 0, 1],
    [0, 1, 1, 1, 0, 0, 0, 1],
    [0, 0, 0, 0, 0, 0, 0, 1],
    [0, 0, 1, 0, 0, 0, 1, 1],
    [0, 0, 0, 1, 0, 0, 1, 0],
    [0, 0, 0, 1, 0, 0, 1, 1],
    [0, 0, 0, 0, 0, 0, 1, 0],
    [1, 0, 1, 1, 1, 1, 1, 0]
]

new_state = [
    [0, 0, 0, 0, 0, 0, 0, 1],
    [0, 1, 1, 1, 0, 0, 0, 1],
    [0, 0, 0, 0, 0, 0, 0, 1],
    [0, 0, 0, 1, 0, 0, 1, 1],
    [0, 0, 0, 1, 0, 0, 1, 0],
    [0, 0, 0, 1, 0, 0, 1, 1],
    [0, 0, 0, 0, 0, 0, 1, 0],
    [0, 0, 0, 1, 0, 0, 1, 1]
]

# Compare the computed successor of old_state with new_state and report.
is_valid = is_valid_game_of_life_progression(old_state, new_state)
print(f"Is the progression valid? {is_valid}")

In [None]:


# First two rows (by position) of the frame holding both images and game states.
row_0 = df_flat_images.iloc[0]
row_1 = df_flat_images.iloc[1]

In [None]:
# Game states of the two sample rows, for manual inspection or comparison.
state_0 = row_0['game_state']
state_1 = row_1['game_state']

In [None]:
# check if iterations work correctly
def checkCollectionG0lSequence(df):
    """Print, for each row, whether its game state follows from the previous row's.

    Rows are compared strictly by position in *df*: every row after the first
    is validated against the row directly before it with
    ``is_valid_game_of_life_progression``. Epoch boundaries are not taken into
    account, so the first row of an epoch is compared with the last row of the
    preceding one.

    :param df: DataFrame with a ``game_state`` column.
    """
    previous_state = None

    # Track position separately from the index label: labels from iterrows()
    # are only valid positions when the frame has a default RangeIndex.
    for position, (idx, row) in enumerate(df.iterrows()):
        print("row: ", idx)
        current_state = row["game_state"]

        if position > 0:
            is_valid = is_valid_game_of_life_progression(
                previous_state, current_state)
            print(f"is valid: {is_valid}")
        else:
            # First row has no predecessor to validate against.
            print("row: ", idx)

        previous_state = current_state

In [None]:
# Validate every consecutive pair of rows across the whole data set
# (prints one verdict per row; returns nothing).
checkCollectionG0lSequence(df_flat_images)