In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import ast
import os

In [2]:
pattern_period = {"block" : 1, #still lifes
              "bee_hive" : 1,
              "loaf" : 1,
              "boat" : 1,
              "ship" : 1,
              "tub" : 1,
              "pond" : 1,

              "blinker" : 2, #oscillators
              "toad" : 2,
              "beacon" : 2,

              "glider":4  #spaceships
              }

In [3]:
def consecutive(data, stepsize=1):    
    return np.split(data, np.where(np.diff(data) != stepsize)[0]+1)

In [4]:
folder_name = 'data/'
all_dim = [[i, i] for i in [15, 20, 25, 30, 35, 40]]
all_seed = list(range(100, 200)) 
native = 37.5 
all_native = [native]
graph = False
iterations = 500
list_iterations = list(range(iterations))      # This is useful when decomposing the dataframe

patterns = ['block', 'bee_hive', 'loaf', 'boat', 'tub', 'blinker', 'toad', 'beacon', 'pulsar', 'glider']
others = ['heat', 'occupancy']
columns = ['block', 'bee_hive', 'loaf', 'boat', 'tub', 'blinker', 'toad', 'beacon', 'pulsar', 'glider', 'heat', 'occupancy']

# Analysis of occupancies and heats

tuples = [(str(dim), str(j)) for dim in all_dim for j in all_native] # + [('[50, 50]', '35'), ('[50, 50]', '50'),  ('[50, 50]', '50')]
index = pd.MultiIndex.from_tuples(tuples, names=['dimension', 'native'])
all_freq = []
for dim in all_dim:
    for native in all_native:
        df = pd.DataFrame(columns=columns)
        for seed in all_seed:
            file_name = folder_name+str(dim[0])+"_"+str(dim[1])+"/"+str(native)+"_"+str(seed)+'.csv'
            # if os.path.isfile(file_name):
            df_tmp = pd.read_csv(file_name, sep=';', skiprows=[0], names=columns, header=None)
            for col in df_tmp.columns:
                df_tmp[col] = df_tmp[col].apply(lambda x: ast.literal_eval(str(x)))
            df = pd.concat([df, df_tmp], ignore_index=True)
        
        # Analysis of occupancies and heats
        n = len(all_seed)
        df_per_native = df[others]
        df_res = pd.DataFrame([((lambda z: df_per_native.loc[z:df.shape[0]:iterations])(z)).mean() for z in list_iterations])     # In this way we average on same time for different seeds
        # Analysis of frequencies and average life
        frequencies = []
        for col in patterns:
            keep_track = []
            df_frequencies = df[col]
            df_frequencies = df_frequencies[df_frequencies != 0].dropna()
            for i in range(df_frequencies.shape[0]):
                element = df_frequencies.iloc[i]
                for sub_element in element:
                    chir, rot, x, y = sub_element
                    keep_track.append([x, y, chir, rot, df_frequencies.index[i]])
                
            df_keep_track = pd.DataFrame(keep_track, columns=['x', 'y', 'chir', 'rot', 'time'])
            unique_lists_in_items = df_keep_track.groupby(['x', 'y', 'chir'])['time'].apply(consecutive, stepsize=pattern_period[col])  # .to_numpy()
            freq = 0
            # for k in range(unique_lists_in_items.shape[0]):
            freq += len(unique_lists_in_items)
            # print(unique_lists_in_items)
            # arr_unique = set(unique_lists_in_items)
            # for var in arr_unique:
            #     filtered = consecutive(df_keep_track[df_keep_track['xy'] == var]['time'], stepsize=pattern_period[col])
            # if col == 'blinker':
            #     freq = int((freq1+freq2)/2)
            # else:
            frequencies.append(freq)
        all_freq.append(frequencies)

        print(native)
        if graph:
            plt.title(f"dim = {dim}, native = {native}")
            plt.plot(df_res['occupancy'], label=f'mean occupancy')
            plt.plot(df_res['heat'], label=f'mean heat')
            plt.xlabel('Time')
            plt.ylabel('Data')
            plt.legend(loc='best')
            plt.grid()
            plt.show()

30
35
40
50
60
30
35
40
50
60


In [5]:
# # dim = 50
# tuples += [('[50, 50]', '35'), ('[50, 50]', '50'),  ('[50, 50]', '50')]
# index = pd.MultiIndex.from_tuples(tuples, names=['dimension', 'native'])

# all_dim = [[i, i] for i in [50]]
# all_seed = list(range(100, 150)) 
# for dim in all_dim:
#         df = pd.DataFrame(columns=columns)
#         for seed in all_seed:
#             file_name = folder_name+str(dim[0])+"_"+str(dim[1])+"/"+str(native)+"_"+str(seed)+'.csv'
#             # if os.path.isfile(file_name):
#             df_tmp = pd.read_csv(file_name, sep=';', skiprows=[0], names=columns, header=None)
#             for col in df_tmp.columns:
#                 df_tmp[col] = df_tmp[col].apply(lambda x: ast.literal_eval(str(x)))
#             df = pd.concat([df, df_tmp], ignore_index=True)
        
#         # Analysis of occupancies and heats
#         n = len(all_seed)
#         df_per_native = df[others]
#         df_res = pd.DataFrame([((lambda z: df_per_native.loc[z:df.shape[0]:iterations])(z)).mean() for z in list_iterations])     # In this way we average on same time for different seeds
#         # Analysis of frequencies and average life
#         frequencies = []
#         for col in patterns:
#             keep_track = []
#             df_frequencies = df[col]
#             df_frequencies = df_frequencies[df_frequencies != 0].dropna()
#             for i in range(df_frequencies.shape[0]):
#                 element = df_frequencies.iloc[i]
#                 for sub_element in element:
#                     chir, rot, x, y = sub_element
#                     keep_track.append([x, y, chir, rot, df_frequencies.index[i]])
                
#             df_keep_track = pd.DataFrame(keep_track, columns=['x', 'y', 'chir', 'rot', 'time'])
#             unique_lists_in_items = df_keep_track.groupby(['x', 'y', 'chir'])['time'].apply(consecutive, stepsize=pattern_period[col])  # .to_numpy()
#             freq = 0
#             # for k in range(unique_lists_in_items.shape[0]):
#             freq += len(unique_lists_in_items)
#             # print(unique_lists_in_items)
#             # arr_unique = set(unique_lists_in_items)
#             # for var in arr_unique:
#             #     filtered = consecutive(df_keep_track[df_keep_track['xy'] == var]['time'], stepsize=pattern_period[col])
#             # if col == 'blinker':
#             #     freq = int((freq1+freq2)/2)
#             # else:
#             frequencies.append(freq)

#         all_freq.append(frequencies)

#         print(native)
#         if graph:
#             plt.title(f"dim = {dim}, native = {native}")
#             plt.plot(df_res['occupancy'], label=f'mean occupancy')
#             plt.plot(df_res['heat'], label=f'mean heat')
#             plt.xlabel('Time')
#             plt.ylabel('Data')
#             plt.legend(loc='best')
#             plt.grid()
#             plt.show()

35
50
60


In [6]:
tuples = [(str(dim), str(j)) for dim in [[15, 15], [30, 30]] for j in [30, 35, 40, 50, 60] ] + [('[50, 50]', '35'), ('[50, 50]', '50'),  ('[50, 50]', '60')]
index = pd.MultiIndex.from_tuples(tuples, names=['dimension', 'native'])

In [7]:
df_final = pd.DataFrame(all_freq, index=index, columns=patterns)#/len(all_seed)/iterations
df_final

Unnamed: 0_level_0,Unnamed: 1_level_0,block,bee_hive,loaf,boat,tub,blinker,toad,beacon,pulsar,glider
dimension,native,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
"[15, 15]",30,219,80,28,24,24,196,6,6,0,28
"[15, 15]",35,213,86,26,28,22,196,10,3,0,41
"[15, 15]",40,211,68,18,12,18,195,8,1,0,32
"[15, 15]",50,218,76,25,17,18,193,7,2,0,40
"[15, 15]",60,191,84,21,19,22,193,8,2,0,43
"[30, 30]",30,891,501,158,172,152,841,46,13,0,341
"[30, 30]",35,895,518,176,194,173,841,45,17,0,310
"[30, 30]",40,892,534,167,167,160,841,40,17,0,283
"[30, 30]",50,893,514,186,197,150,841,47,9,0,276
"[30, 30]",60,887,506,170,163,146,841,44,13,0,368


In [8]:
rows = [df_final.iloc[i].sum() for i in range(len(patterns)+len(all_native))]
# df_final['blinker'] /= 2
df_final2 = df_final/np.array(rows).reshape(len(rows), -1)*99/(99-5-1.5+1)  # we have neglected Ship, Pond, but considered Pulsar
df_final2

Unnamed: 0_level_0,Unnamed: 1_level_0,block,bee_hive,loaf,boat,tub,blinker,toad,beacon,pulsar,glider
dimension,native,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
"[15, 15]",30,0.379513,0.138635,0.048522,0.04159,0.04159,0.339655,0.010398,0.010398,0.0,0.048522
"[15, 15]",35,0.360847,0.145694,0.044047,0.047435,0.037271,0.332047,0.016941,0.005082,0.0,0.069459
"[15, 15]",40,0.396824,0.127886,0.033852,0.022568,0.033852,0.366733,0.015045,0.001881,0.0,0.060182
"[15, 15]",50,0.387288,0.135018,0.044414,0.030201,0.031978,0.342874,0.012436,0.003553,0.0,0.071062
"[15, 15]",60,0.346887,0.152558,0.038139,0.034507,0.039956,0.35052,0.014529,0.003632,0.0,0.078095
"[30, 30]",30,0.302861,0.170296,0.053706,0.058465,0.051667,0.285865,0.015636,0.004419,0.0,0.11591
"[30, 30]",35,0.299037,0.173074,0.058805,0.064819,0.057803,0.280994,0.015035,0.00568,0.0,0.103577
"[30, 30]",40,0.30457,0.182332,0.057021,0.057021,0.054631,0.287156,0.013658,0.005805,0.0,0.096629
"[30, 30]",50,0.303736,0.174827,0.063264,0.067006,0.051019,0.286049,0.015986,0.003061,0.0,0.093876
"[30, 30]",60,0.299291,0.170734,0.057361,0.054999,0.049263,0.28377,0.014846,0.004386,0.0,0.124171


In [9]:
rows2 = [df_final2.iloc[i].sum() for i in range(len(patterns)+len(all_native))]
rows2

[1.0588235294117647,
 1.0588235294117647,
 1.0588235294117647,
 1.0588235294117647,
 1.058823529411765,
 1.0588235294117645,
 1.0588235294117647,
 1.0588235294117647,
 1.0588235294117647,
 1.0588235294117647,
 1.0588235294117647,
 1.0588235294117645,
 1.0588235294117647]

In [10]:
for col in patterns:
    ax = df_final2[col].unstack(level=0).plot(kind='bar', subplots=True, title=[col, col], figsize=(9, 7), xlabel='Natives', ylabel='Relative Frequencies')#, layout=(2, 2))
    plt.tight_layout()
    plt.show()

ValueError: Index contains duplicate entries, cannot reshape