## 12/31 No Layer Learning OR

In [None]:
import json
import os
import math

import numpy as np
import pandas as pd

import matplotlib.pyplot as plt
import seaborn as sns

from spikey.logging import Reader
from spikey.viz import *

In [None]:
# Root directory that holds one sub-folder of logs per experiment group.
base = os.path.join('..', 'log', 'no_layer')

# Pair every directory entry with its full path, then keep only real
# directories: each one becomes a Reader keyed by its folder name.
candidate_paths = {name: os.path.join(base, name) for name in os.listdir(base)}
dataset = {
    name: Reader(folder=path)
    for name, path in candidate_paths.items()
    if os.path.isdir(path)
}
del candidate_paths

In [None]:
# Configuration columns read from every experiment log.
# 'step_size' and 'connection_probability' are deliberately left out.
CONFIG_KEYS = [
    'weight',
    'input',
    'neuron',
    'synapse',
    'readout',
    'firing_threshold',
    'processing_time',
    'magnitude',
    'potential_decay',
    'n_neurons',
    'neuron_pct_inhibitory',
    'prob_rand_fire',
    'refractory_period',
    'resting_mv',
    'spike_delay',
    'stdp_window',
    'learning_rate',
    'max_weight',
    'trace_decay',
    'n_episodes',
    'len_episode',
    'n_inputs',
    'n_outputs',
    'name',
    'reward_mult',
    'punish_mult',
    'prob_rand_fire_setter',
]

# Same columns minus 'prob_rand_fire_setter'; this is the list used to group
# runs into unique configurations (presumably that one column is not hashable).
HASHABLE_CONFIG_KEYS = [
    column for column in CONFIG_KEYS if column != 'prob_rand_fire_setter'
]

# Per-run result columns that get summarised.
RESULT_KEYS = [
    'cluster_coeff_original',
    'cluster_coeff_final',
    'accuracy',
    'action_entropy',
]

In [None]:
## Average Results for Params
# For each experiment folder, build one table with a row per unique hashable
# configuration: the config values of the first run in the group followed by
# the mean of every result column over the group.
mean_dataset = {}

for label, reader in dataset.items():
    data = pd.DataFrame(columns=CONFIG_KEYS + RESULT_KEYS)

    for row_number, relevant in enumerate(
        reader.iter_unique(CONFIG_KEYS + RESULT_KEYS, HASHABLE_CONFIG_KEYS)
    ):
        temp = {result: np.mean(relevant[result]) for result in RESULT_KEYS}
        first_config = list(relevant[CONFIG_KEYS].iloc[0])

        data.loc[row_number] = first_config + [temp[result] for result in RESULT_KEYS]

    mean_dataset[label] = data

del data, relevant, temp

In [None]:
# Show which experiment folders have an averaged-results table.
print(mean_dataset.keys())

In [None]:
## Output rate basins of attraction
# One out_basins figure per experiment folder, titled with the folder name.
for label in dataset:
    reader = dataset[label]
    out_basins(reader['step_inrates'], reader['step_outrates'], title=label)
del reader

In [None]:
# Scatter action entropy against accuracy for each experiment folder and
# overlay a first-degree least-squares fit.
x = 'accuracy'
y = 'action_entropy'

for label, reader in dataset.items():
    X, Y = reader.df[x], reader.df[y]

    plt.scatter(X, Y)
    plt.xlabel(x)
    plt.ylabel(y)

    # Fit on float32 copies and draw the line at each distinct x value.
    X, Y = np.float32(X), np.float32(Y)
    coefficients = np.polyfit(X, Y, 1)
    trend = np.poly1d(coefficients)
    distinct_x = np.unique(X)
    plt.plot(distinct_x, trend(distinct_x))

    plt.title(label)
    plt.show()

del X, Y, coefficients, trend, distinct_x

In [None]:
## Histogram of Accuracies & Entropies
def dfdist(df: pd.DataFrame, value: str, title: str = "") -> None:
    """
    Plot a 10-bin density histogram of one column and mark its mean.

    Parameters
    ----------
    df: pd.DataFrame
        Table containing the column to plot.
    value: str
        Name of the column to plot; also used as the x-axis label.
    title: str, default=""
        Figure title.

    Notes
    -----
    The annotation uses `pd.DataFrame` because the bare name `DataFrame` is
    not imported by name in this notebook and annotations are evaluated when
    the function is defined.
    """
    SCALAR = 1  # To normalize the density to 0, 1

    data = df[value]
    mean = np.mean(data)

    # Histogram restricted to [0, SCALAR]; values outside that range are not binned.
    sns.distplot(
        data * SCALAR, bins=10, hist_kws={"range": (0, SCALAR)}, kde=False, norm_hist=True
    )
    # Vertical marker at the mean, drawn from y=0 to y=0.2.
    plt.plot([mean * SCALAR, mean * SCALAR], [0, 0.2], label="mean", c="black")

    plt.title(title)
    plt.xlabel(value)
    plt.legend()
    plt.show()


# Draw the accuracy histogram and then the action-entropy histogram for
# every experiment folder.
for label, reader in dataset.items():
    for column in ('accuracy', 'action_entropy'):
        dfdist(reader.df, column, title=label)

In [None]:
# Share of runs per experiment folder whose action entropy is exactly zero
# versus non-zero.
for label, reader in dataset.items():
    entropies = reader['action_entropy']
    n_runs = len(entropies)

    zero_entropy = sum(entropies == 0) / n_runs
    nonzero_entropy = sum(entropies != 0) / n_runs

    # Both values are fractions in [0, 1]; the `%` format type multiplies by
    # 100 so the number printed next to the percent sign is a real percentage.
    print(f"{label}: Entropies: Zero {zero_entropy:.2%}, Nonzero {nonzero_entropy:.2%}")

In [None]:
# Best and worst accuracy seen in each experiment folder.
# NOTE(review): values are printed as stored, followed by '%'; if accuracy is
# stored as a 0-1 fraction the percent sign is misleading -- confirm.
for label, reader in dataset.items():
    accuracies = reader['accuracy']
    maxx, minn = np.max(accuracies), np.min(accuracies)

    print(f"{label}: Max Accuracy {maxx}%, min Accuracy {minn}%")

In [None]:
## Quartiles Table
# One row per experiment folder: quartiles and standard deviation of accuracy
# and of action entropy, rendered as an HTML table.
columns = ['Label'] + [
    f'{stat} {metric}'
    for metric in ('accuracy', 'entropy')
    for stat in ('25%', '50%', '75%', 'std')
]
quartiles = pd.DataFrame(columns=columns)

for i, (label, reader) in enumerate(dataset.items()):
    df = reader.df
    accuracy = df['accuracy']
    entropy = df['action_entropy']

    # Insertion order of `output` matches the order of `columns`.
    output = {'Label': label}
    for metric, series in (('accuracy', accuracy), ('entropy', entropy)):
        lower, median, upper = series.quantile([.25, .5, .75])
        output[f'25% {metric}'] = lower
        output[f'50% {metric}'] = median
        output[f'75% {metric}'] = upper
        output[f'std {metric}'] = np.std(series)

    quartiles.loc[i] = list(output.values())

## results.to_csv('results.csv')
from IPython.display import display, HTML
display(HTML(quartiles.to_html()))

del quartiles, output, df, accuracy, entropy, metric, series, lower, median, upper

In [None]:
## Shotgun of Effects of Param Changes w/out scatter
# For each experiment folder (except 'reward' and 'matrix'), plot the averaged
# accuracy and action entropy against the parameter named by the folder's
# suffix; the 'control' folder is plotted against 'weight'.
for label, reader in dataset.items():
    if label == 'reward' or label == 'matrix':
        continue
    df = reader.df

    X = 'weight' if label == 'control' else label.split('-')[-1]
    Y1 = 'accuracy'
    Y2 = 'action_entropy'

    # Raw per-run columns; looked up but not drawn in this scatter-free variant.
    x, y1, y2 = df[X], df[Y1], df[Y2]

    means = mean_dataset[label]
    mean_x, mean_y1, mean_y2 = means[X], means[Y1], means[Y2]

    # Array-valued parameters are reduced to their mean so they can sit on an axis.
    if isinstance(x[0], np.ndarray):
        x = list(map(np.mean, x.values))
        mean_x = list(map(np.mean, mean_x.values))

    for column, averaged in ((Y1, mean_y1), (Y2, mean_y2)):
        plt.plot(mean_x, averaged, label=column)

    plt.xlabel(label)
    plt.legend()
    plt.show()

del y1, y2, mean_y1, mean_y2, x, mean_x, X, Y1, Y2, df, means

In [None]:
# Accuracy violin plots
# One figure per experiment folder: accuracy distribution for each value of
# the parameter named by the folder's suffix ('weight' for 'control').

for label, reader in dataset.items():
    df = reader.df

    X = 'weight' if label == 'control' else label.split('-')[-1]
    Y1 = 'accuracy'

    x, y1 = df[X], df[Y1]

    # Array-valued parameters are reduced to their mean to form categories.
    if isinstance(x[0], np.ndarray):
        x = list(map(np.mean, x.values))

    sns.violinplot(x=x, y=y1)

    plt.title(label)
    plt.show()

del x, y1, X, Y1, df

In [None]:
# Split violin plots of accuracy and action entropy per parameter value,
# one figure per non-control experiment folder.
# Reference call this cell was modelled on:
# ax = sns.violinplot(x="day", y="total_bill", hue="smoker",
#                     data=tips, palette="muted")

for label, df in dataset.items():
    # Swap the Reader for its underlying DataFrame.
    df = df.df

    if 'control' in label:
        continue

    # The folder-name suffix names the parameter that was varied.
    X = label.split('-')[-1]
    Y1 = 'accuracy'
    Y2 = 'action_entropy'

    x = df[X]
    y1 = df[Y1]
    y2 = df[Y2]

    # Array-valued parameters are reduced to their mean to form categories.
    if isinstance(x[0], np.ndarray):
        x = [np.mean(value) for value in x.values]

    # Each call appends one dummy sample (x=0, y=0) carrying the opposite hue
    # value, so both hue levels exist in each call -- presumably so accuracy
    # lands on one half of the split violin and entropy on the other.
    # NOTE(review): the dummy sample also adds an x=0 category -- confirm intended.
    sns.violinplot(x=np.append(x, 0), y=np.append(y1, 0), hue=np.append(np.zeros(len(x)), [1]), split=True)
    sns.violinplot(x=np.append(x, 0), y=np.append(y2, 0), hue=np.append(np.ones(len(x)), [0]), split=True)

    plt.title(label)
    plt.show()

    # Cleanup runs inside the loop, once per plotted folder.
    del x, y1, y2, X, Y1, Y2, df

In [None]:
## Effect of Label on Accuracy & Entropy
# Render each averaged table as HTML: the whole table for control folders,
# otherwise only the varied parameter with accuracy and action entropy.
# Any failure for a folder is printed and the loop moves on.
shown = None
for label, df in mean_dataset.items():
    try:
        if 'control' not in label:
            shown = df[[label.split('-')[-1], 'accuracy', 'action_entropy']]
        else:
            shown = df
        display(HTML(shown.to_html()))
    except Exception as e:
        print(e)
del df, HTML, shown

In [None]:
## Generate average confusion matrices per config setting
# For every unique hashable configuration, sum the final confusion matrices of
# its runs, normalise each expected-state row by that row's total, and print
# the result. Layout is output[expected][real].
for label, reader in dataset.items():
    if label in ['reward', 'matrix']:
        continue
    key = label.split('-')[-1]

    for value, confusions in reader.iter_unique('confusion_final', HASHABLE_CONFIG_KEYS, return_value=True):
        O_STATES = ["0", "1"]
        output = {expected: dict.fromkeys(O_STATES, 0) for expected in ["True", "False"]}

        n_states = 0
        e_counts = dict.fromkeys(["True", "False"], 0)

        ## Sum -- cells missing from a matrix are simply skipped
        for confusion in confusions:
            for e_state in ["True", "False"]:
                if e_state not in confusion:
                    continue
                for r_state in O_STATES:
                    if r_state not in confusion[e_state]:
                        continue

                    count = int(confusion[e_state][r_state])

                    output[e_state][r_state] += count
                    e_counts[e_state] += count
                    n_states += count

        ## Average -- rows that never received a count stay at zero
        for e_state, row_total in e_counts.items():
            if not row_total:
                continue
            for r_state in O_STATES:
                output[e_state][r_state] = round(output[e_state][r_state] / row_total, 2)

        print(f"{label} {value[1][key] if key != 'control' else 'control':.2} {n_states}")

        for e_state, r_states in output.items():
            print(f"{e_state}: {r_states}")
        print()
del output, confusions, reader

In [None]:
## Scatter accuracy of expected 1 vs expected 0
# For every unique hashable configuration, compute each run's accuracy on the
# "True" class and on the "False" class from its final confusion matrix, then
# scatter one against the other and overlay a histogram of the "True" values.
for label, reader in dataset.items():
    if label in ['reward', 'matrix']:
        continue

    key = label.split('-')[-1]

    for value, confusions in reader.iter_unique('confusion_final', HASHABLE_CONFIG_KEYS, return_value=True):
        O_STATES = ["True", "False"]

        output = {o: [] for o in O_STATES}

        for confusion in confusions:
            class_accuracy = {}

            for e_state, r_states in confusion.items():
                n = 0
                n_right = 0
                for r_state in r_states:
                    count = int(r_states[r_state])

                    n += count
                    if (e_state == 'True' and r_state == '1') or (e_state == 'False' and r_state == '0'):
                        n_right += count

                if n == 0:
                    # No samples for this class: report it and leave the
                    # accuracy undefined instead of dividing by zero.
                    print(confusion, r_states)
                    continue
                class_accuracy[e_state] = n_right / n

            # Only keep runs with an accuracy for both classes, so the two
            # lists stay the same length for plt.scatter.
            if all(state in class_accuracy for state in O_STATES):
                for state in O_STATES:
                    output[state].append(class_accuracy[state])

        ##
        o1, o2 = O_STATES

        # The x values are scaled by 10 to match the (0, 10) histogram range
        # below; the y values are plotted unscaled.
        output[o1] = [accuracy * 10 for accuracy in output[o1]]

        plt.scatter(output[o1], output[o2])

        # NOTE: distplot accuracies of o1
        sns.distplot(output[o1], 10, hist_kws={'range': (0, 10)}, kde=False, norm_hist=True)

        plt.title(f"{label} @ {value[1][key] if key != 'control' else 'control'} compare accuracies {o1}, {o2}")
        plt.xlabel(f'accuracies {o1}')
        plt.ylabel(f'accuracies {o2}')
        plt.show()
del output, confusions, reader

In [None]:
## Count number of confusion matrices w/ acc(true) + acc(false) >= 105%
# For every unique hashable configuration, count the runs whose per-class
# accuracies (from the final confusion matrix) sum to at least 1.05.
for label, reader in dataset.items():
    if label in ['reward', 'matrix']:
        continue

    key = label.split('-')[-1]
    for value, confusions in reader.iter_unique('confusion_final', HASHABLE_CONFIG_KEYS, return_value=True):
        n_greater = 0
        for confusion in confusions:
            acc = 0

            for e_state, r_states in confusion.items():
                n = 0
                n_right = 0
                for r_state in r_states:
                    count = int(r_states[r_state])

                    n += count
                    if (e_state == 'True' and r_state == '1') or (e_state == 'False' and r_state == '0'):
                        n_right += count

                # A class with no samples contributes nothing; dividing here
                # would raise ZeroDivisionError.
                if n:
                    acc += n_right / n

            if acc >= 1.05:
                n_greater += 1

        print(f"{label} @ {value[1][key] if key != 'control' else 'control'} n class accuracies > 105% {n_greater} / {len(confusions)}")
del confusions

In [None]:
## dW Bin Counts
"""
for key, reader in dataset.items():
    final_w = np.ravel(reader['weights_final', 1][0])
    original_w = np.ravel(reader['weights_original', 1][0])

    if isinstance(final_w[0], str):
        print(f"W did not read properly on {key}!")
        continue

    delta_w = final_w - original_w

    normalized_delta_w = np.int_(delta_w * 5) + 5

    n_converges_zero = np.sum((original_w != 0) & (final_w == 0))

    ## Remove all 0->0s from bincount
    normalized_delta_w = normalized_delta_w[~((original_w == 0) & (final_w == 0))]

    print(key, np.bincount(normalized_delta_w), '\t\tconverges to zero', n_converges_zero)
del delta_w, normalized_delta_w, n_converges_zero, original_w, final_w
"""

In [None]:
## Average weight change
# Only the 'control' folder is processed: total weight change divided by the
# number of originally non-zero weights.
for key, reader in dataset.items():
    if key != 'control':
        continue

    final_w = np.sum(reader['weights_final', 5])
    original_w = np.sum(reader['weights_original', 5])
    # Was `final_w - original`, an undefined name that raised NameError.
    delta_w = final_w - original_w
    # NOTE(review): the sums above index the reader with 5 while this count
    # uses 0 -- confirm both refer to the same set of runs.
    n_nonzero = np.sum(reader['weights_original', 0] != 0)

    print(key, delta_w / n_nonzero)

In [None]:
## Plot accuracy/entropy changes
# Note: this is only for 1 item in dataset!!!
"""
for key, reader in dataset.items():
    accuracies = reader['ep_action_accuracy', 1][0]
    entropies = reader['ep_action_entropy', 1][0]
    rewards = reader['ep_rewards', 1][0]

    # normalize rewards
    try:
        rewards = rewards / (reader['len_episode', 1][0] * reader['reward_mult', 1][0])
    except Exception:
        continue

    x = np.arange(len(accuracies))

    plt.title(f"{key}")
    plt.scatter(x, accuracies, label='accuracies')
    plt.scatter(x, entropies, label='action_entropies')
    plt.scatter(x, rewards, label='normalized ep_rewards')

    plt.ylim(-1, 1)
    plt.legend()
    plt.show()
del rewards, x, entropies, accuracies
"""

In [None]:
# Scatter final vs. original clustering coefficient, one figure per unique
# hashable configuration of each experiment folder ('matrix' and 'reward'
# are skipped).
for label, reader in dataset.items():
    if label in ['matrix', 'reward']:
        continue
    df = reader.df

    config = df[HASHABLE_CONFIG_KEYS]
    unique_params = config.drop_duplicates(inplace=False)

    # The folder-name suffix names the varied parameter; control folders are
    # titled by their 'neuron' value instead.
    key = label.split('-')[-1]
    if key == 'control':
        key = 'neuron'

    ## Todo add double not in df iterator to iter_unique
    for _, params in unique_params.iterrows():
        # Boolean row mask built once per configuration and passed straight to
        # .loc, so selection does not depend on the index being 0..n-1.
        matches = (config == params).all(axis='columns')
        cluster_coeff_original = df.loc[matches, 'cluster_coeff_original']
        cluster_coeff_final = df.loc[matches, 'cluster_coeff_final']

        plt.title(f"{label} {params[key]:.2}")
        plt.scatter(cluster_coeff_original, cluster_coeff_final)

        plt.xlabel('original')
        plt.ylabel('final')
        plt.show()
del cluster_coeff_final, cluster_coeff_original, df, reader, config

In [None]:
## Input vs Output vs Sys rates Rates over time
"""
# For logic
for label, reader in dataset.items():
    states = reader['step_states']
    inrates = reader['step_inrates']
    sysrates = reader['step_sysrates']
    outrates = reader['step_outrates']

    for state in [[False, False], [False, True], [True, False], [True, True]]:
        plt.scatter(np.where(np.all(states == state, axis=1), outrates, -1))


        plt.title(state)
        plt.show()
"""

In [None]:
## Input rates vs Output rates scatter -- last len_episodes
# Plots the N_PLOTS runs with the highest accuracy in each experiment folder.
N_PLOTS = 5

for label, reader in dataset.items():
    accuracies = reader['accuracy']
    # argsort is ascending, so the tail of the ranking holds the best runs.
    ranking = np.argsort(accuracies).values
    top_accuracies = ranking[-N_PLOTS:]

    inrates = reader['step_inrates', top_accuracies]
    outrates = reader['step_outrates', top_accuracies]
    titles = [f"{label} - {accuracy}" for accuracy in accuracies[top_accuracies]]

    len_episode = int(reader['len_episode', 1][0])
    outrates_scatter(inrates, outrates, len_episode, N_PLOTS, titles=titles)

del reader, accuracies, top_accuracies, inrates, outrates, ranking, len_episode

In [None]:
## Input rates vs Output rates scatter -- last len_episodes
# Plots the N_PLOTS runs with the lowest accuracy in each experiment folder.
N_PLOTS = 5

for label, reader in dataset.items():
    accuracies = reader['accuracy']
    # argsort is ascending, so the head of the ranking holds the worst runs.
    ranking = np.argsort(accuracies).values
    low_accuracies = ranking[:N_PLOTS]

    inrates = reader['step_inrates', low_accuracies]
    outrates = reader['step_outrates', low_accuracies]
    titles = [f"{label} - {accuracy}" for accuracy in accuracies[low_accuracies]]

    len_episode = int(reader['len_episode', 1][0])
    outrates_scatter(inrates, outrates, len_episode, N_PLOTS, titles=titles)
del reader, accuracies, low_accuracies, inrates, outrates, titles, ranking, len_episode

In [None]:
## Input rates vs Output rates scatter -- last len_episodes
# Plots the first N_PLOTS runs (indices 0..N_PLOTS-1) of each experiment
# folder. Despite the variable name, the selection is not randomised.
N_PLOTS = 5

for label, reader in dataset.items():
    accuracies = reader['accuracy']
    random_accuracies = np.int_(np.arange(0, N_PLOTS))

    inrates = reader['step_inrates', random_accuracies]
    outrates = reader['step_outrates', random_accuracies]
    titles = [f"{label} - {accuracy}" for accuracy in accuracies[random_accuracies]]

    outrates_scatter(inrates, outrates, int(reader['len_episode', 1][0]), N_PLOTS, titles=titles)
# Clean up the names this cell defines; it previously listed `low_accuracies`,
# which is not defined here and raised NameError.
del reader, accuracies, random_accuracies, inrates, outrates, titles