 ## Import dependencies

In [1]:
import sys, os
sys.path.append('C:\\Users\\ronal\\Documents\\GitHub\\decision-making\\graph_builder') # Append path where graph_builder.py is saved
from graph_builder import *
import pandas as pd
from copy import deepcopy
import csv
import numpy as np
import matplotlib.pyplot as plt
from scipy.special import softmax

  ## Build all graphs from the available tables

In [2]:
# Build one graph per table found in GRAPH_DIRECTORY. Each graph is stored
# under the table's file name with '.csv' removed.
GRAPH_DIRECTORY = 'treebuilderUpdated'
file_names = os.listdir(GRAPH_DIRECTORY)
graphs = {}
for file in file_names:
    # os.path.join picks the right separator for the running OS; the previous
    # hard-coded '\\' only produced a valid path on Windows.
    graphs[file.replace('.csv', '')] = build_graph(os.path.join(GRAPH_DIRECTORY, file))

  # We begin to write the path analysis program

  ## Helper functions

In [3]:
def parse_path_string(path_string):
    """Split a recorded path string into its location strings.

    Helper for analyze_path(). Every 'p' character is removed and the rest is
    split on ';', so 'p(0,0);p(0,1);' becomes ['(0,0)', '(0,1)']. The
    locations stay strings; they are not converted to tuples.

    Empty segments (from a trailing ';', a doubled ';;' or an empty input) are
    all dropped. The previous `list.remove('')` raised ValueError when the
    string had no trailing ';' and only removed the first empty segment.
    """
    segments = path_string.replace('p', '').split(';')
    return [segment for segment in segments if segment]

In [4]:
def root_node(graph):
    """Return the first node reported by graph.in_degree() with in-degree 0.

    Helper for analyze_path(). Raises IndexError when no node has in-degree 0.
    """
    parentless = []
    for vertex, incoming in graph.in_degree():
        if incoming == 0:
            parentless.append(vertex)
    return parentless[0]

In [5]:
def leaf_nodes(graph):
    """Return all nodes with out-degree 0, in the order graph.out_degree() yields them."""
    childless = []
    for vertex, outgoing in graph.out_degree():
        if outgoing == 0:
            childless.append(vertex)
    return childless

In [6]:
def node_value_calculation(graph, node, steps_so_far, prior_prob):
    """Compute the value of `node` from its stored attributes.

    Helper for analyze_path(). Reads the node attributes 'new_observations'
    and 'node_ep' and returns
    ((steps_so_far * new_observations) + node_ep) * prior_prob.
    """
    attributes = graph.nodes[node]
    observed = attributes['new_observations']
    unscaled_value = (steps_so_far * observed) + attributes['node_ep']
    return unscaled_value * prior_prob

In [7]:
def location_in_successors(graph, input_location, current_node):
    """Return the successor of `current_node` located at `input_location`.

    Helper for analyze_path(). Successors are checked in graph order and the
    first one whose 'node_location' attribute equals `input_location` is
    returned. Returns False when no successor matches, so callers must test
    the result with `is False` / `is not False`.

    The attribute is read directly from each successor rather than from a
    dict of every node's location, which was rebuilt on every call.
    Raises KeyError if a checked successor has no 'node_location' attribute.
    """
    for successor in graph.successors(current_node):
        if graph.nodes[successor]['node_location'] == input_location:
            return successor
    return False

In [8]:
def num_successors(graph, node):
    """Return how many successors `node` has in `graph`."""
    return sum(1 for _ in graph.successors(node))

In [9]:
def get_successors(graph, node, index=None):
    """Return the successors of `node` as a list, or one of them.

    With `index` left as None the whole list is returned; otherwise the
    successor at that list position is returned (IndexError if out of range).
    """
    children = list(graph.successors(node))
    return children if index is None else children[index]

In [206]:
def all_node_paths(graph, source='root'):
    """Return every simple path from `source` to any leaf node, as a list of node lists.

    The default source 'root' is a sentinel meaning "start at root_node(graph)";
    any other value is used as the start node itself. Paths come from
    nx.all_simple_paths with leaf_nodes(graph) as the targets.
    """
    start = root_node(graph) if source == 'root' else source
    return list(nx.all_simple_paths(graph, start, leaf_nodes(graph)))

In [11]:
def possible_subject_paths(graph, subject_sequence):
    """Return the root-to-leaf paths that best match the nodes a subject visited.

    Use when the last node in `subject_sequence` is not a leaf. Each path
    from all_node_paths(graph) is scored by how many distinct nodes it shares
    with `subject_sequence`; every path with the highest score is returned,
    in all_node_paths() order.

    Raises ValueError (from max) when the graph has no root-to-leaf path.
    """
    visited = set(subject_sequence)
    # Enumerate the paths once and reuse them; this was previously done twice.
    candidate_paths = all_node_paths(graph)
    overlap_counts = [len(visited.intersection(path)) for path in candidate_paths]
    best_overlap = max(overlap_counts)

    return [
        path
        for path, overlap in zip(candidate_paths, overlap_counts)
        if overlap == best_overlap
    ]

In [127]:
def get_last_path_value(graph, path):
    """Return the 'path_value' attribute stored on the last node of `path`."""
    return graph.nodes[path[-1]]['path_value']

In [13]:
def extra_node_lists(graph, subject_sequence):
    """Return, for each possible subject path, the nodes the subject has not visited yet.

    One list is produced per path from possible_subject_paths(), keeping that
    path's node order and dropping every node found in `subject_sequence`.
    """
    remaining_per_path = []
    for candidate in possible_subject_paths(graph, subject_sequence):
        unvisited = [node for node in candidate if node not in subject_sequence]
        remaining_per_path.append(unvisited)

    return remaining_per_path

In [14]:
def prior_prob(graph):
    """Return 1 divided by the root node's 'black_remains' attribute."""
    root = root_node(graph)
    return 1 / graph.nodes[root]['black_remains']

In [15]:
def valid_location(graph, step, visited_locations, current_node):
    """Tell whether `step` is an allowed next location for the subject.

    A step is rejected (False) only when it is the location of some node in
    the graph but is neither already in `visited_locations` nor the location
    of a successor of `current_node`. Every other step is accepted.

    Returns an explicit True for accepted steps (previously an implicit
    None), so both `is False` checks and plain truthiness tests work.
    `visited_locations` is only read, never modified.
    """
    successors = get_successors(graph, current_node)
    node_location_dict = nx.get_node_attributes(graph, 'node_location')

    # Steps that are not the location of any node are never rejected.
    if step not in node_location_dict.values():
        return True

    if step in visited_locations:
        return True

    # Successors without a 'node_location' attribute are ignored.
    return any(
        successor in node_location_dict and node_location_dict[successor] == step
        for successor in successors
    )

In [16]:
def alt_node_paths(graph, chosen_path):
    """Map every root-to-leaf path except `chosen_path` to its path value.

    Keys are the path's nodes joined by commas (split on ',' to get the node
    list back); values come from get_last_path_value(). Raises ValueError if
    `chosen_path` is not one of the paths from all_node_paths(graph).
    """
    remaining_paths = list(all_node_paths(graph))
    remaining_paths.remove(chosen_path)

    values_by_path = {}
    for path in remaining_paths:
        key = ','.join(path)
        values_by_path[key] = get_last_path_value(graph, path)

    return values_by_path

 ## Update graph nodes with successor values

In [132]:
def get_path_values(graph, node_list):
    """Return the 'path_value' attribute of each node in `node_list`, in the same order."""
    return [graph.nodes[node]['path_value'] for node in node_list]

In [135]:
# Example: read back the stored 'path_value' of four nodes of H.
# NOTE(review): H is not defined above this cell; presumably it is a subject
# graph returned by an earlier analyze_path() run - confirm before re-running.
get_path_values(H, ['N6609', 'N2200', 'N6173', 'N5323'])

[0.0, 3.5, 6.2, 8.2]

In [253]:
def set_successor_values(subject_graph, final_path):
    """Store and return the successor path values for each node on the subject's final path.

    For every node in `final_path` the path values of its successors are
    written to the node attribute 'successor_values'. The value of the
    successor the subject actually chose (the next node in `final_path`)
    comes first; the other values keep their successor order.

    Returns one list per node, without the entry for the last node of
    `final_path`. Raises IndexError if `final_path` is empty.

    The chosen successor is identified by node, not by value. Matching on
    "value appears among the final path's values" ranked a non-chosen
    successor as chosen whenever its value tied with any value on the path.
    """
    final_path_values = get_path_values(subject_graph, final_path)
    value_decision_list = []

    for position, node in enumerate(final_path):
        successors = get_successors(subject_graph, node)
        successor_values = get_path_values(subject_graph, successors)

        has_next = position + 1 < len(final_path)
        if has_next and final_path[position + 1] in successors:
            chosen_index = successors.index(final_path[position + 1])
            ordered_values = (
                [successor_values[chosen_index]]
                + successor_values[:chosen_index]
                + successor_values[chosen_index + 1:]
            )
        else:
            # No identifiable chosen successor (last node, or the next node
            # is not a successor): fall back to the value-based ordering.
            ordered_values = sorted(
                successor_values,
                key=lambda node_value: node_value in final_path_values,
                reverse=True,
            )

        subject_graph.nodes[node]['successor_values'] = ordered_values
        value_decision_list.append(ordered_values)

    # The last node's entry describes no decision the subject made.
    value_decision_list.pop()

    return value_decision_list

In [254]:
# Example: successor path values along a five-node path of H; in the output
# below, the first number in each inner list belongs to the chosen successor.
set_successor_values(H, ['N1104', 'N2825', 'N8121', 'N589', 'N7314'])

[[2.2916666666666665],
 [3.458333333333333],
 [4.916666666666666, 5.083333333333333],
 [5.374999999999999, 7.208333333333332]]

In [None]:
def set_leaf_values(subject_graph, node_sequence):
    """Store and return the reachable leaf path values for each node the subject visited.

    For every node in `node_sequence`, the path values of all paths from that
    node to a leaf are collected (via all_node_paths / get_last_path_value)
    and sorted ascending. Each list except the last is then stably re-ordered
    so that values still reachable from the next visited node come first.
    This assumes the subject aims for the optimal path value at every node
    unless an observed move rules it out.

    The lists are written to the 'leaf_values' node attribute when the
    sequence has at least two nodes. The returned nested list omits the entry
    for the last node. Raises IndexError if `node_sequence` is empty.

    The earlier `chosen_value` computation was removed: its result was never
    used and it cost an extra enumeration of all paths.
    """
    value_decision_list = [
        sorted(
            get_last_path_value(subject_graph, path)
            for path in all_node_paths(subject_graph, node)
        )
        for node in node_sequence
    ]

    for i in range(len(value_decision_list) - 1):
        still_reachable = value_decision_list[i + 1]
        value_decision_list[i] = sorted(
            value_decision_list[i],
            key=lambda value: value in still_reachable,
            reverse=True,
        )
        subject_graph.nodes[node_sequence[i]]['leaf_values'] = value_decision_list[i]
        # The next node gets its ascending list now; unless it is the last
        # node, the next iteration overwrites it with the re-ordered list.
        subject_graph.nodes[node_sequence[i + 1]]['leaf_values'] = still_reachable

    # The last node's entry describes no decision the subject made.
    value_decision_list.pop()

    return value_decision_list


In [276]:
# Example: leaf-value choice lists along a seven-node sequence of H; the
# returned nested list (shown below) has one entry fewer than the sequence.
set_leaf_values(H, ['N1104', 'N2825', 'N8121', 'N8139', 'N8875', 'N8482', 'N3666'])

[[9.0,
  9.416666666666666,
  9.416666666666666,
  9.583333333333332,
  9.833333333333332,
  10.0],
 [9.0,
  9.416666666666666,
  9.416666666666666,
  9.583333333333332,
  9.833333333333332,
  10.0],
 [9.416666666666666,
  9.416666666666666,
  9.833333333333332,
  10.0,
  9.0,
  9.583333333333332],
 [9.416666666666666, 9.833333333333332, 10.0],
 [9.833333333333332, 9.416666666666666],
 [9.833333333333332]]

In [279]:
# Example: expected tau for the leaf-value choice lists of the same sequence.
# get_expected_tau is defined further down, in the "Tau calculations" section,
# so that cell has to be run before this one.
get_expected_tau(set_leaf_values(H, ['N1104', 'N2825', 'N8121', 'N8139', 'N8875', 'N8482', 'N3666']))

0.4229970380737022

 ## Tau calculations

In [257]:
def value_vs_tau_prob_array(nested_value_lists, taus_array):
    """Build the (decisions + 1) x (taus) table of per-tau probabilities.

    Row 0 holds the prior weight of each tau, 1 / taus_array. Each following
    row belongs to one list in `nested_value_lists` and holds, for every tau,
    softmax(-values / tau)[0]: the probability of the first value in the
    list (the value of the node the subject chose) when lower values are
    more likely.

    Parameters:
        nested_value_lists: sequence of non-empty value sequences, chosen
            value first. An empty inner sequence raises IndexError.
        taus_array: 1-D numpy array of candidate tau values.

    The result is always 2-D. With no value lists it has shape
    (1, len(taus_array)) and contains only the prior row, so a column-wise
    product over it stays well-defined.
    """
    rows = [1 / taus_array]  # prior row

    for values in nested_value_lists:
        negated_values = -np.array(values)
        # Only the chosen (first) value's probability is kept for each tau.
        rows.append(np.array([softmax(negated_values / tau)[0] for tau in taus_array]))

    return np.array(rows)

In [258]:
def posterior_tau_prob(value_vs_tau_prob_array):
    """Return the column-wise product of the table, i.e. one unnormalized value per tau."""
    return np.prod(value_vs_tau_prob_array, axis=0)

In [259]:
def norm_posterior_tau_prob(posterior_tau_prob):
    """Return `posterior_tau_prob` divided by its total, so the entries sum to 1."""
    total = np.sum(posterior_tau_prob)
    return posterior_tau_prob / total

In [260]:
def get_expected_tau(nested_value_lists, taus=None):
    """Return the posterior-weighted mean tau for a subject's decisions.

    Parameters:
        nested_value_lists: one value list per decision, chosen value first
            (as returned by set_successor_values / set_leaf_values).
        taus: optional 1-D sequence of candidate tau values. When omitted,
            np.geomspace(0.05, 50) is used, the grid that was previously
            hard-coded.

    The per-tau probabilities from value_vs_tau_prob_array() are multiplied
    per tau, normalized to sum to 1, and used as weights for the taus.
    """
    if taus is None:
        taus = np.geomspace(0.05, 50)
    else:
        taus = np.asarray(taus, dtype=float)

    likelihood_table = value_vs_tau_prob_array(nested_value_lists, taus)
    posterior = norm_posterior_tau_prob(posterior_tau_prob(likelihood_table))

    return np.sum(taus * posterior)

## Set edge weight to next-node value

In [23]:
def set_edge_attribute(graph, node, successor, node_value, attribute='weight'):
    """Set `attribute` (default 'weight') on the edge node -> successor to `node_value`."""
    edge_data = graph.succ[node][successor]
    edge_data[attribute] = node_value

## Calculate path value given a sequence of nodes by adding node values

In [115]:
def set_node_and_path_value(graph, node_sequence):
    """Accumulate node values along `node_sequence` and store them on the nodes.

    The first node is skipped. For every later node, its 'steps_from_parent'
    attribute is added to a running step count, the node value is computed
    with node_value_calculation(), and the attributes 'node_value',
    'path_value' (running sum of node values) and 'steps_from_root' are
    written to the node.

    Returns the path value at the last node, or 0 when the sequence has
    fewer than two nodes.
    """
    prior = prior_prob(graph)
    cumulative_steps = 0
    running_total = 0

    for node in node_sequence[1:]:
        cumulative_steps += graph.nodes[node]['steps_from_parent']
        value_here = node_value_calculation(graph, node, cumulative_steps, prior)
        running_total += value_here

        attributes = graph.nodes[node]
        attributes['node_value'] = value_here
        attributes['path_value'] = running_total
        attributes['steps_from_root'] = cumulative_steps

    return running_total

  ## Path analysis function

In [165]:
def analyze_path(graph, path): # Takes path string as input.
    """Replay a subject's recorded square path on a copy of `graph` and score it.

    Parameters:
        graph: decision graph for the world; it is deep-copied, so the input
            graph itself is not modified.
        path: path string such as 'p(0,0);p(0,1);', parsed with
            parse_path_string(). The first location is skipped; the subject
            is taken to start at '(0,0)'.

    Phase 1 walks the recorded steps. Every step increases the step counters.
    When a step is the location of a successor of the current node, that
    successor becomes the current node and its 'steps_from_root',
    'steps_from_parent', 'node_value' and 'path_value' attributes, plus the
    'weight' of the edge leading to it, are written on the copy.

    Phase 2 extends the visited node sequence along every candidate
    completion from extra_node_lists() and records each completed path's
    value. The path with the smallest value is taken as the subject's final
    path.

    Returns:
        On an invalid step: the list ['ERROR_PATH', step, current_node].
        Otherwise: the tuple (output, subject_graph), where output is
        [final_path_string, rounded final path value, rounded expected tau,
        ';'-joined values of the final path followed by the sorted
        alternatives].

    NOTE(review): the two return shapes differ (list vs. tuple); callers have
    to handle both.
    NOTE(review): set_path_value() is called below but is not defined in the
    visible part of this file; presumably it is an earlier name of
    set_node_and_path_value() or is provided by graph_builder - confirm.
    """
    PRIOR_PROB = prior_prob(graph)
    steps_so_far = 0
    
    current_node_value = 0 # NOTE(review): assigned but never read in this function
    current_node = root_node(graph)
    node_sequence = [root_node(graph)]
    path_value = 0
    input_path = parse_path_string(path)
    LEAF_NODES = leaf_nodes(graph) # NOTE(review): assigned but never read in this function
    subject_graph = deepcopy(graph)
    steps_from_parent = 0
    
    all_visited_locations = {'(0,0)'} # First visited location is subject's starting position.
    subject_graph.nodes[root_node(subject_graph)]['node_value'] = 0
    
    # Phase 1: replay the recorded steps (the first entry, the start square, is skipped).
    for step in input_path[1:]:
        if valid_location(subject_graph, step, all_visited_locations, current_node) is False:
            return ['ERROR_PATH', step, current_node]
        else:
            if location_in_successors(subject_graph, step, current_node) is not False:
                # The step lands on a successor node: advance to it.
                steps_so_far += 1
                steps_from_parent += 1
                next_node = location_in_successors(subject_graph, step, current_node)
                subject_graph.nodes[next_node]['steps_from_root'] = steps_so_far
                subject_graph.nodes[next_node]['steps_from_parent'] = steps_from_parent
                
                next_node_value = node_value_calculation(subject_graph, next_node, steps_so_far, PRIOR_PROB)
                subject_graph.nodes[next_node]['node_value'] = next_node_value

                # Same call and arguments as next_node_value above, so edge_value equals it.
                edge_value = node_value_calculation(subject_graph, next_node, steps_so_far, PRIOR_PROB)
                set_edge_attribute(subject_graph, current_node, next_node, edge_value)
                path_value += edge_value
                
                node_sequence.append(next_node)
                next_path_value = path_value
                subject_graph.nodes[next_node]['path_value'] = next_path_value

                current_node = next_node
                print(f'next node is {next_node}')
                print(f'steps from parent: {steps_from_parent}')
                steps_from_parent = 0 # Restart the count for the next node
                
            else:
                # The step does not reach a successor node: only the counters advance.
                steps_so_far += 1
                steps_from_parent += 1

            all_visited_locations.add(step)
    empirical_path_value = path_value # Saves path node_value at the last node the subject visited in experiment
    empirical_path = node_sequence.copy() # Saves sequence of nodes subject visited in experiment
    empirical_last_node = current_node
    empirical_steps_so_far = steps_so_far
        
#     print('empirical path is', empirical_path)
#     print('empirical step number is', steps_so_far)
#     print('empirical pathvalue is', empirical_path_value)
#     print('empirical last node is', empirical_last_node)
    
#   ------------------------------------------------------------------------------------------------------
    # Phase 2: complete the empirical path along every candidate continuation.
    _extra_node_lists = extra_node_lists(subject_graph, empirical_path)
    path_comparison_dict = {} # Maps comma-joined node sequence -> path value
    
    for node_list in _extra_node_lists:
        # Every candidate starts again from the state at the end of the empirical path.
        node_sequence = empirical_path.copy()
        path_value = empirical_path_value
        current_node = empirical_last_node
        steps_so_far = empirical_steps_so_far
        
        while True:
            try:
                next_node = node_list.pop(0)
                print(f'current_node is {current_node}')
                print(f'next node is {next_node}')
                # The edge's current 'weight' is added to the step count before it is
                # overwritten below.
                # NOTE(review): presumably build_graph stores the number of steps between
                # the two nodes in 'weight' - confirm.
                steps_so_far += subject_graph.succ[current_node][next_node]['weight']
                subject_graph.nodes[next_node]['steps_from_root'] = steps_so_far

                next_node_value = node_value_calculation(subject_graph, next_node, steps_so_far, PRIOR_PROB)
                subject_graph.nodes[next_node]['node_value'] = next_node_value

                edge_value = node_value_calculation(subject_graph, next_node, steps_so_far, PRIOR_PROB)
                set_edge_attribute(subject_graph, current_node, next_node, edge_value)
                path_value += edge_value
                
                node_sequence.append(next_node)
                next_path_value = path_value
                subject_graph.nodes[next_node]['path_value'] = next_path_value
                
                current_node = next_node
                
            except IndexError: # Stop while loop when we have reached the last node in the current node list
                # pop(0) on the emptied list raises the IndexError that ends this candidate.
                path_comparison_dict[','.join(node_sequence)] = path_value
                break
                
    _all_node_paths = all_node_paths(subject_graph)
    
    # NOTE(review): set_path_value is not defined in the visible part of this file - see docstring.
    for path in _all_node_paths:
        set_path_value(subject_graph, path)
                
    final_path_string = min(path_comparison_dict, key = lambda k: path_comparison_dict[k]) # If the subject did not reach a leaf, this is the path we assume the subject would have taken (the optimal choice).
    final_path = final_path_string.split(',')
    final_path_value = path_comparison_dict[final_path_string]
    print(final_path)
    
    nested_value_lists = set_successor_values(subject_graph, final_path) # set_successor_values returns nested lists of successor path values for each node in the final path sequence (chosen value first)
    expected_tau = get_expected_tau(nested_value_lists)
    
    # Path values of all other root-to-leaf paths, ascending.
    alt_path_values = sorted(list(
        alt_node_paths(subject_graph, final_path).values()
                                ))
    
    # The chosen path's value comes first, followed by the alternatives.
    all_values = [str(round(final_path_value, 3))]
    all_values.extend([str(round(num, 3)) for num in alt_path_values])
    
    
    output = [final_path_string, round(final_path_value, 3), round(expected_tau, 3)]
    output.append(';'.join(all_values))
    
    return output, subject_graph
    
#     print(node_sequence)
#     print(path_comparison_dict)

In [166]:
# Example run on the 'cathedral' graph. The returned subject graph is kept as H
# for the example cells elsewhere in this notebook; the printed lines below come
# from the print() calls inside analyze_path.
output, H = analyze_path(graphs['cathedral'], 'p(0,0);p(0,1);p(0,2);p(1,2);p(2,2);p(3,2);p(3,1);p(3,0);p(4,0);p(5,0);p(6,0);p(5,0);p(4,0);p(3,0);p(3,1);p(3,2);p(3,3);p(3,4);p(3,5);')
output

next node is N2825
steps from parent: 2
next node is N8121
steps from parent: 3
next node is N589
steps from parent: 2
next node is N7314
steps from parent: 3
next node is N7282
steps from parent: 8
current_node is N7282
next node is N5010
['N1104', 'N2825', 'N8121', 'N589', 'N7314', 'N7282', 'N5010']


['N1104,N2825,N8121,N589,N7314,N7282,N5010',
 9.583,
 0.197,
 '9.583;9.0;9.417;9.417;9.833;10.0']

In [83]:
# NOTE(review): set_path_value is not defined anywhere in the visible notebook;
# presumably it is an earlier name of set_node_and_path_value or comes from
# graph_builder - confirm before re-running this cell.
set_path_value(H, ['N6609', 'N2200', 'N6173', 'N5323'])

8.2

  ## Analyze experimental data

In [None]:
def parse_data(data_file):
    """Reduce the tab-separated experiment file to one record per run of a subject.

    Parameters:
        data_file: path or file-like object accepted by pandas.read_csv; the
            table must have 'subject', 'world' and 'squarepath' columns.

    Returns a list of [subject, world, squarepath] lists. For every run of
    consecutive rows with the same subject, only the last row is kept. An
    empty table gives an empty list.

    All three fields are read by column name. The previous version read them
    by position inside the loop and by name for the last row.

    NOTE(review): only a change of subject starts a new record, so
    consecutive rows of one subject in different worlds collapse into a
    single record - confirm this matches the layout of the data file.
    """
    experiment_data_frame = pd.read_csv(data_file, sep='\t')
    selected = experiment_data_frame[['subject', 'world', 'squarepath']]
    records = [list(record) for record in selected.itertuples(index=False, name=None)]

    parsed_data = []
    for record, following in zip(records, records[1:]):
        # A row is the last of its run when the next row has another subject.
        if record[0] != following[0]:
            parsed_data.append(record)

    if records:
        parsed_data.append(records[-1])  # the final row always closes a run

    return parsed_data

In [None]:
# Load the experiment file (parse_data reads it as tab-separated despite the
# .csv extension) into [subject, world, squarepath] records.
parsed_data = parse_data('SquareLabelsWithRT_E2.csv')

In [None]:
def analyze_data(parsed_data):
    """Run analyze_path() on every parsed record and split results from errors.

    Parameters:
        parsed_data: list of [subject, world, squarepath] lists; it is
            deep-copied, so the caller's rows are not modified.

    Returns (output_data, error_data). Each output row is the input row
    followed by the four summary fields from analyze_path. Each error row is
    the input row followed by 'ERROR_PATH', the offending step and the node
    the subject was at.

    analyze_path returns the tuple (output, subject_graph) on success but a
    plain list on error. Only the summary list is appended to the row; the
    previous code extended the row with the tuple itself, which added a
    nested list and the graph object instead of the summary fields.
    """
    input_data = deepcopy(parsed_data)
    output_data = []
    error_data = []

    for row in input_data: # Analyze path that each subject followed
        graph_name = row[1]
        input_path = row[2]
        path_analysis = analyze_path(graphs[graph_name], input_path)

        if isinstance(path_analysis, tuple):
            # Success: keep the summary list and drop the subject graph.
            path_analysis = path_analysis[0]

        row.extend(path_analysis)

        if path_analysis[0] == 'ERROR_PATH':
            error_data.append(row)
        else:
            output_data.append(row)

    print(f'{len(input_data)} paths analyzed')
    print(f'There are {len(error_data)} error paths')

    return output_data, error_data

In [None]:
# Analyze every parsed record; rows with an invalid step end up in error_data.
output_data, error_data = analyze_data(parsed_data)

In [None]:
# Export data analysis as csv
def export_results(output_data):
    """Write the analysis rows to 'anaylzed_subject_data.csv' (tab-separated).

    A header row is written first. In the 'all_values' column the first item
    is the chosen path value. The file is created in the current working
    directory and overwritten if it exists.
    """
    column_titles = ['subject', 'world', 'square_path', 'chosen_node_path', 'chosen_value', 'expected_tau', 'all_values']

    # newline='' lets the csv module control line endings itself; without it
    # every row is followed by an empty line on Windows.
    with open('anaylzed_subject_data.csv', 'w', newline='') as file:
        file_writer = csv.writer(file, delimiter='\t')
        file_writer.writerow(column_titles)
        file_writer.writerows(output_data)

In [None]:
# Write the successfully analyzed rows to 'anaylzed_subject_data.csv'.
export_results(output_data)

In [None]:
# Export error data
def export_error_data(error_data):
    """Write the error rows to 'error_data.csv' (tab-separated) with a header row.

    The file is created in the current working directory and overwritten if
    it exists.
    """
    column_titles = ['subject', 'world', 'square_path', 'path_type', 'error_step', 'error_node']

    # newline='' lets the csv module control line endings itself; without it
    # every row is followed by an empty line on Windows.
    with open('error_data.csv', 'w', newline='') as file:
        file_writer = csv.writer(file, delimiter='\t')
        file_writer.writerow(column_titles)
        file_writer.writerows(error_data)

In [None]:
# Write the rows whose path contained an invalid step to 'error_data.csv'.
export_error_data(error_data)