In [14]:
%pylab inline
%reload_ext line_profiler
import pandas as pd
import itertools
import numpy as np
import networkx as nx

Populating the interactive namespace from numpy and matplotlib


In [2]:
# Mutable per-player state, filled in as requests are processed.
exposure_times = dict()  # {player1: {clue1: t_exposed, clue2: t_exposed}...}
frailties = dict()       # {player: frailty draw}

# Passed as the `scale` (standard deviation) of the normal frailty draw.
frailty_scale = np.sqrt(0.713)

# Hazard curves: the 'hazard' column of each file, indexed by the file's first column.
adoption_hazard_curve = pd.read_csv('../analysis/adopt_hazard.csv', index_col=0).loc[:, 'hazard']
forget_hazard_curve = pd.read_csv('../analysis/forget_hazard.csv', index_col=0).loc[:, 'hazard']

# Regressor tables; later cells read their 'means' and 'coef' columns.
adoption_regressors = pd.read_csv('../analysis/adopt_factors.csv', index_col=0)
forget_regressors = pd.read_csv('../analysis/forget_factors.csv', index_col=0)


In [3]:
def fast_n_triangle_paths(M, edge):
    """Count the triangles that adding `edge` to `M` would close.

    Parameters
    ----------
    M : mapping of node -> iterable of neighbors
        Anything supporting ``M[node]`` lookup that raises ``KeyError`` for an
        unknown node.
    edge : sequence of two nodes
        The (from, to) endpoints of the candidate edge.

    Returns
    -------
    int
        Number of neighbors shared by both endpoints, i.e. the number of
        existing two-step paths the edge would close. 0 if either endpoint is
        not in `M`.
    """
    try:
        from_neighbors = set(M[edge[0]])
        to_neighbors = set(M[edge[1]])
    except KeyError:
        # An endpoint is not in the network yet, so there is nothing to close.
        # Only KeyError is caught: any other failure (wrong type for M, a
        # malformed edge) is a real bug and should surface, not read as 0.
        return 0
    return len(from_neighbors & to_neighbors)

In [4]:
# Build the clue-id -> node-pair lookup for every pair of nodes 1..13, and
# sort the clue ids into spokes / spurlinks by how many of nodes 1 and 2
# the pair touches.
edges = {}
spokes = []
spurlinks = []
special_nodes = {1, 2}
for edge in itertools.combinations(range(1, 14), r=2):
    low, high = sorted(edge)
    tclue = 'tclue_%i_%i' % (low, high)
    cclue = 'cclue_%i_%i' % (low, high)

    # The 't' clue always maps to the plain pair.
    edges[tclue] = edge
    # The 'c' clue maps to the same pair when it touches node 1 or 2;
    # otherwise it hangs a synthetic node (a*100+b) off one endpoint,
    # chosen by the parity of the pair's sum.
    if low <= 2:
        edges[cclue] = edge
    elif sum(edge) % 2 == 1:  # sum is odd
        edges[cclue] = (edge[0], edge[0]*100 + edge[1])
    else:
        edges[cclue] = (edge[1], edge[1]*100 + edge[0])

    n_special = len(special_nodes.intersection(edge))
    if n_special == 0:
        spurlinks.extend([tclue, cclue])
    elif n_special == 1:
        spokes.extend([tclue, cclue])
        
    

In [9]:
# Randomly generated request payload for exercising the decision logic:
# the player's own leads/deads notebooks plus three neighbor notebooks,
# each a random-length sample (with replacement) of 't' clue ids.
tclue_ids = [clue_id for clue_id in edges if clue_id.startswith('t')]
data = {
    "leads": np.random.choice(tclue_ids, size=np.random.randint(20)),
    "deads": np.random.choice(tclue_ids, size=np.random.randint(15)),
    "exposed": [
        np.random.choice(tclue_ids, size=np.random.randint(15))
        for _ in range(3)
    ],
    "pId": "joe",
    "t": 40,
}
data

{'leads': array(['tclue_1_13', 'tclue_4_7', 'tclue_1_8', 'tclue_8_13', 'tclue_2_10',
        'tclue_12_13', 'tclue_3_12', 'tclue_2_8', 'tclue_2_7', 'tclue_2_3',
        'tclue_3_7', 'tclue_2_13', 'tclue_1_6', 'tclue_2_10', 'tclue_5_11',
        'tclue_6_7', 'tclue_1_4', 'tclue_2_4'], dtype='<U11'),
 'deads': array(['tclue_6_9', 'tclue_2_4', 'tclue_4_8', 'tclue_7_10', 'tclue_6_7',
        'tclue_1_6', 'tclue_6_9', 'tclue_1_3', 'tclue_3_13'], dtype='<U11'),
 'exposed': [array(['tclue_6_11', 'tclue_10_11', 'tclue_10_11', 'tclue_7_8',
         'tclue_7_12', 'tclue_5_6', 'tclue_7_12', 'tclue_8_13'],
        dtype='<U11'),
  array(['tclue_5_6', 'tclue_4_9', 'tclue_8_12', 'tclue_7_8', 'tclue_9_11',
         'tclue_8_12'], dtype='<U11'),
  array(['tclue_4_11', 'tclue_6_11', 'tclue_3_7', 'tclue_2_11',
         'tclue_3_10', 'tclue_4_12', 'tclue_7_12', 'tclue_3_6',
         'tclue_7_12'], dtype='<U11')],
 'pId': 'joe',
 't': 40}

In [8]:



# Build one row of regression factors per clue visible to the player
# (neighbor notebooks first, then the player's own leads and deads).
factors = {}

M = nx.from_edgelist([edges[k] for k in data['leads']])
F = nx.from_edgelist([edges[k] for k in data['deads']])

# Dict membership, not hasattr(): hasattr() looks for attributes and is always
# False for dict keys, which redrew the frailty on every run.
if data['pId'] not in frailties:  # first call from this player
    frailties[data['pId']] = np.random.normal(loc=0, scale=frailty_scale)

n_neighbors = len(data['exposed'])
player_exposure_times = exposure_times.setdefault(data['pId'], {})

for i, notebook in enumerate(list(data['exposed']) + [data['leads'], data['deads']]):
    from_neighbor = i < n_neighbors  # own leads/deads come last and are not exposures
    for clueId in notebook:
        # Multiple notebooks may expose the same clue; build its factors only once.
        if clueId in factors:
            # Seen again: only repeat sightings in a neighbor notebook add an exposure.
            # NOTE: a clue listed twice in one neighbor notebook is counted twice.
            if from_neighbor:
                factors[clueId]['n_exposures'] += 1
            continue

        nodes = set(edges[clueId])
        factors[clueId] = {
            'n_exposures': 1 if from_neighbor else 0,
            'n_triangle_paths': fast_n_triangle_paths(M, edges[clueId]),
            'is_link_or_spur': len(nodes.intersection({1,2})) == 0,
            'is_spoke': len(nodes.intersection({1,2})) == 1,
            'is_in_deads': F.has_edge(*nodes),
            'is_in_leads': M.has_edge(*nodes),
            'n_rim_connections': sum([v for k,v in M.degree(nodes-{1,2})]),
            'n_existing_leads': M.number_of_edges(),
            # setdefault records data['t'] the first time the clue is seen
            # (giving 0) and otherwise returns the stored first-exposure time.
            'seconds_exposed': data['t'] - player_exposure_times.setdefault(clueId, data['t']),
        }


factors_df = pd.DataFrame(factors).T
factors_df["in_startup_period"] = data['t'] <= 30
1*factors_df

Unnamed: 0,n_exposures,n_triangle_paths,is_link_or_spur,is_spoke,is_in_deads,is_in_leads,n_rim_connections,n_existing_leads,seconds_exposed,in_startup_period
tclue_4_11,1,0,1,0,0,0,0,3,0,1
tclue_4_5,1,0,1,0,0,0,0,3,20,1
tclue_8_9,1,0,1,0,0,0,1,3,0,1
tclue_1_3,1,0,0,1,0,0,1,3,0,1
tclue_2_4,1,0,0,1,0,0,0,3,0,1
tclue_10_12,1,0,1,0,0,0,0,3,20,1
tclue_3_11,1,0,1,0,0,0,1,3,20,1
tclue_1_4,1,0,0,1,0,0,0,3,0,1
tclue_5_10,1,0,1,0,0,0,0,3,0,1
tclue_9_13,1,0,1,0,0,0,0,3,20,1


In [8]:
# Candidate clues for adoption: those not already in the player's leads.
not_in_leads = factors_df['is_in_leads'] == 0
adoption_factors = factors_df.loc[not_in_leads]

# Centre each factor on its regressor mean, then weight by its coefficient.
adoption_factor_log_impacts = (
    adoption_factors
    .sub(adoption_regressors['means'])
    .mul(adoption_regressors['coef'])
)
# The player's frailty draw is added as one more column for every clue.
adoption_factor_log_impacts['frailty'] = frailties[data['pId']]
adoption_factor_log_impacts

Unnamed: 0,in_startup_period,is_in_deads,is_in_leads,is_link_or_spur,is_spoke,n_existing_leads,n_exposures,n_rim_connections,n_triangle_paths,seconds_exposed,frailty
tclue_9_10,-0.065194,0.267213,,-0.184103,-0.0277337,0.0349862,0.473299,0.352515,-0.0111494,,0.10217
tclue_7_13,-0.065194,0.267213,,-0.184103,-0.0277337,0.0349862,0.473299,0.352515,-0.0111494,,0.10217
tclue_2_9,-0.065194,0.267213,,0.325016,0.052518,0.0349862,0.473299,0.133373,-0.0111494,,0.10217
tclue_12_13,-0.065194,0.267213,,-0.184103,-0.0277337,0.0349862,0.473299,0.571657,-0.0111494,,0.10217
tclue_5_7,-0.065194,0.267213,,-0.184103,-0.0277337,0.0349862,0.473299,0.571657,-0.0111494,,0.10217
tclue_3_4,-0.065194,0.267213,,-0.184103,-0.0277337,0.0349862,0.473299,0.133373,-0.0111494,,0.10217
tclue_3_7,-0.065194,0.267213,,-0.184103,-0.0277337,0.0349862,0.473299,0.133373,-0.0111494,,0.10217
tclue_2_6,-0.065194,0.267213,,0.325016,0.052518,0.0349862,0.473299,0.133373,-0.0111494,,0.10217
tclue_6_10,-0.065194,0.267213,,-0.184103,-0.0277337,0.0349862,0.473299,0.352515,-0.0111494,,0.10217
tclue_9_13,-0.065194,0.267213,,-0.184103,-0.0277337,0.0349862,0.473299,0.571657,0.0604605,,0.10217


In [9]:
# Hazard ratio per clue: exponentiate the row-wise sum of the log impacts.
# np.exp is used instead of the bare `exp` injected by %pylab, so the cell
# does not depend on the star-import namespace.
adoption_hazard_ratios = np.exp(adoption_factor_log_impacts.sum(axis=1))
adoption_hazard_ratios

tclue_9_10     2.565113
tclue_7_13     2.565113
tclue_2_9      3.714453
tclue_12_13    3.193586
tclue_5_7      3.193586
tclue_3_4      2.060318
tclue_3_7      2.060318
tclue_2_6      3.714453
tclue_6_10     2.565113
tclue_9_13     3.430666
tclue_1_13     3.714453
tclue_6_12     3.430666
dtype: float64

In [10]:
# Label-based lookup of the hazard-curve value at each clue's seconds_exposed.
adoption_hazard_curve.loc[adoption_factors['seconds_exposed']]

0    0.0
0    0.0
0    0.0
0    0.0
0    0.0
0    0.0
0    0.0
0    0.0
0    0.0
0    0.0
0    0.0
0    0.0
Name: hazard, dtype: float64

In [12]:
# Per-clue adoption hazard: the curve value at the clue's exposure time
# (cast to int for the label lookup), scaled by the clue's hazard ratio.
curve_at_exposure = adoption_factors['seconds_exposed'].apply(
    lambda secs: adoption_hazard_curve.loc[int(secs)]
)
adoption_hazards = curve_at_exposure * adoption_hazard_ratios
adoption_hazards

tclue_6_13    0.0
tclue_8_10    0.0
tclue_2_10    0.0
tclue_4_10    0.0
tclue_6_9     0.0
tclue_7_9     0.0
tclue_1_7     0.0
tclue_3_5     0.0
tclue_9_10    0.0
tclue_4_9     0.0
tclue_1_8     0.0
dtype: float64

In [13]:
# NOTE(review): np.array() is called without its required `object` argument,
# so this cell raises TypeError as written. Presumably an unfinished edit —
# confirm the intended argument or delete the cell.
adoption_hazards = np.array()

TypeError: array() missing required argument 'object' (pos 1)

In [20]:
# One Bernoulli trial per candidate clue, with its hazard as the success probability.
adoption_flags = np.random.binomial(n=1, p=adoption_hazards, size=len(adoption_hazards))
adopted_mask = adoption_flags == 1
adoptions = list(adoption_factors.index[adopted_mask])  # clue ids whose trial came up 1
adoptions

[]

In [15]:
# Scratch check: value of e**3.
np.exp(3)

20.085536923187668

In [28]:
# Scratch check: correlation-coefficient matrix of two hard-coded 80-value samples.
np.corrcoef([ 0.44126749, -0.42049507, -0.42033282,  0.97634381, -1.63995716,
        0.08516478,  0.6751693 , -0.17566285, -0.59943096,  0.01025603,
        0.75900883,  0.53598892, -0.33388192, -1.70663153, -0.59542456,
        0.30459141,  1.26270952, -0.15463952, -1.00530757,  0.33975163,
        0.80005787, -0.9704872 , -0.32664536,  0.50984468, -1.83076194,
       -0.58882816,  1.25064595,  1.30648977,  0.56912635, -0.93162073,
        1.7305599 ,  0.20708487,  1.46512315,  0.88324851,  0.02030905,
       -0.62080379, -1.5229471 , -0.54740679,  0.63626007,  1.35902692,
       -1.41563367,  0.15619407,  0.20324175, -0.16853273, -0.0299296 ,
       -0.38494467, -1.6013614 , -1.84914149, -0.70216884,  0.04194072,
        0.9724456 ,  0.46897603,  0.60068621, -0.41537668,  0.21700771,
       -0.17193326,  0.50851989, -0.71970369, -0.60156546,  0.48214524,
        0.64262627,  0.19213834, -1.36181456, -0.30759162, -0.4127449 ,
        0.84883898,  1.55459562,  0.23779907,  1.1611089 , -0.01639593,
       -0.08278259, -1.27826114,  0.63025367,  0.48089416,  0.85301373,
       -0.09846818,  0.17609656, -1.01739358,  0.21577712,  0.2546806 ],
           [ 1.21893427, -0.68676852,  0.64753704, -0.01022496, -0.54573088,
       -0.28301682, -0.20047632, -1.08237228,  0.0317351 ,  0.64805386,
        0.48403656, -0.14393473, -0.63258988, -0.09122694, -1.03270271,
        0.32061507,  1.03028908,  0.54723382, -0.99036891, -0.10400659,
        0.90904849, -0.15394895,  0.54923577,  0.45287766,  0.83189482,
       -0.69090787, -1.34100533,  0.77090847,  0.46460665, -0.90907996,
        0.14878703,  0.01172324,  1.22980266, -0.30471783,  1.14542727,
       -0.46425339, -0.72716193, -1.02049356,  1.94443999,  0.40509178,
       -0.95112713, -1.00106268,  1.07474664,  0.1472707 , -0.34853381,
        0.02347917, -0.49829195, -0.58809198, -1.18206838, -0.29723053,
       -0.14810726,  0.52955851, -0.95889916, -0.2086616 ,  1.76527351,
        0.26631476,  1.17777095,  0.01911364,  0.4004573 , -1.5019982 ,
       -1.0310089 ,  0.07619845, -0.78139429,  0.47757402,  0.09541521,
        0.92746755,  0.5044701 ,  0.63442975,  1.32081206,  0.05240936,
       -0.52473841, -1.01274198, -0.2488506 ,  0.11162774, -0.41905513,
       -0.51978331,  0.20260884,  0.32246036, -0.20493106, -0.08017254])

array([[1.        , 0.37153328],
       [0.37153328, 1.        ]])

In [34]:
# Look up the hazard at index label 127; a Series result (rather than a
# scalar) means the label occurs more than once in the index.
adoption_hazard_curve.loc[127]

127.0    0.001734
127.0    0.001778
Name: hazard, dtype: float64

In [5]:
# Count occurrences of each index label to check the curve's index for duplicates.
adoption_hazard_curve.index.value_counts()

499.0    1
212.0    1
186.0    1
188.0    1
190.0    1
        ..
177.0    1
381.0    1
179.0    1
181.0    1
0.0      1
Length: 500, dtype: int64

In [16]:
%%timeit

factors = {}

M = nx.from_edgelist([edges[k] for k in data['leads']])
F = nx.from_edgelist([edges[k] for k in data['deads']])

if not hasattr(frailties, data['pId']):  # first call from this player
    frailties[data['pId']] = np.random.normal(loc=0, scale=frailty_scale)

for i, notebook in enumerate(data['exposed'] + [data['leads'], data['deads']]):
    for clueId in notebook:
        nodes = set(edges[clueId])

        # multiple notebooks may explose same clue. Process only the first
        if not hasattr(factors, clueId):  # first encounter amongst visible notebooks
            factors[clueId] = {
                'n_exposures': 1 if i<3 else 0,  # three neighbors
                'n_triangle_paths': fast_n_triangle_paths(M, edges[clueId]),
                'is_link_or_spur': len(nodes.intersection({1,2})) == 0,
                'is_spoke': len(nodes.intersection({1,2})) == 1,
                'is_in_deads': F.has_edge(*nodes),
                'is_in_leads': M.has_edge(*nodes),
                'n_rim_connections': sum([v for k,v in M.degree(nodes-{1,2})]),
                'n_existing_leads': M.number_of_edges()
            }

            if data['pId'] in exposure_times: # seen player before
                if clueId in exposure_times[data['pId']]:
                    factors[clueId]['seconds_exposed'] = data['t'] - exposure_times[data['pId']][clueId]
                else:  # first exposure
                    exposure_times[data['pId']][clueId] = data['t']
                    factors[clueId]['seconds_exposed'] = 0
            else:
                exposure_times[data['pId']] = {clueId: data['t']}
                factors[clueId]['seconds_exposed'] = 0

        else:  # exposed by a second or third neighbor or is in one of self notebooks
            factors[clueId]['exposures'] = factors[clueId]['exposures'] + 1 if i<3 else factors[clueId]['exposures']  # three neighbors


factors_df = pd.DataFrame(factors).T
factors_df["in_startup_period"] = data['t'] <= 30

adoption_factors = factors_df[(factors_df['is_in_leads']==0) & (factors_df['n_exposures']>0)]
adoption_factor_log_impacts = (adoption_factors - adoption_regressors['means'])*adoption_regressors['coef']
adoption_factor_log_impacts['frailty'] = frailties[data['pId']]
adoption_hazard_ratios = np.exp((adoption_factor_log_impacts).sum(axis=1))
adoption_hazards = adoption_factors['seconds_exposed'].apply(
    lambda x: adoption_hazard_curve.loc[int(x)]) * adoption_hazard_ratios
adoption_flags = np.random.binomial(1, p=adoption_hazards, size=len(adoption_hazards))
adoptions = list(adoption_factors.index[adoption_flags==1])  # indicated adoptions


forget_factors = factors_df[(factors_df['is_in_leads']==1)]
forget_factor_log_impacts = (forget_factors - forget_regressors['means'])*forget_regressors['coef']
forget_factor_log_impacts['frailty'] = frailties[data['pId']]  # todo: should have a separate forget frailty
forget_hazard_ratios = np.exp((forget_factor_log_impacts).sum(axis=1))
forget_hazards = forget_factors['seconds_exposed'].apply(
    lambda x: forget_hazard_curve.loc[int(x)]) * forget_hazard_ratios
forget_flags = np.random.binomial(1, p=forget_hazards, size=len(forget_hazards))
forgets = list(forget_factors.index[forget_flags==1])  # indicated adoptions

n_actions_possible = len(adoptions+forgets)
if n_actions_possible > 0: # can only do one per time period...
    act = np.random.choice(["adopt", "forget"], p=[len(adoptions)/n_actions_possible, len(forgets)/n_actions_possible])
    if act == "adopt":
        choice = np.random.choice(adoptions)
        dest = "promising_leads"
        index = 0 #np.random.randint(len(data['leads']))
    else:
        choice = np.random.choice(forgets)
        dest = "dead_ends"
        index = 0 #np.random.randint(len(data['deads']))
else:
    choice = "wait"
    dest = "na"
    index = "na"

response = {
    "drag": choice != "wait",
    "clueId": choice,
    "dest": dest,
    "index": index,
}

27.6 ms ± 5.9 ms per loop (mean ± std. dev. of 7 runs, 10 loops each)


In [21]:
!pip install line_profiler

Collecting line_profiler
[?25l  Downloading https://files.pythonhosted.org/packages/98/bd/4ff4e59f97897d21b3b7d0c97ac77cedab23e6137a8c8fb3cbc9ee9d1f71/line_profiler-3.0.2.tar.gz (45kB)
[K     |████████████████████████████████| 51kB 2.1MB/s eta 0:00:01
[?25h  Installing build dependencies ... [?25ldone
[?25h  Getting requirements to build wheel ... [?25ldone
[?25h    Preparing wheel metadata ... [?25ldone
Building wheels for collected packages: line-profiler
  Building wheel for line-profiler (PEP 517) ... [?25ldone
[?25h  Created wheel for line-profiler: filename=line_profiler-3.0.2-cp37-cp37m-macosx_10_15_x86_64.whl size=52585 sha256=a7dc1189800440db6e85d19c681df67a33fdff7b3ab87f4d967998377d7d59f8
  Stored in directory: /Users/jameshoughton/Library/Caches/pip/wheels/16/f4/37/8dd2209f8ef6bd1be2c88b3e1455abf93c544f39d863a912f1
Successfully built line-profiler
Installing collected packages: line-profiler
Successfully installed line-profiler-3.0.2


In [11]:
def pfunc(data=data):
    """Decide the player's next notebook action for one time step.

    Kept as a single function (no helpers) so that ``%lprun -f pfunc`` profiles
    every line of the decision logic.

    Parameters
    ----------
    data : dict
        Request payload. Keys read here:
        'leads' and 'deads' (clue ids in the player's own notebooks),
        'exposed' (list of neighbor notebooks, each a sequence of clue ids),
        'pId' (player id) and 't' (current time).

    Returns
    -------
    dict
        ``{"drag", "clueId", "dest", "index"}``. When no action is taken
        "clueId" is "wait", "dest" and "index" are "na" and "drag" is falsy.

    Side effects
    ------------
    Draws and stores the player's frailty in the module-level `frailties` on
    first call, and records first-exposure times in `exposure_times`.
    """
    player = data['pId']
    now = data['t']
    leads = data['leads']
    deads = data['deads']
    n_neighbors = len(data['exposed'])

    M = nx.from_edgelist([edges[k] for k in leads])

    # Dict membership, not hasattr(): hasattr() looks for attributes and is
    # always False for dict keys, which redrew the frailty on every call.
    if player not in frailties:  # first call from this player
        frailties[player] = np.random.normal(loc=0, scale=frailty_scale)

    first_seen = exposure_times.setdefault(player, {})

    # Sets give constant-time membership tests inside the loop below.
    lead_ids = set(leads)
    dead_ids = set(deads)

    factors = {}
    for i, notebook in enumerate(list(data['exposed']) + [leads, deads]):
        from_neighbor = i < n_neighbors  # own leads/deads come last and are not exposures
        for clueId in notebook:
            # Multiple notebooks may expose the same clue; build its factors only once.
            if clueId in factors:
                # Seen again: only repeat sightings in a neighbor notebook add an exposure.
                # NOTE: a clue listed twice in one neighbor notebook is counted twice.
                if from_neighbor:
                    factors[clueId]['n_exposures'] += 1
                continue

            nodes = set(edges[clueId])
            factors[clueId] = {
                'n_exposures': 1 if from_neighbor else 0,
                'n_triangle_paths': fast_n_triangle_paths(M, edges[clueId]),
                'is_link_or_spur': clueId in spurlinks,
                'is_spoke': clueId in spokes,
                'is_in_deads': clueId in dead_ids,
                'is_in_leads': clueId in lead_ids,
                'n_rim_connections': sum([v for k,v in M.degree(nodes-{1,2})]),
                'n_existing_leads': len(leads),
                # setdefault records `now` the first time the clue is seen
                # (giving 0) and otherwise returns the stored first-exposure time.
                'seconds_exposed': now - first_seen.setdefault(clueId, now),
            }

    if not factors:
        # No clue is visible anywhere, so there is nothing to adopt or forget
        # (and the column selections below would fail on an empty frame).
        return {"drag": False, "clueId": "wait", "dest": "na", "index": "na"}

    factors_df = pd.DataFrame(factors).T
    factors_df["in_startup_period"] = now <= 30

    # Adoption candidates: not already a lead, and exposed by at least one neighbor.
    adoption_factors = factors_df[(factors_df['is_in_leads']==0) & (factors_df['n_exposures']>0)]
    adoption_factor_log_impacts = (adoption_factors - adoption_regressors['means'])*adoption_regressors['coef']
    adoption_factor_log_impacts['frailty'] = frailties[player]
    adoption_hazard_ratios = np.exp((adoption_factor_log_impacts).sum(axis=1))
    adoption_hazards = adoption_factors['seconds_exposed'].apply(
        lambda x: adoption_hazard_curve.loc[int(x)]) * adoption_hazard_ratios
    adoption_flags = np.random.binomial(1, p=adoption_hazards, size=len(adoption_hazards))
    adoptions = list(adoption_factors.index[adoption_flags==1])  # indicated adoptions

    # Forget candidates: clues currently in the player's leads.
    forget_factors = factors_df[(factors_df['is_in_leads']==1)]
    forget_factor_log_impacts = (forget_factors - forget_regressors['means'])*forget_regressors['coef']
    forget_factor_log_impacts['frailty'] = frailties[player]  # todo: should have a separate forget frailty
    forget_hazard_ratios = np.exp((forget_factor_log_impacts).sum(axis=1))
    forget_hazards = forget_factors['seconds_exposed'].apply(
        lambda x: forget_hazard_curve.loc[int(x)]) * forget_hazard_ratios
    forget_flags = np.random.binomial(1, p=forget_hazards, size=len(forget_hazards))
    forgets = list(forget_factors.index[forget_flags==1])  # indicated forgets

    n_actions_possible = len(adoptions+forgets)
    if n_actions_possible > 0: # can only do one per time period...
        # Pick the action type in proportion to how many candidates each has.
        act = np.random.choice(["adopt", "forget"], p=[len(adoptions)/n_actions_possible, len(forgets)/n_actions_possible])
        if act == "adopt":
            choice = np.random.choice(adoptions)
            dest = "promising_leads"
            index = 0 #np.random.randint(len(data['leads']))
        else:
            choice = np.random.choice(forgets)
            dest = "dead_ends"
            index = 0 #np.random.randint(len(data['deads']))
    else:
        choice = "wait"
        dest = "na"
        index = "na"

    response = {
        "drag": choice != "wait",
        "clueId": choice,
        "dest": dest,
        "index": index,
    }
    return response



In [17]:
# Line-by-line profile of pfunc for one call on the sample payload
# (requires the line_profiler extension loaded in the first cell).
%lprun -f pfunc pfunc(data)