In [1]:
%matplotlib inline
from matplotlib import pyplot as plt
import seaborn as sns
import numpy as np

# Print numpy floats without scientific notation, using 3 digits of precision.
np.set_printoptions(suppress=True, precision=3)

# Seaborn theme used for every plot in this notebook.
sns.set(style='ticks', palette='Set2')
# NOTE(review): no axes have been drawn at this point, so this call appears to
# have nothing to strip and only yields the empty Figure shown as this cell's
# output -- confirm whether it is needed here.
sns.despine()

<matplotlib.figure.Figure at 0x7f68dee2a160>

In [2]:
import redqueen.utils as U
import redqueen.opt_runs as OR
import redqueen.opt_model as OM
import decorated_options as Deco

The RedQueen demo expects data in the following format:

```
{
  "walls": {
    "1": [1,2,3,4,5], # These time-stamps do not contain posts by our user.
    "2": [1,4,5]
  },
  "broadcasts": {
    "redqueen": [1, 4],
    "poisson": [1, 3]
  }
}
```

However, this shifts the task of calculating performance to the JS library, which would require re-implementing the metrics there and risks introducing bugs. Hence, the performance data should be included with this data.

```
{
  "walls": {
    "1": [1,2,3,4,5],
    "2": [1,4,5]
  },
  "broadcasts": {
    "redqueen": {
        "post_times": [1, 4],
        "performance": {
            "<metric>": [(<timestamp>, <perf>)]
        }
     }, 
    "poisson": {
        "post_times": [1, 3],
        "performance": {
            "<metric>": [(<timestamp>, <perf>)]
        }
    }
  }
}
```

The `<metric>` could be `perf_top_{1,5,10}`, `avg_rank`, `r_2_int`, etc.

# Step 1: Create a SimOpts which will generate raw data

In [3]:
# Simulation options for a one-follower world: our broadcaster is source 0,
# there is a single sink (1000), and one other source (id 1, type 'Hawkes')
# is connected to that same sink. Both edges are listed in `edge_list`.
# NOTE(review): the meaning of `s` and `q` is defined by OM.SimOpts and is not
# visible from this notebook -- confirm against the library.
sim_opts_1_follower = OM.SimOpts(
    src_id=0,
    end_time=100,
    s=np.array([1]),
    q=1.0,
    other_sources=[('Hawkes', {'src_id': 1, 'seed': 1, 'l_0': 1.0, 'alpha': 1.0, 'beta': 5.0})],
    sink_ids=[1000],
    edge_list=[(0, 1000), (1, 1000)]
)

# Step 2: Simulate the walls along with different strategies for our broadcaster

In [4]:
%%time
# Simulate our broadcaster (source 0) with the manager returned by
# `create_manager_with_opt`, and record how many posts it made so that the
# Poisson run can be given the same capacity.
seed = 1
opt_mgr = sim_opts_1_follower.create_manager_with_opt(seed)
opt_mgr.run_dynamic()
opt_df = opt_mgr.state.get_dataframe()
num_opt_tweets = U.num_tweets_of(opt_df, broadcaster_id=0)
perf_opt = {
    'type': 'Opt',
    'seed': seed,
    'capacity': num_opt_tweets,
    'q': sim_opts_1_follower.q
}
# Presumably fills `perf_opt` in place with the performance metrics (its
# return value is not used here) -- verify against OR.add_perf.
OR.add_perf(perf_opt, opt_df, sim_opts_1_follower)

CPU times: user 66.7 ms, sys: 0 ns, total: 66.7 ms
Wall time: 63.1 ms


In [5]:
%%time
seed = 9
poisson_mgr = sim_opts_1_follower.create_manager_with_poisson(seed, capacity=num_opt_tweets)
poisson_mgr.run_dynamic()
poisson_df = poisson_mgr.state.get_dataframe()
num_poisson_tweets = U.num_tweets_of(opt_df, broadcaster_id=0)
perf_poisson = {
    'type': 'Poisson',
    'seed': seed,
    'capacity': num_poisson_tweets,
    'q': sim_opts_1_follower.q
}
OR.add_perf(perf_poisson, poisson_df, sim_opts_1_follower)

CPU times: user 100 ms, sys: 6.67 ms, total: 107 ms
Wall time: 103 ms


# Step 3: Calculate the performance and save JSON output

In [7]:
# Sanity check: count broadcaster 0's posts in each run straight from the
# data frames, so the two strategies can be compared at equal budget.
print('num_opt_tweets = {}, num_poisson_tweets = {}'
      .format(U.num_tweets_of(opt_df, 0), U.num_tweets_of(poisson_df, 0)))

num_opt_tweets = 78.0, num_poisson_tweets = 78.0


In [8]:
# Compare the value returned by U.average_rank for the two runs.
print('avg_rank_opt = {}, avg_rank_poisson = {}'
      .format(U.average_rank(opt_df, sim_opts=sim_opts_1_follower), 
              U.average_rank(poisson_df, sim_opts=sim_opts_1_follower)))

avg_rank_opt = 66.8116376874034, avg_rank_poisson = 201.3461740580856


In [9]:
# Display the metrics dictionary of the optimized run.
perf_opt

{'avg_rank': 66.811637687403405,
 'capacity': 78.0,
 'num_events': 78,
 'r_2': 138.15727122691501,
 's': 1.0,
 'seed': 1,
 'top_1': 56.517519051405124,
 'type': 'Opt',
 'world_events': 154}

In [10]:
# Compare the 'top_1' metric stored in the two performance dictionaries.
print('top_1_opt = {}, top_1_poisson = {}'
      .format(perf_opt['top_1'], perf_poisson['top_1']))

top_1_opt = 56.517519051405124, top_1_poisson = 39.68728199394901


The output JSON structure, for reference:

```
{
  "walls": {
    "1": [1,2,3,4,5],
    "2": [1,4,5]
  },
  "broadcasts": {
    "redqueen": {
        "post_times": [1, 4],
        "performance": {
            "<metric>": [(<timestamp>, <perf>)]
        }
     }, 
    "poisson": {
        "post_times": [1, 3],
        "performance": {
            "<metric>": [(<timestamp>, <perf>)]
        }
    }
  }
}
```

In [11]:
# Display the optimized run's metrics dictionary again, next to the JSON
# layout it has to be mapped onto.
perf_opt

{'avg_rank': 66.811637687403405,
 'capacity': 78.0,
 'num_events': 78,
 'r_2': 138.15727122691501,
 's': 1.0,
 'seed': 1,
 'top_1': 56.517519051405124,
 'type': 'Opt',
 'world_events': 154}

In [12]:
# Preview of the (timestamp, rank) pairs for the JSON output: the rank of
# source 0 from U.rank_of_src_in_df, averaged along axis 1 and paired with
# its index values (presumably one column per sink -- confirm).
tmp = U.rank_of_src_in_df(opt_df, 0).mean(1)
list(zip(tmp.index, tmp.values))

[(0.0, 0.0),
 (0.53960583725918543, 1.0),
 (0.53972021861782848, 2.0),
 (0.61909770798845698, 3.0),
 (0.61921208934710004, 0.0),
 (0.70701425826167574, 1.0),
 (0.90834712903543691, 2.0),
 (1.0670270131155948, 0.0),
 (1.1917144404267215, 1.0),
 (1.2885983120804549, 0.0),
 (1.3478466857159106, 1.0),
 (1.3644643223058284, 2.0),
 (1.5539613197979911, 0.0),
 (1.5772781255977351, 1.0),
 (1.6579275545759009, 2.0),
 (1.8248160139485703, 3.0),
 (1.8697462900750719, 4.0),
 (1.9418837295870937, 5.0),
 (1.9788719733383855, 6.0),
 (2.0827306673084838, 0.0),
 (2.3327966872249237, 1.0),
 (2.3445771877269492, 2.0),
 (2.3605663119933471, 0.0),
 (2.47743895623874, 1.0),
 (2.5253334173526443, 2.0),
 (3.0175300026831691, 0.0),
 (3.3827283444815013, 1.0),
 (3.5340012640634368, 0.0),
 (3.709553894911068, 1.0),
 (3.7258160278525145, 2.0),
 (3.8385804132380663, 3.0),
 (3.9303271190277984, 0.0),
 (4.1460630671717382, 1.0),
 (4.5221016800807083, 0.0),
 (4.725618040142133, 1.0),
 (5.5486743603649726, 2.0),
 (5.6

In [11]:
@Deco.optioned('opts')
def perf_to_json(dfs, names, src_id, sink_ids, end_time):
    """Assemble the walls/broadcasts dictionary used by the demo app.

    dfs      -- event data frames, one per strategy.
    names    -- label for each data frame, paired with `dfs` by position.
    src_id   -- id of our broadcaster.
    sink_ids -- ids of the sinks whose walls are exported.
    end_time -- closes the last interval when accumulating time at the top.

    Returns a dict with two keys. 'walls' maps each sink id to the times of
    the posts on that sink which were not made by `src_id`. 'broadcasts' maps
    each name to that strategy's 'post_times' and its 'performance' series
    ('avg_rank' and 'time_at_top', both lists of (timestamp, value) pairs).
    """
    # Only the first frame is consulted for the walls; the remaining frames
    # are assumed to contain the same walls.
    reference_df = dfs[0]
    walls = {
        sink_id: reference_df[
            (reference_df.src_id != src_id) & (reference_df.sink_id == sink_id)
        ].t.tolist()
        for sink_id in sink_ids
    }

    broadcasts = {}
    for df, name in zip(dfs, names):
        ranks = U.rank_of_src_in_df(df, src_id)
        mean_rank = ranks.mean(1)
        stamps = mean_rank.index

        # 1.0 wherever the rank is below 1 and 0.0 elsewhere, averaged along axis 1.
        frac_at_top = np.where(ranks < 1.0, 1.0, 0.0).mean(1)

        # Length of the interval that follows each timestamp; the final
        # interval extends to end_time.
        boundaries = np.concatenate([ranks.index.values, [end_time]])
        interval_len = np.diff(boundaries)
        cumulative_top_time = np.cumsum(frac_at_top * interval_len)

        own_posts = df[df.src_id == src_id]
        broadcasts[name] = {
            'post_times': own_posts.t.unique().tolist(),
            'performance': {
                'avg_rank': list(zip(stamps, mean_rank.values)),
                'time_at_top': list(zip(stamps, cumulative_top_time)),
            },
        }

    return {'walls': walls, 'broadcasts': broadcasts}


In [12]:
# Build the demo payload for both strategies; src_id, sink_ids and end_time
# are supplied to perf_to_json through `opts`, taken from the SimOpts dict.
example_1 = perf_to_json([opt_df, poisson_df], ['redqueen', 'poisson'], 
                         opts=Deco.Options(**sim_opts_1_follower.get_dict()))

In [13]:
# NOTE(review): src_id=1 is the competing 'Hawkes' source, not our broadcaster
# (source 0). The value shown below and perf_opt['top_1'] add up to the
# end_time of 100, so this looks like a deliberate complement check -- confirm.
U.time_in_top_k(opt_df, src_id=1, K=1, sim_opts=sim_opts_1_follower)

43.482480948594869

In [16]:
example_1['broadcasts']

{'poisson': {'performance': {'avg_rank': [(0.53960583725918543, 1.0),
    (0.53972021861782848, 2.0),
    (0.61909770798845698, 3.0),
    (0.70701425826167574, 4.0),
    (0.90834712903543691, 5.0),
    (1.1917144404267215, 6.0),
    (1.3478466857159106, 7.0),
    (1.3644643223058284, 8.0),
    (1.5772781255977351, 9.0),
    (1.6579275545759009, 10.0),
    (1.8248160139485703, 11.0),
    (1.8697462900750719, 12.0),
    (1.9418837295870937, 13.0),
    (1.9788719733383855, 14.0),
    (2.2866949969676864, 0.0),
    (2.3327966872249237, 1.0),
    (2.3445771877269492, 2.0),
    (2.47743895623874, 3.0),
    (2.5253334173526443, 4.0),
    (3.3827283444815013, 5.0),
    (3.709553894911068, 6.0),
    (3.7258160278525145, 7.0),
    (3.8385804132380663, 8.0),
    (3.8570923162358328, 0.0),
    (3.8748375832319826, 0.0),
    (3.9454817674267062, 0.0),
    (4.1460630671717382, 1.0),
    (4.725618040142133, 2.0),
    (5.3059424485393025, 0.0),
    (5.5486743603649726, 1.0),
    (5.6547599651701477, 2

In [128]:
import json
# Write the demo payload to disk as JSON indented by 2 spaces.
# NOTE(review): the file is named example2 while the variable is example_1 --
# confirm this is the intended target file.
with open('data/example2.json', 'w') as f:
    json.dump(example_1, f, indent=2)