In [1]:
import itertools
import pandas as pd

import simulation, evaluation

# Empirical data

In [2]:
# Load the all-weeks / all-countries ranking table (tab-separated) and parse
# the week column into datetimes.
df = pd.read_csv("./data/all-weeks-countries.tsv", sep='\t')
df.week = pd.to_datetime(df.week)
# One label per ranked item: the season title where it is present, otherwise
# the show title. fillna tests for missing values directly and is vectorized,
# replacing a row-wise apply that inferred "missing" from the cell being a float.
df['item_title'] = df.season_title.fillna(df.show_title)
# Restrict to United States TV rows, then drop the columns that the filter and
# item_title make redundant.
empirical = df[(df.country_name=="United States") & (df.category=="TV")]
empirical = empirical.drop(columns=['country_iso2', 'category', 'show_title', 'season_title'])
# Number of distinct weeks and distinct items in the filtered data; the
# simulation sweep below passes both to simulation.Simulation.
n_weeks = empirical.week.nunique()
n_items = empirical.item_title.nunique()

In [3]:
# Reference statistics computed on the empirical US TV data; the simulation
# sweep compares each simulated run against these three objects.
# Both column names are shared by all three evaluation calls.
item_col, period_col = "item_title", "week"
time_empirical = evaluation.time_on_list(empirical, item_col, period_col)
churn_empirical = evaluation.churn(empirical, item_col, period_col)
# movement_prob additionally needs the column holding each item's rank.
transitions_empirical = evaluation.movement_prob(empirical, item_col, period_col, "weekly_rank")

# Simulation param sweep

In [4]:
# Candidate values for the two swept simulation parameters (passed as the last
# two arguments of simulation.Simulation in the sweep loop).
pop_params = [0.2, 0.5, 0.8]
pl_params = [1, 1.5, 2]
# Full grid: every (pop, pl) combination, with pop varying slowest.
param_sweep = [(pop_value, pl_value) for pop_value in pop_params for pl_value in pl_params]

In [6]:
# Run one simulation per (pop, pl) pair and score it against the empirical
# statistics. Each entry appended to `results` records the parameters and the
# three comparison outcomes for that run.
results = []
for pair in param_sweep:
    pop, pl = pair

    # Size the simulation with the empirical item count and number of weeks.
    # NOTE(review): the meaning of the literal 100_000 (second positional
    # argument) is not visible from this notebook -- confirm in simulation.py.
    sim = simulation.Simulation(n_items, 100_000, n_weeks, pop, pl)
    sim.init_simulation()
    sim.run_simulation()

    # Same three statistics as for the empirical data, computed on the
    # simulated top-ten table, whose columns are named differently.
    sim_cols = ("movie", "iteration")
    time_sim = evaluation.time_on_list(sim.top_ten, *sim_cols)
    churn_sim = evaluation.churn(sim.top_ten, *sim_cols)
    transitions_sim = evaluation.movement_prob(sim.top_ten, *sim_cols, "rank")

    # Empirical-vs-simulated comparison for each statistic.
    time_compare = evaluation.compare_distributions(time_empirical, time_sim)
    churn_compare = evaluation.compare_distributions(churn_empirical, churn_sim)
    transitions_compare = evaluation.compare_distributions(transitions_empirical, transitions_sim)

    scores = {
        'pop': pop,
        'pl': pl,
        'time_ks': time_compare,
        'churn_ks': churn_compare,
        'mean_jsd': transitions_compare,
    }
    results.append(scores)

100%|██████████| 36/36 [01:21<00:00,  2.27s/it]
100%|██████████| 36/36 [01:22<00:00,  2.28s/it]
100%|██████████| 36/36 [01:21<00:00,  2.28s/it]
100%|██████████| 36/36 [01:27<00:00,  2.42s/it]
100%|██████████| 36/36 [01:26<00:00,  2.39s/it]
100%|██████████| 36/36 [01:27<00:00,  2.42s/it]
100%|██████████| 36/36 [01:33<00:00,  2.60s/it]
100%|██████████| 36/36 [01:31<00:00,  2.53s/it]
100%|██████████| 36/36 [01:31<00:00,  2.54s/it]


In [7]:
# Order the sweep results in place, smallest mean_jsd first, then display the
# time_ks comparison of the top-ranked run.
results.sort(key=lambda run: run['mean_jsd'])
best_run = results[0]
best_run['time_ks']

KstestResult(statistic=0.3076923076923077, pvalue=0.5881960656115993)

In [10]:
# Display every sweep result; the list was sorted in place by ascending
# mean_jsd in the cell above, so the first entry has the smallest mean_jsd.
results

[{'pop': 0.5,
  'pl': 2,
  'time_ks': KstestResult(statistic=0.3076923076923077, pvalue=0.5881960656115993),
  'churn_ks': KstestResult(statistic=0.5142857142857142, pvalue=0.00014044378179541567),
  'mean_jsd': 0.15081141983865382},
 {'pop': 0.5,
  'pl': 1,
  'time_ks': KstestResult(statistic=0.3076923076923077, pvalue=0.5881960656115993),
  'churn_ks': KstestResult(statistic=0.4, pvalue=0.0068714202633811654),
  'mean_jsd': 0.16009436075576833},
 {'pop': 0.5,
  'pl': 1.5,
  'time_ks': KstestResult(statistic=0.2857142857142857, pvalue=0.6354849613377005),
  'churn_ks': KstestResult(statistic=0.4, pvalue=0.0068714202633811654),
  'mean_jsd': 0.17915119641859237},
 {'pop': 0.2,
  'pl': 1,
  'time_ks': KstestResult(statistic=0.2857142857142857, pvalue=0.6354849613377005),
  'churn_ks': KstestResult(statistic=0.5428571428571428, pvalue=4.421378315781605e-05),
  'mean_jsd': 0.18533293815159574},
 {'pop': 0.8,
  'pl': 1.5,
  'time_ks': KstestResult(statistic=0.3076923076923077, pvalue=0.588