## Simulate data

In [1]:
# Imports and Function definitions
import numpy as np
import json

def simulate_dictionary(name='Name', N=365, seed=None):
    """Simulate N days of monotonic activity metrics for one customer.

    Three metrics are drawn as uniform random integers and then sorted, so
    each series is monotonic over the days; whether it rises or falls is
    itself chosen at random, independently per metric:

    * ``users``          -- integers in [5, 15)
    * ``pageviews``      -- integers in [0, 150)
    * ``avg_visit_time`` -- integers in [3, 60)

    Parameters
    ----------
    name : str
        Stored unchanged under the ``'name'`` key.
    N : int
        Number of days. Each series holds ``[day, value]`` pairs for
        day = 1..N (empty when ``N <= 0``).
    seed : int, optional
        When given, all draws come from a private
        ``np.random.RandomState(seed)``, so the call is reproducible and does
        not touch NumPy's global random state. When ``None`` (the default)
        the global ``np.random`` state is used, as before. Note that reusing
        one seed for several customers produces identical series for them.

    Returns
    -------
    dict
        ``{'name', 'users', 'pageviews', 'avg_visit_time'}`` where every
        metric is a list of ``[day, value]`` pairs of plain Python ints.
    """
    rng = np.random if seed is None else np.random.RandomState(seed)

    def monotonic_series(low, high):
        # N + 1 values are drawn and the one at sorted position 0 is never
        # emitted. This keeps the draw order identical to the previous
        # implementation so unseeded results under np.random.seed(...) do not
        # change.
        samples = [int(rng.randint(low, high)) for _ in range(N + 1)]
        # The direction flag is drawn after the samples (same order as before).
        samples.sort(reverse=bool(rng.randint(2)))
        return [[day, samples[day]] for day in range(1, N + 1)]

    dct = {'name': name}
    dct['users'] = monotonic_series(5, 15)
    dct['pageviews'] = monotonic_series(0, 150)
    # Alternative kept from the original for reference (gamma-distributed times):
    #   [[i, round(np.random.gamma(2, 15), 2)] for i in range(1, N+1)]
    dct['avg_visit_time'] = monotonic_series(3, 60)

    return dct

In [2]:
# Create data
# One simulated record per customer, in the order listed here.
customer_list = ['LIF', 'Dansk Erhverv', 'Demo', 'Master', 'Finansforbundet']

data = list(map(simulate_dictionary, customer_list))

In [23]:
# Save data
# Serialise the records in `data` to activity.json (overwrites an existing file).
with open('activity.json', mode='w') as out_file:
    json.dump(data, fp=out_file)

## Preprocess Google Analytics data

In [144]:
# Imports and Function definitions
import numpy as np
import pandas as pd
import json

def create_dictionary(name, df):
    """Package the daily metrics held in ``df`` as a dict of plain lists.

    ``df`` must provide the columns ``day``, ``Pageviews`` and
    ``avg_visit_time``. The result has the keys ``'name'``, ``'pageviews'``
    and ``'avg_visit_time'``; both metric entries are lists of
    ``[day, value]`` rows. The ``avg_visit_time`` rows are cast to ``int``,
    which truncates any fractional part.
    """
    pageview_rows = df.loc[:, ['day', 'Pageviews']].values
    visit_time_rows = df.loc[:, ['day', 'avg_visit_time']].values

    return {
        'name': name,
        'pageviews': pageview_rows.tolist(),
        'avg_visit_time': visit_time_rows.astype(int).tolist(),
    }

In [145]:
# Load data
# Read the export and drop its final row in one step.
# NOTE(review): the last row is presumably a totals/summary line -- confirm against the CSV.
df = pd.read_csv('google_analytics_data.csv').iloc[:-1]

In [146]:
# Format date
# `day` is the day-of-month number parsed from the 'Day Index' column.
# NOTE(review): day-of-month repeats across months, so rows from different
# months would share a `day` value -- confirm the export covers a single month.
df['day'] = pd.DatetimeIndex(df['Day Index']).day

# Format session time
# The duration column is parsed as a timestamp only to read its hour/minute/second
# parts; the result is the duration expressed in minutes.
# NOTE(review): assumes values look like HH:MM:SS and stay below 24 hours -- confirm.
time = pd.DatetimeIndex(df['Avg. Session Duration'])
df['avg_visit_time'] = time.hour * 60 + time.minute + time.second / 60
# Round to 2 decimals and rebind `df` to the rounded frame.
df = df.round(2)

In [147]:
# Create dictionary data
# A single-record list: the whole frame is exported under the name 'master'.
data = [create_dictionary(name='master', df=df)]

In [148]:
# Save data
# Serialise the records in `data` to activity.json (overwrites an existing file).
with open('activity.json', mode='w') as out_file:
    json.dump(data, fp=out_file)

## Meta data

In [143]:
# Create supporting meta data
# 'axis' lists min and max of Pageviews, followed by min and max of
# avg_visit_time, each converted to a plain float.
meta_data = [{
    'axis': [
        float(bound)
        for column in ('Pageviews', 'avg_visit_time')
        for bound in (df[column].min(), df[column].max())
    ]
}]

meta_data

[{'axis': [0.0, 159.0, 0.0, 60.12]}]

In [75]:
# Save data
#with open('meta.json', 'w') as f:
#    json.dump(meta_data, f)

## Simulate data for version 2

In [33]:
# Imports and Function definitions
import numpy as np
import json

def simulate_dictionary(name='Name', N=365, seed=None):
    """Simulate one aggregate activity record for a customer (version 2 format).

    Unlike the per-day variant defined earlier in this notebook under the same
    name (which this definition replaces once the cell is run), each metric is
    a single integer rather than a series.

    Parameters
    ----------
    name : str
        Stored unchanged under the ``'name'`` key.
    N : int
        Not used by this version; kept so existing calls keep working.
    seed : int, optional
        When given, all draws come from a private
        ``np.random.RandomState(seed)``, so the call is reproducible and does
        not touch NumPy's global random state. When ``None`` (the default)
        the global ``np.random`` state is used, as before. Note that reusing
        one seed for several customers produces identical records for them.

    Returns
    -------
    dict
        ``{'name': name, 'users': int in [5, 15), 'pageviews': int in [0, 150),
        'avg_visit_time': int in [3, 60)}``.
    """
    rng = np.random if seed is None else np.random.RandomState(seed)

    # The dict literal is evaluated top to bottom, so the draw order
    # (users, pageviews, avg_visit_time) matches the previous implementation.
    return {
        'name': name,
        'users': int(rng.randint(5, 15)),
        'pageviews': int(rng.randint(150)),
        'avg_visit_time': int(rng.randint(3, 60)),
    }

In [34]:
# Create data
# One simulated record per customer, in the order listed here.
customer_list = ['LIF', 'Dansk Erhverv', 'Demo', 'Master', 'Finansforbundet']

data = list(map(simulate_dictionary, customer_list))

In [35]:
# Show the simulated records for a visual check
data

[{'name': 'LIF', 'users': 8, 'pageviews': 66, 'avg_visit_time': 17},
 {'name': 'Dansk Erhverv', 'users': 14, 'pageviews': 47, 'avg_visit_time': 44},
 {'name': 'Demo', 'users': 13, 'pageviews': 37, 'avg_visit_time': 15},
 {'name': 'Master', 'users': 12, 'pageviews': 98, 'avg_visit_time': 23},
 {'name': 'Finansforbundet',
  'users': 7,
  'pageviews': 146,
  'avg_visit_time': 53}]

In [36]:
# Save data
# Serialise the records in `data` to 'activity 1 year.json' (overwrites an existing file).
with open('activity 1 year.json', mode='w') as out_file:
    json.dump(data, fp=out_file)