In [1]:
import os
import pandas as pd
path = ''

In [2]:
import numpy as np


class Agent:
    def __init__(self, probs, user_type, user_id=None):
        self._mean_time = probs.loc['dt_mean']
        #self._std_time = probs.loc['dt_std']
        self._probs = probs.drop(['dt_mean'])
        self._probs = self._probs / self._probs.sum(0)
        self._state = 1
        self.user_type = user_type
        if user_id is None:
            user_id = np.random.randint(1e6)
        self.user_id = user_id
        self._track = pd.DataFrame(columns=['user_pseudo_id', 'event_timestamp', 'event_name', 'user_type'])
        self._current_time = 0
        self.event='';
        
    def _step(self):
        probs = self._probs.loc[:, str(self._state)]
        vals = probs.index
        probs = np.nan_to_num(probs.values)
        if np.sum(probs)!=1 :
            next_step=self.event
        else:
            next_step = np.random.choice(vals, p=probs)
        return next_step

    def _delay(self):
        mu = self._mean_time.loc[str(self._state)]
        #sigma = self._std_time.loc[str(self._state)]
        time = np.random.exponential(scale=mu,size=1)[0]
        #return np.exp(time + 1e-6)
        return time
        
    def simulate(self):
        while (self._state != -1) and (self._state < self._probs.shape[1]):
            self._current_time += self._delay()
            self.event = self._step()
            self._track = self._track.append({
                'user_pseudo_id': self.user_id,
                'event_timestamp': self._current_time,
                'event_name': self.event,
                'user_type': self.user_type
            }, ignore_index=True)
            if self.event != 'lost':
                self._state += 1
            else:
                self._state = -1
        return self._track

In [3]:
class Simulator:
    def __init__(self, path, pops=None):
        self._population = self.create_population(path, pops)
        self.clickstream = pd.DataFrame(columns=['user_pseudo_id', 'event_timestamp', 'event_name', 'user_type'])

    def create_population(self, path, pops=None):
        dyn_mat = os.path.join(path, 'dynamic_matrix')
        stats = os.path.join(path, 'stats')
        files = os.listdir(dyn_mat)
        agents = []
        for idx, file in enumerate(sorted(files)):
            
            if pops is None:
                agents_num = pd.read_csv(os.path.join(stats, file)).users_count.iloc[0]
            else:
                agents_num = pops[idx]
            #clus_dyn = pd.read_excel(os.path.join(dyn_mat, file), sheetname='Sheet1', index_col=[0])
            clus_dyn = pd.read_csv(os.path.join(dyn_mat, file), index_col=[0])
            #pd.read_csv(os.path.join(dyn_mat, file), index_col=[0]).to_excel(os.path.join(dyn_mat, file+'.xlsx'))
            for i in range(agents_num):
                agents.append(Agent(clus_dyn, file.split('_')[1].split('.')[0]))
        return agents

    def simulate(self):
        for agent in self._population:
            self.clickstream = self.clickstream.append(agent.simulate())
        return self.clickstream

In [4]:
dyn_mat = os.path.join(path, 'dynamic_matrix')
stats = os.path.join(path, 'stats')
files = os.listdir(dyn_mat)
files[:5]
pd.read_csv(dyn_mat+'/'+'cluster_-1.csv')
df=pd.read_csv(dyn_mat+'/'+'cluster_0.csv', index_col=[0])
df
#df.truncate(after=len(df.columns)-5, axis="columns")
#df.to_csv(dyn_mat+'/'+'cluster_-1.csv')
#df=df.iloc[:,:-5]
#df.to_csv(dyn_mat+'/'+'cluster_-1.csv')
df.isnull().sum().sum()


0

In [5]:
print(dyn_mat)

dynamic_matrix


In [6]:
sim = Simulator(path, [100] * 10)
clicks = sim.simulate()

In [7]:
clicks.user_type.unique()

array(['-1', '0'], dtype=object)

In [8]:
clicks.head(100)

Unnamed: 0,user_pseudo_id,event_timestamp,event_name,user_type
0,17764,0.043090,screen_0,-1
1,17764,0.634936,sleep_1,-1
2,17764,0.689912,sleep_1,-1
3,17764,2.745347,sleep_1,-1
4,17764,3.788291,sleep_1,-1
5,17764,3.901182,sleep_1,-1
6,17764,4.573156,onboarding_login_Type1,-1
7,17764,6.616828,onboarding_login_Type1,-1
8,17764,7.383663,onboarding_login_Type1,-1
9,17764,7.925307,onboarding_login_Type1,-1


In [67]:
clicks.to_csv('clicks.csv')