In [8]:
import numpy as np
from collections import defaultdict

In [9]:
def loadEpisodes():
    f = open("cascades_train.txt",'r')
    episodes = []
    for episode in f.readlines():
        episode = episode[:-2]   # pour enlever le dernier ;       
        episode = np.array([epi.split(":") for epi in episode.split(";")], float)
        episode = np.array(episode, int)
        episodes.append(episode[episode[:,1].argsort()])
    return np.array(episodes)


episodes = loadEpisodes()
min([max(epi[:,0]) for epi in episodes])

8

In [12]:
class IC():

    def __init__(self, episodes, nbIter=1):
        self.dplusDico = {}
        self.dmoinsDico = np.zeros((100,100))
        self.theta = np.zeros((100,100))
        self.successeur = defaultdict(dict)
        self.predecesseur = defaultdict(dict)
        self.nbIter = nbIter
        self.episodes = episodes
        
    def random_theta(self):
        for i in range(0,100):
            for j in range(0,100):
                self.theta[i][j] = np.random.random() 
    
    def createGraph(self):
        for episode in self.episodes:
            liste_successeurs = [episode[episode[:,1] > episode[i,1]][:,0] for i in range(len(episode))]
            for i,successeur in enumerate(liste_successeurs):
                for s in successeur:
                    proba = np.random.random() 
                    self.successeur[episode[i,0]][int(s)] = proba
                    self.predecesseur[int(s)][episode[i,0]] = proba
    
    def init_dicoDplus(self):
        for i in range(0,100):
            for j in range(0,100):
                self.dplusDico[i,j] = []
                
    
    def dplus(self):
        
        self.init_dicoDplus()
        
        for d,episode in enumerate(self.episodes):
            for i in range(0,len(episode)):
                for j in range(0,len(episode)):
                    if (episode[i][0] == episode[j][0]):
                        continue;
                    elif (episode[j][0] in episode and episode[i][1] < episode[j][1]):
                        self.dplusDico[episode[i][0],episode[j][0]].append(d)     
          
    
    def dmoins(self):
        
        for episode in self.episodes:
            for i in range(0,len(episode)):
                for j in range(0,100):
                    if (j not in episode[:,0]):
                        self.dmoinsDico[episode[i][0]][j] += 1
    
   

    def pdtu(self):
        
        p = {}
            
        for d,episode in enumerate(self.episodes):
                
            t = np.unique(episode[:,1])   #on récupère tous les temps distincts
                
            users = episode[:,0]  #on récupère tous les utilisateurs
                
            p[d] = np.ones(100) 
                    
            for i in range(1,len(t)):
                for u,user in enumerate(users):
                    hasPred = False
                    pdtu = 1.
                    userV = episode[episode[:,1] < t[i]][:,0]  
                        
                    for v in userV:
                        if (v in self.predecesseur[user]):
                            pdtu = pdtu * (1 - self.theta[v][user])    
                            hasPred = True
                    if hasPred:
                        p[d][u] = 1-pdtu 
    
        return p
    """
    def pdtu(self):
        
        p = {}
            
        for d,episode in enumerate(self.episodes):
                
            t = np.unique(episode[:,1])   #on récupère tous les temps distincts
                
            users = episode[:,0]  #on récupère tous les utilisateurs
                
            p[d] = np.zeros((len(t),len(users)))  #on initialise le dico p à la taille de t et users pour chaque episode
            p[d][0][episode[:,1] == t[0]] = 1  #on initialise pour le 1er temps, la proba à 1
                    
            for i in range(1,len(t)):
                for u,user in enumerate(users):
                    hasPred = False
                    pdtu = 1.
                    userV = episode[episode[:,1] < t[i]][:,0]  #on récupère les users qui ont un temps inferieur au temps courant
                        
                    for v in userV:
                        if (v in self.predecesseur[user]):
                            pdtu = pdtu * (1 - self.theta[v][user])    
                            hasPred = True
                    if hasPred:
                        p[d][i,u] = 1-pdtu 
                    else:
                        p[d][i,u] = 1
    
        return p
    """
    
    def fit(self):
        
        self.createGraph()
        self.dplus()
        self.dmoins()
        self.random_theta()
        
        for i in range(0,self.nbIter):
            
            p = self.pdtu()
            
            for u in range(0,100):
                for v in range(0,100):
                    sommeOP = 0
                    for d in self.dplusDico[u,v]:
                        sommeOP += self.theta[u][v]/p[d][v]
                    self.theta[u][v] = sommeOP/(len(self.dplusDico[u,v]) + self.dmoinsDico[u][v])
    
    
ic = IC(episodes)
ic.fit()

#ic.dplus()
#ic.dmoins()
        

print (ic.theta[0])



[        nan  0.02104416  0.01191484  0.09353492  0.09246978  0.0200761
  0.00261204  0.02463025  0.1597674   0.05262528  0.09887367  0.00636031
  0.08748438  0.035782    0.01165784  0.02407588  0.1232488   0.19499995
  0.0910383   0.05656526  0.01808362  0.08804698  0.08396841  0.00366911
  0.11498294  0.09874148  0.11602122  0.05088655  0.10372771  0.04267662
  0.07438718  0.15779429  0.02994124  0.10567839  0.15207735  0.15237776
  0.03509199  0.03793102  0.10945335  0.1077566   0.02294521  0.0728482
  0.03107829  0.14800505  0.07013512  0.04710161  0.10441058  0.01324676
  0.01385853  0.08908781  0.02364296  0.10812637  0.13059409  0.01575171
  0.04111223  0.03563394  0.08796976  0.13566252  0.16455774  0.09588794
  0.10897416  0.15236707  0.01106351  0.1484472   0.01500584  0.08335317
  0.07831253  0.07055524  0.12341471  0.01033143  0.1245543   0.08757027
  0.0824696   0.00591157  0.03767329  0.00318626  0.02128463  0.06743075
  0.04505193  0.03585159  0.09915196  0.10774228  0.0