In [1]:
import numpy as np
import random
from collections import defaultdict

In [2]:
class IndependentCascade(object):
    """Independent Cascade (IC) diffusion model fitted with an EM-style loop.

    The graph is stored twice, as nested dictionaries:

    * ``preds[v][u]`` -- transmission probability of the edge ``u -> v``,
      indexed by the target ``v``;
    * ``succs[u][v]`` -- the same probability, indexed by the source ``u``.

    An episode is a 2-D array with one row per infected user: column 0 is
    the user id and column 1 the infection time.  User ids are used as
    matrix indices in ``fit`` and must therefore be non-negative integers.
    """

    def __init__(self, delta=1e-5, maxIter=500):
        """Store the stopping parameters and create an empty graph.

        delta   -- threshold on the improvement of the convergence measure.
        maxIter -- iteration index at which ``fit`` stops.
        """
        self.preds = defaultdict(dict)
        self.delta = delta
        self.maxIter = maxIter
        self.succs = defaultdict(dict)

    def createGraph(self, episodes):
        """Add an edge ``u -> v`` for every pair where v is infected after u.

        Each edge found in an episode is (re)initialised with a random
        probability drawn uniformly from [0, 0.05).
        """
        for ep in episodes:
            for row in ep:
                src = int(row[0])
                # Users infected strictly later than ``src`` in this episode.
                later = ep[ep[:, 1] > row[1]][:, 0]
                for dst in later:
                    dst = int(dst)
                    r = random.random() / 20
                    self.preds[dst][src] = r
                    self.succs[src][dst] = r

    def fit(self, episodes, nIter=10000):
        """Re-estimate the edge probabilities from ``episodes``.

        episodes -- iterable of episode arrays; every ordered pair
                    (u infected before v) must already be an edge of the
                    graph (see ``createGraph``), otherwise KeyError is
                    raised.
        nIter    -- number of simulations used for the score printed after
                    each iteration (forwarded to ``score``).

        NOTE(review): the convergence measure ``logL`` is still a
        placeholder (``it + 1``), so the ``delta`` test never fires for
        ``delta < 1`` and the loop runs until ``maxIter``.
        """
        self.episodes = episodes
        size = self._matrixSize(episodes)
        it = 0
        prevLogL = None
        while True:
            p = self._infectionProbabilities(episodes)
            self._updateProbabilities(episodes, p, size)

            # Placeholder for the log-likelihood (see the NOTE above).
            logL = it + 1
            print("%s %s" % (it, self.score(episodes, nIter)))

            # ``>=`` rather than ``==`` so that maxIter=0 also terminates.
            if it != 0 and (it >= self.maxIter
                            or logL - prevLogL < self.delta):
                break
            it += 1
            prevLogL = logL

    def _matrixSize(self, episodes):
        """Return 1 + the largest user id found in the graph or episodes."""
        top = -1
        for table in (self.preds, self.succs):
            for key in table:
                top = max(top, int(key))
        for episode in episodes:
            if len(episode):
                top = max(top, int(episode[:, 0].max()))
        return top + 1

    def _infectionProbabilities(self, episodes):
        """Return ``p[d][t, u]``: infection probability per episode/time/user.

        For the t-th distinct time of episode d, ``p[d][t, u]`` is
        ``1 - prod(1 - k(v, u))`` over the known predecessors v of u that
        were infected strictly before that time, or 1 when there is none.
        """
        p = {}
        for d, episode in enumerate(episodes):
            stamps = episode[:, 1]
            times = np.unique(stamps)
            users = episode[:, 0]

            p[d] = np.zeros((len(times), len(users)))
            p[d][0, stamps == times[0]] = 1

            for t in range(1, len(times)):
                earlier = episode[stamps < times[t]][:, 0]
                for u, user in enumerate(users):
                    # ``get`` avoids growing the defaultdict as a side effect.
                    incoming = self.preds.get(user, {})
                    found = False
                    miss = 1.
                    for v in earlier:
                        if v in incoming:
                            found = True
                            miss = miss * (1 - incoming[v])
                    p[d][t, u] = 1 - miss if found else 1
        return p

    def _updateProbabilities(self, episodes, p, size):
        """Likelihood-maximisation step: overwrite every edge probability."""
        dplus = np.zeros((size, size))
        dminus = np.ones((size, size)) * len(episodes)
        theta = np.zeros((size, size))
        for d, episode in enumerate(episodes):
            stamps = episode[:, 1]
            users = episode[:, 0]
            # timeIdx[v] is the row of p[d] matching the time of user v.
            _, timeIdx = np.unique(stamps, return_inverse=True)
            for u, uId in enumerate(users):
                for v, vId in enumerate(users):
                    dminus[uId, vId] -= 1
                    if stamps[v] > stamps[u]:
                        dplus[uId, vId] += 1
                        theta[uId, vId] += (self.preds[vId][uId]
                                            / p[d][timeIdx[v], v])
        # Only existing edges are read back, so divide per edge instead of
        # dividing the whole matrix (which triggers 0/0 on unused cells).
        for u in self.preds:
            for v in self.preds[u]:
                value = theta[v, u] / (dplus[v, u] + dminus[v, u])
                self.preds[u][v] = value
                self.succs[v][u] = value

    def inference(self, s0):
        """Simulate one cascade starting from the seed users ``s0``.

        Returns ``(s, infected)`` where ``s[t]`` is the list of users
        infected at step t (``s[0]`` holds the seeds, the last list is
        empty) and ``infected`` maps every user that was reached or tested
        to True/False.  Seeds are marked infected from the start so they
        cannot be re-infected and spread a second time.
        """
        infected = defaultdict(bool)
        # Copy to a list: comparing a numpy array with [] is not a reliable
        # emptiness test.
        seeds = list(s0)
        for seed in seeds:
            infected[seed] = True
        s = [seeds]
        while s[-1]:
            frontier = []
            for i in s[-1]:
                for j, prob in self.succs.get(i, {}).items():
                    if (not infected[j]) and (random.random() < prob):
                        infected[j] = True
                        frontier.append(j)
            s.append(frontier)
        return s, infected

    def predict(self, s0, nIter=10000):
        """Estimate infection probabilities by averaging ``nIter`` cascades.

        Returns a dict mapping each user seen during the simulations to the
        fraction of simulations in which it ended up infected.
        """
        suminfected = defaultdict(float)
        for _ in range(nIter):
            _, infected = self.inference(s0)
            for j, flag in infected.items():
                suminfected[j] += flag
        # Normalise over the actual keys (user ids), not over 0..len-1.
        for j in list(suminfected):
            suminfected[j] = suminfected[j] / nIter
        return suminfected

    def score(self, episodes, nIter=10000):
        """Return the mean average precision of the predicted rankings.

        For each episode the users infected at the first time step are the
        seeds; the remaining users are ranked by decreasing predicted
        probability and the average precision is computed against the users
        of the episode.  Returns 0.0 when ``episodes`` is empty.
        """
        if len(episodes) == 0:
            return 0.0
        score = 0
        for episode in episodes:
            times = np.unique(episode[:, 1])
            users = episode[:, 0]
            sources = users[episode[:, 1] == times[0]]
            pred = self.predict(sources, nIter)
            ids = np.array(list(pred.keys()))
            probs = np.array(list(pred.values()))
            rank = ids[(-probs).argsort()]
            truth = set(users.tolist())
            scoreEp = 0
            count = 0.0
            for i, u in enumerate(rank):
                if u in truth:
                    count += 1.0
                    scoreEp += count / (i + 1)
            score += scoreEp / len(users)
        score /= len(episodes)
        return score
                

In [None]:
# predictions = {}
# predictions[ep]: sorted list of users
def mapEvaluation(predictions):
    """Evaluate ranked predictions (not implemented yet).

    predictions -- dict mapping an episode to its sorted list of users.

    Raises NotImplementedError: the function was left without a body, which
    prevented the file from being parsed; the ground truth needed for the
    evaluation is not part of the signature.
    """
    raise NotImplementedError("mapEvaluation is not implemented yet")

In [3]:
def loadEpisodes(datafile="./train"):
    """Load diffusion episodes from a text file.

    Each non-blank line holds one episode as ``user:time`` pairs separated
    by ``;``.  The last three characters of every line are discarded before
    parsing (presumably a trailing separator plus line ending -- confirm
    against the data file).

    Returns a 1-D object array with one ``(n, 2)`` integer array per
    episode (columns: user id, time), rows sorted by increasing time.
    """
    parsed = []
    # ``with`` guarantees the file is closed, even if parsing fails.
    with open(datafile) as f:
        for line in f:
            if not line.strip():
                continue
            body = line[:-3]
            pairs = np.array([item.split(":") for item in body.split(";")],
                             float)
            ep = pairs.astype(int)
            parsed.append(ep[ep[:, 1].argsort()])
    # Episodes have different lengths: fill an object array explicitly,
    # since np.array() on a ragged list is rejected by recent numpy.
    episodes = np.empty(len(parsed), dtype=object)
    for i, ep in enumerate(parsed):
        episodes[i] = ep
    return episodes

In [4]:
# Load the episodes from the default data file ("./train").
episodes = loadEpisodes()

In [5]:
# Create the model, initialise its graph from the loaded episodes, then fit
# the edge probabilities on those same episodes.
ic = IndependentCascade(maxIter=1000)
ic.createGraph(episodes)
ic.fit(episodes)

0

KeyboardInterrupt: 