In [29]:
from Instagram.data.datasets import datasets, bloggers
import numpy as np
import math
from scipy.optimize import minimize
import pickle
from random import random

In [30]:
# config
doc_size = 91
n = 578 # nodes
max_timeDecay = 30 # days
beta_theta = np.array([random() for _ in range(n*4)]) # initial value

In [31]:
bloggers_index = {v:k for k,v in bloggers.items()}

In [32]:
new_datasets = []

for dataset in datasets:
    new_dataset = []
    first_time_stamp = dataset[0][1]
    for user_name, time_stamp in dataset:
        new_dataset.append((bloggers_index[user_name], (time_stamp-first_time_stamp)/86400))
    new_datasets.append(new_dataset)

In [33]:
A = []
T = []
C = []
W = []
for new_dataset in new_datasets:
    Activated = []
    TimeDacays = []
    c = np.ones(n)
    w = np.array([], dtype=int)

    for i in range(1, len(new_dataset)): # node be triggered (after)
        node2, time2 = new_dataset[i]
        c[node2] = 0
        activated = np.array([])
        timeDacays = np.array([])

        for j in range(i): # node trigger (before)
            node1, time1 = new_dataset[j]
            if time2-time1 <= max_timeDecay:
                x = np.zeros(n*2)
                x[node1] = 1
                x[n+node2] = 1
                activated = np.concatenate([activated, x])
                timeDacays = np.append(timeDacays, time2-time1)

        if timeDacays.size > 0:
            w = np.append(w, node2)
            Activated.append(activated.reshape(-1, n*2))
            TimeDacays.append(timeDacays)
    
    A.append(Activated)
    T.append(TimeDacays)
    C.append(c)
    W.append(w)

In [34]:
def _sigmoid(val):
    return 1 / (1 + math.exp(-val))
sigmoid = np.vectorize(_sigmoid)

def _exponential(val):
    return math.exp(val)
exponential = np.vectorize(_exponential)

def cal_prob(X, beta, theta, time_dacays):
    p = sigmoid(np.dot(X, beta))
    r = exponential(np.dot(X, theta))
    return p*r*exponential(-r*time_dacays)

def cal_neg_prob(X, beta, theta, time_dacays):
    p = sigmoid(np.dot(X, beta))
    r = exponential(np.dot(X, theta))
    return p*exponential(-r*time_dacays) + (1-p)

def cal_g_prob(X, beta):
    p = sigmoid(np.dot(X, beta))
    return (1-p)

In [35]:
iteration = [0]
diagonal = np.eye(n)
g_matrixs = []
for i in range(n):
    g_matrix = np.zeros((n,n))
    g_matrix[:, i] = 1
    g_matrix = np.concatenate((g_matrix, diagonal), axis=1)
    g_matrixs.append(g_matrix)

In [36]:
def negative_log_likelihood(beta_theta):
    beta = beta_theta[:n*2]
    theta = beta_theta[n*2:]

    log_likelihood = 0

    for Activated, TimeDacays, c, w in zip(A, T, C, W):

        for x, dacays, idx in zip(Activated, TimeDacays, w):
            probs = cal_prob(x, beta, theta, dacays)
            neg_probs = cal_neg_prob(x, beta, theta, dacays)
            
            # calculate h
            const = (probs/neg_probs).sum()
            h = np.log(neg_probs*const).sum()

            # calculate g
            g_probs = cal_g_prob(g_matrixs[idx], beta)
            g = (np.log(g_probs)*c).sum()

            log_likelihood -= h+g
    
    iteration[0] += 1
    print(f'\riteration {iteration[0]}', end='')
    return log_likelihood

In [37]:
maxmize_log_likelihood = minimize(negative_log_likelihood, x0=beta_theta)

iteration 9252

  h = np.log(neg_probs*const).sum()


iteration 9254

  df = fun(x) - f0


iteration 13878

  h = np.log(neg_probs*const).sum()


iteration 16193

  h = np.log(neg_probs*const).sum()


iteration 16195

  df = fun(x) - f0


iteration 20819

  h = np.log(neg_probs*const).sum()


iteration 32386

  h = np.log(neg_probs*const).sum()


iteration 32388

  df = fun(x) - f0


iteration 37012

  h = np.log(neg_probs*const).sum()


iteration 50896

In [38]:
with open('beta_theta.pkl', 'wb') as f:
    pickle.dump(maxmize_log_likelihood.x, f)

## Probability Matrix

In [39]:
with open('beta_theta.pkl', 'rb') as f:
    optimized_beta_theta = pickle.load(f)

In [40]:
optimized_beta = optimized_beta_theta[:2*n]
optimized_theta = optimized_beta_theta[2*n:]

In [65]:
probability_matrix = np.array([])

In [66]:
for g_matrix in g_matrixs:
    
    prob_row = sigmoid(np.dot(g_matrix, optimized_beta))
    probability_matrix = np.concatenate((probability_matrix, prob_row))

probability_matrix = probability_matrix.reshape(-1, n)

In [70]:
import pandas as pd

In [74]:
df = pd.DataFrame(probability_matrix)

In [75]:
df.to_csv('probability_matrix.csv')