In [40]:
from Instagram.data.datasets import datasets, bloggers
import numpy as np
from scipy.optimize import minimize
import pickle
import pandas as pd
import warnings
warnings.simplefilter(action='ignore', category=RuntimeWarning)

In [42]:
# config
n = 578 # nodes
max_timeDecay = 30 # days
beta_theta = np.zeros(n*4) # initialize value
connected_node = 30 # average node connected -> next: adjustable according to fans

In [43]:
bloggers_index = {v:k for k,v in bloggers.items()}

In [44]:
new_datasets = []

for dataset in datasets:
    new_dataset = []
    first_time_stamp = dataset[0][1]
    for user_name, time_stamp in dataset:
        new_dataset.append((bloggers_index[user_name], (time_stamp-first_time_stamp)/86400))
    new_datasets.append(new_dataset)

In [45]:
A = []
T = []
C = []
W = []
for new_dataset in new_datasets:
    Activated = []
    TimeDacays = []
    c = np.ones(n)
    w = []

    for i in range(1, len(new_dataset)): # node be triggered (after)
        node2, time2 = new_dataset[i]
        c[node2] = 0
        activated = np.array([])
        timeDacays = np.array([])

        for j in range(i): # node trigger (before)
            node1, time1 = new_dataset[j]
            if time2-time1 <= max_timeDecay:
                x = np.zeros(n*2)
                x[node1] = 1
                x[n+node2] = 1
                activated = np.concatenate([activated, x])
                timeDacays = np.append(timeDacays, time2-time1)

        if timeDacays.size > 0:
            w.append(node2)
            Activated.append(activated.reshape(-1, n*2))
            TimeDacays.append(timeDacays)
    
    A.append(Activated)
    T.append(TimeDacays)
    C.append(c)
    W.append(w)

In [46]:
diagonal = np.eye(n)
g_matrixs = []
for i in range(n):
    g_matrix = np.zeros((n,n))
    g_matrix[:, i] = 1
    g_matrix = np.concatenate((g_matrix, diagonal), axis=1)
    g_matrixs.append(g_matrix)

In [47]:
### beta's grad ###
# beta here is np.dot(X, beta)
# p = sigmoid(beta)
# r = np.exp(theta)
# prob = p*r*np.exp(-r*time_dacays)
# neg_prob = p*np.exp(-r*time_dacays) + (1-p)

# d(prob) / d(p) = r * np.exp(-r * time_dacays)  
# d(neg_prob) / d(p) = np.exp(-r * time_dacays) - 1
# d(p) / d(beta) = p * (1-p)
# d(prob) / d(beta) = r * np.exp(-r * time_dacays) * p * (1-p)
#                   = prob * (1-p)
# d(neg_prob) / d(beta) = (np.exp(-r * time_dacays) - 1) * p * (1-p)


### theta's grad ###
# theta here is np.dot(X, theta)
# p = sigmoid(beta)
# r = np.exp(theta)
# prob = p*r*np.exp(-r*time_dacays)
# neg_prob = p*np.exp(-r*time_dacays) + (1-p)

# d(prob) / d(r) = (d(p*r)/d(r) * np.exp(-r*time_dacays)) + (d(np.exp(-r*time_dacays))/d(r)* (p*r))
#                = p*np.exp(-r*time_dacays) + -time_dacays*np.exp(-r*time_dacays)*(p*r)
#                = (p - p*r*time_dacays) * np.exp(-r*time_dacays)
#                = p * (1 - r * time_dacays) * np.exp(-r * time_dacays)
# d(neg_prob) / d(r) = d(neg_prob)/d(np.exp(-r*time_dacays)) * d(np.exp(-r*time_dacays))/d(r)
#                    = p * (-time_dacays * np.exp(-r * time_dacays))
#                    = -p * time_dacays * np.exp(-r * time_dacays)
# d(r) / d(theta) = r
# d(prob) / d(theta) = p * (1 - r * time_dacays) * np.exp(-r * time_dacays) * r
#                    = prob * (1 - r * time_dacays)
# d(neg_prob) / d(theta) = -p * time_dacays * np.exp(-r * time_dacays) * r

In [48]:
def sigmoid(X):
    return 1 / (1 + np.exp(-X))

def cal_prob(p, r, time_dacays):
    prob = p*r*np.exp(-r*time_dacays)
    beta_grad = prob * (1-p)
    theta_grad = prob * (1 - r * time_dacays)
    return prob, beta_grad, theta_grad

def cal_neg_prob(p, r, time_dacays):
    neg_prob = p*np.exp(-r*time_dacays)+(1-p)
    neg_beta_grad = (np.exp(-r * time_dacays) - 1) * p * (1-p)
    neg_theta_grad = -p * time_dacays * np.exp(-r * time_dacays) * r
    return neg_prob, neg_beta_grad, neg_theta_grad

In [49]:
### h' grad
# beta here is np.dot(X, beta)
# a = np.log(neg_probs).sum(), b = np.log(const)
# const = (probs/neg_probs).sum()

# d(a) / d(beta) = d(a)/d(neg_probs) * d(neg_probs)/d(beta)
#                = ((1/neg_probs) * neg_beta_grad).sum()
# d(b) / d(beta) = d(b) / d(const) * d(const) / d(beta)
#                = 1/const * (d(probs)/d(beta) * neg_probs - probs * d(neg_probs)/d(beta)) / neg_probs*neg_probs
#                = 1/const * (beta_grad*neg_probs - probs*neg_beta_grad)/neg_probs*neg_probs
# d(h) / d(beta) = d(a)/d(beta) + d(b)/d(beta) 
#                = ((1/neg_probs) * neg_beta_grad).sum() + 1/const * (beta_grad*neg_probs - probs*neg_beta_grad)/neg_probs*neg_probs

# thete is the same as beta

In [50]:
def cal_h_grad(x, const, probs, neg_probs, beta_grad, neg_beta_grad, theta_grad, neg_theta_grad):
    beta_grad = (1/neg_probs)*neg_beta_grad + 1/const * (beta_grad*neg_probs - probs*neg_beta_grad)/neg_probs*neg_probs
    theta_gard = (1/neg_probs)*neg_theta_grad + 1/const * (theta_grad*neg_probs - probs*neg_theta_grad)/neg_probs*neg_probs
    return np.concatenate((np.dot(beta_grad, x), np.dot(theta_gard, x)))

In [51]:
### g' grad
# beta here is np.dot(X, beta)
# p = sigmoid(beta)
# g_probs = 1-p
# g = log(g_probs)*c

# d(g_probs)/d(beta) = -p * (1-p)
# d(g) / d(beta) = d(g)/d(g_probs) * d(g_probs)/d(beta)
#                = c/(1-p) * -p * (1-p)
#                = -c*p

In [52]:
def cal_g_prob(X, beta, c):
    p = sigmoid(np.dot(X, beta))
    return 1-p, -p*c

In [53]:
def negative_log_likelihood(beta_theta):
    beta = beta_theta[:n*2]
    theta = beta_theta[n*2:]

    log_likelihood = 0
    gradients = np.zeros_like(beta_theta)  # initialize gradients

    for Activated, TimeDacays, c, w in zip(A, T, C, W):

        for x, dacays, idx in zip(Activated, TimeDacays, w):
            p = sigmoid(np.dot(x, beta))
            r = np.exp(np.dot(x, theta))
            
            probs, beta_grad, theta_grad = cal_prob(p, r, dacays) # X
            neg_probs, neg_beta_grad, neg_theta_grad = cal_neg_prob(p, r, dacays) # Y
            
            # calculate h
            const = (probs/neg_probs).sum()
            if const == 0: const = 1e-5
            h = np.ma.log(neg_probs).filled(-10).sum() + np.log(const)
            h_grad = cal_h_grad(x, const, probs, neg_probs, beta_grad, neg_beta_grad, theta_grad, neg_theta_grad)

            # calculate g
            g_probs, g_grad = cal_g_prob(g_matrixs[idx], beta, c)
            g = (np.log(g_probs)*c).sum() * connected_node/n
            g_grad = np.dot(g_grad, g_matrixs[idx]) * connected_node/n

            # update
            log_likelihood -= h+g
            gradients -= h_grad
            gradients[:2*n] -= g_grad

    print(f'\rlog_likelihood: {log_likelihood}', end='')
    return log_likelihood, gradients

In [54]:
bounds = [(-10,10) for _ in range(n*4)]

In [55]:
maxmize_log_likelihood = minimize(negative_log_likelihood, x0=beta_theta, method='L-BFGS-B',
                                  jac=True, bounds=bounds, options={'gtol':1e-6})

log_likelihood: 4631.5480327855615

In [56]:
with open('beta_theta.pkl', 'wb') as f:
    pickle.dump(maxmize_log_likelihood.x, f)

## Probability Matrix

In [57]:
with open('beta_theta.pkl', 'rb') as f:
    optimized_beta_theta = pickle.load(f)

In [58]:
optimized_beta = optimized_beta_theta[:2*n]
optimized_theta = optimized_beta_theta[2*n:]

In [59]:
probability_matrix = np.array([])

In [60]:
for g_matrix in g_matrixs:
    
    prob_row = sigmoid(np.dot(g_matrix, optimized_beta))
    probability_matrix = np.concatenate((probability_matrix, prob_row))

probability_matrix = probability_matrix.reshape(-1, n)

In [61]:
df = pd.DataFrame(probability_matrix)
for i in range(n):
    df[i][i] = 0

In [62]:
df.to_csv('probability_matrix.csv')

In [63]:
df[0].sort_values()

0      0.000000
333    0.000022
376    0.000022
323    0.000036
567    0.000092
         ...   
279    0.974568
429    0.975971
330    0.980887
103    0.987199
144    0.988877
Name: 0, Length: 578, dtype: float64

In [67]:
bloggers[103]

'enjoylifestyle.yolo'

In [65]:
test = df.T.describe().T
(test['max']-test['25%']).mean()

0.344719235171288