# Foodvocate: Influence Maximization Model
Author: [Meng-Chieh Liu](https://github.com/MengChiehLiu)  
Date: 2023/6/2

In [73]:
import pandas as pd
import numpy as np
from collections import deque
from tqdm import tqdm
from Instagram.data.datasets import fans_count, bloggers
from queue import Queue
from threading import Thread, Lock
import os

In [74]:
# Length of every per-node array below (presumably the number of bloggers).
n = 578
# Square table read from CSV; row j is later compared against uniform draws,
# i.e. it is used as node j's activation probabilities towards other nodes.
prob_matrix = pd.read_csv(
    'data/probability_matrix.csv',
    index_col='Unnamed: 0',
).to_numpy()

## Expected Influence

In [75]:
# Follower counts as an array, in the iteration order of ``fans_count``.
fans = np.array([*fans_count.values()])
# A node's cost is one tenth of its follower count, rounded down.
costs = np.floor_divide(fans, 10)

In [76]:
# Expected reach of each node: its own fans plus the probability-weighted
# fans of every node it can activate directly, truncated to an integer.
influence = (prob_matrix @ fans + fans).astype(int)
# Expected reach per unit of cost (floor division).
influence_ratio = np.floor_divide(influence, costs)

In [87]:
# expected top 10 bloggers (influence), highest first
{bloggers[idx]: influence[idx] for idx in np.argsort(influence)[::-1][:10]}

{'aries_8248': 777254,
 'popyummy_mag': 618775,
 '4foodie': 562071,
 'soon6669': 440576,
 'solo_guide': 435759,
 'nini_food0822': 398478,
 'girlstalk.tw': 385906,
 '77.food': 382852,
 'foody_tw': 375881,
 'minmin_rd': 345591}

In [78]:
# expected top 10 bloggers (influence_ratio), highest first
{bloggers[idx]: influence_ratio[idx] for idx in np.argsort(influence_ratio)[::-1][:10]}

{'chloe_foodii': 1091,
 'iz.ifood': 882,
 'sweetfood_tony': 763,
 'yuly.eat': 753,
 'choosy_in_food': 631,
 '_foodisall': 581,
 'bella_food4life': 560,
 '_foodom': 542,
 'lala_foodlife': 513,
 'titi_eatfood': 463}

## Influence Maximization

In [29]:
def runThreading(my_queue, initial_points, func, thread_amount):
    """Drain ``my_queue`` with a pool of threads, calling ``func`` per item.

    Every queued item acts only as a ticket for one invocation: its value is
    discarded and ``func(initial_points)`` is called instead. The function
    returns once all worker threads have finished.

    Args:
        my_queue: A ``queue.Queue`` holding one entry per desired call.
        initial_points: Object forwarded unchanged to every ``func`` call.
        func: Callable accepting ``initial_points`` as its only argument.
        thread_amount: Number of worker threads to start.
    """
    # Local import keeps this block independent of the file-level imports.
    from queue import Empty

    class Worker(Thread):
        def __init__(self, queue, initial_points):
            super().__init__()
            self.queue = queue
            self.initial_points = initial_points

        def run(self):
            while True:
                # Checking qsize() and then calling a blocking get() can hang
                # forever when another worker takes the last item in between,
                # so claim a ticket without blocking and stop on Empty.
                try:
                    self.queue.get_nowait()
                except Empty:
                    return
                func(self.initial_points)

    # workers start working
    workers = [Worker(my_queue, initial_points) for _ in range(thread_amount)]
    for worker in workers:
        worker.start()
    for worker in workers:
        worker.join()

In [82]:
def inforamtionCascade(initial_points, cost_limit=False, simulations=1000, max_level=2):
    """Score every candidate seed by Monte Carlo simulation of a cascade.

    For each node not already in ``initial_points`` the cascade starts from
    ``initial_points`` plus that node. Processing node ``j`` adds
    ``fans_count[j]`` to the candidate's total and activates every not yet
    activated node whose uniform(0, 1) draw is below its entry in
    ``prob_matrix[j]``. Totals are summed over all runs and floor-divided by
    the number of runs.

    Reads the module-level ``n``, ``prob_matrix`` and ``fans_count`` and runs
    the simulations through ``runThreading``.

    Args:
        initial_points: Indices of the seeds selected so far.
        cost_limit: When true, a candidate's per-run score is its total fans
            floor-divided by the cost of the seed set including the
            candidate, where a node's cost is ``fans_count[node] // 10``.
        simulations: Number of simulation runs (default 1000).
        max_level: Number of cascade levels processed, the seed set being
            level 1 (default 2). ``None`` lets the cascade run until no new
            node is activated.

    Returns:
        Tuple ``(index, score)`` of the candidate with the highest averaged
        score. Nodes already in ``initial_points`` keep a score of 0.

    Raises:
        ValueError: If ``simulations`` is smaller than 1.
    """
    if simulations < 1:
        raise ValueError("simulations must be at least 1")

    lock = Lock()
    totals = np.zeros(n)
    seed_cost = sum(fans_count[seed] // 10 for seed in initial_points)

    # The seed mask is identical for every candidate and every run, so build
    # it once and copy it instead of re-marking the seeds each time.
    seed_mask = np.zeros(n, dtype=bool)
    for seed in initial_points:
        seed_mask[seed] = True

    # One ticket per simulation run; the ticket value itself is not used.
    tickets = Queue()
    for run in range(simulations):
        tickets.put(run)

    def func(initial_points):
        nonlocal totals
        fans = np.zeros(n)

        for i in range(n):
            if seed_mask[i]:
                continue

            fan = 0
            activated = seed_mask.copy()
            activated[i] = True
            new_ones = deque(initial_points)
            new_ones.append(i)

            level = 0
            while new_ones:
                level += 1
                # Only the nodes queued at the start of this level belong to
                # it; nodes activated meanwhile are handled one level later.
                for _ in range(len(new_ones)):
                    j = new_ones.popleft()
                    fan += fans_count[j]
                    draws = np.random.uniform(0, 1, n)
                    success = (draws < prob_matrix[j]) & ~activated
                    new_ones.extend(np.where(success)[0].tolist())
                    activated |= success
                if level == max_level:
                    break

            if cost_limit:
                fans[i] = fan // (seed_cost + fans_count[i] // 10)
            else:
                fans[i] = fan

        # The context manager releases the lock even if the update raises.
        with lock:
            totals += fans

    # os.cpu_count() may return None; fall back to a single core.
    runThreading(tickets, initial_points, func, (os.cpu_count() or 1) + 1)

    totals //= simulations

    return totals.argmax(), totals.max()

In [32]:
def greedy(k, cost_limit=False):
    """Select ``k`` seeds one at a time.

    Each round passes the seeds chosen so far to ``inforamtionCascade`` and
    appends the index and score it returns.

    Args:
        k: Number of rounds, i.e. number of seeds to select.
        cost_limit: Forwarded to ``inforamtionCascade``.

    Returns:
        Tuple ``(seeds, scores)`` of two lists in selection order.
    """
    chosen = []
    scores = []
    rounds = tqdm(range(k))
    for _round in rounds:
        best_index, best_score = inforamtionCascade(chosen, cost_limit)
        chosen.append(best_index)
        scores.append(best_score)
    return chosen, scores

In [83]:
# Top 10 bloggers (influence), in the order the greedy search picked them
influences_points, influences = greedy(k=10, cost_limit=False)
dict(zip((bloggers[idx] for idx in influences_points), influences))

100%|██████████| 10/10 [41:25<00:00, 248.51s/it]


{'aries_8248': 776739.0,
 'popyummy_mag': 1397739.0,
 '4foodie': 1956492.0,
 'soon6669': 2383690.0,
 'solo_guide': 2793142.0,
 'nini_food0822': 3188340.0,
 'girlstalk.tw': 3568251.0,
 '77.food': 3929904.0,
 'foody_tw': 4296958.0,
 'minmin_rd': 4612913.0}

In [79]:
# Top 10 bloggers (influence_ratio), in the order the greedy search picked them
influences_ratio_points, influences_ratio = greedy(k=10, cost_limit=True)
dict(zip((bloggers[idx] for idx in influences_ratio_points), influences_ratio))

{'chloe_foodii': 1034.0,
 'sweetfood_tony': 959.0,
 'iz.ifood': 890.0,
 'yuly.eat': 889.0,
 'choosy_in_food': 811.0,
 '_foodisall': 747.0,
 'kk8at': 712.0,
 'lala_foodlife': 698.0,
 'rizi_vie_': 643.0,
 'minnie.life_': 637.0}