### Importation of the libraries

In [1]:
import igraph as ig
import random as rd
import ast 
import pandas as pd 
from igraph import Graph
import community
import numpy as np
import networkx as nx
import copy

### Importation of the data

In [2]:
# Raw CSV exports; both files use ';' as the field separator.
accounts_data = pd.read_csv(
    'instagram_accounts_corrected.csv',
    sep=';',
    encoding='ISO-8859-1',
)
posts_data = pd.read_csv('instagram_posts_corrected.csv', sep=';')

# Creation of the dictionaries

In [3]:
# All useful data is summarised in plain dictionaries: they are easier to work with
# and faster to query than the DataFrames.
# `accounts` maps each user id (as a string) to a dictionary of that user's characteristics.
accounts = {}
for _, account_row in accounts_data.iterrows():
    accounts[str(account_row["id_user"])] = {
        "nb_followers": account_row["nb_followers"],
        # This key is spelled "nbfollowing" (no underscore), unlike the CSV column it comes from.
        "nbfollowing": account_row["nb_following"],
        "nb_posts": account_row["nb_posts"],
        "sex": account_row["sex"],
        # The followers column holds a Python literal as text; parse it into a Python object.
        "id_followers": ast.literal_eval(account_row['id_followers']),
        "department": account_row["department"],
        "birth_date": account_row["birth_date"],
    }

In [4]:
# `posts` maps each post id (as a string) to a dictionary of that post's characteristics.
# "id_user" and "id_post_origin" are stored as strings; every other field is copied as-is.
# NOTE(review): str() of a missing or float-typed 'id_post_origin' gives "nan" / "123.0",
# which would never equal a post id string - confirm the column's dtype.
posts = {}

for _, post_row in posts_data.iterrows():
    posts[str(post_row['id_post'])] = {
        "id_user": str(post_row['id_user']),
        "time": post_row["time"],
        "half_day": post_row["half_day"],
        "views": post_row['views'],
        "reposts": post_row["reposts"],
        "likes": post_row["likes"],
        "comments": post_row["comments"],
        "id_post_origin": str(post_row['id_post_origin']),
        "link_clicks": post_row["link_clicks"],
        "donation_tag": post_row["donation_tag"],
        "donation_val": post_row["donation_val"],
        "house_buy": post_row["house_buy"],
    }


# Add a "has_posted" entry to every account: True if the user has posted the Orizon video,
# False otherwise. Posters also get "id_post", the id of their post (the last one
# encountered if a user appears in several posts).
for posted_id, post_info in posts.items():
    author_account = accounts[post_info["id_user"]]
    author_account["has_posted"] = True
    author_account["id_post"] = posted_id
for account_info in accounts.values():
    account_info.setdefault("has_posted", False)

In [5]:
# `stats` maps a user id to a dictionary combining the useful data of posts and accounts.
# It is used to compute the weights of the graph.

# alpha: probability that a user reposts a post they encounter when
# instagram_posts_corrected holds no repost link between the two users.
alpha = 0.0062773976757802595
# beta: probability that a user makes a donation when instagram_posts_corrected holds no data.
# It is not referenced by the cells visible here.
beta = 0.0125134


stats = {}

# Counters of posts with a donation / with a link click.
nb_donations = 0
nb_click = 0
for post_info in posts.values():
    author_id = post_info["id_user"]
    # Every per-post figure is divided by the author's follower count.
    # NOTE(review): a follower count of 0 would divide by zero here - confirm it cannot occur.
    author_followers = accounts[author_id]["nb_followers"]
    donated = post_info["donation_tag"]
    clicked = post_info["link_clicks"]
    if donated:
        nb_donations += 1
    if clicked:
        nb_click += 1
    stats[author_id] = {
        "w_views": post_info["views"] / author_followers,
        "w_likes": post_info["likes"] / author_followers,
        "w_comments": post_info["comments"] / author_followers,
        "w_donations": post_info["donation_val"] / author_followers if donated else 0,
        "w_clicks": 1 / author_followers if clicked else 0,
        "w_has_donated": 1 / author_followers if donated else 0,
    }

# stats[user]["w_repost"][follower] is 1 when the follower's post has the user's post as its
# origin, and alpha otherwise.
# NOTE(review): "id_post" is only set for accounts that appear in `posts`, and `stats` only
# has entries for post authors, so this loop raises KeyError as soon as a user or one of
# their followers has not posted - confirm every account has a post in the data set.
for user in accounts:
    stats[user]["w_repost"] = {
        str(follower): (
            1
            if posts[accounts[str(follower)]["id_post"]]["id_post_origin"] == accounts[user]["id_post"]
            else alpha
        )
        for follower in accounts[user]["id_followers"]
    }

# Turn the click / donation counters into fractions of all posts.
nb_click /= len(posts)
nb_donations /= len(posts)

# stats[id_user]["w_views"] is the number of views of id_user's post divided by id_user's
# follower count, i.e. the contribution of each follower to the views of that post.
# stats[id_user]["w_likes"], ["w_comments"], ["w_donations"], ["w_clicks"] follow the same idea.
# NOTE(review): the original note says that users who have not posted should receive the
# average of the observed contributions; the cells visible here do not compute that average.
# stats[id_user1]["w_repost"] is a dictionary where stats[id_user1]["w_repost"][id_user2] is 1
# if id_user2's post originates from id_user1's post, and alpha otherwise.


# Mean number of likes, views and comments per post, and fraction of posts with a link click.
mean_likes = mean_views = mean_comments = mean_clicks = 0
for post_info in posts.values():
    mean_likes += post_info["likes"]
    mean_views += post_info["views"]
    mean_comments += post_info["comments"]
    if post_info["link_clicks"]:
        mean_clicks += 1
mean_likes /= len(posts)
mean_comments /= len(posts)
mean_views /= len(posts)
mean_clicks /= len(posts)

# Add the keys "nb_views", "nb_likes", "nb_comments" and "nb_clicks" to every entry of stats:
# - if the user has posted the Orizon video, the values come from the user's own post
#   ("nb_clicks" is 1 when the post has link clicks, 0 otherwise);
# - otherwise the values are the means computed over all posts.
for uid, user_stats in stats.items():
    if accounts[uid]["has_posted"]:
        own_post = posts[accounts[uid]["id_post"]]
        user_stats["nb_views"] = own_post["views"]
        user_stats["nb_likes"] = own_post["likes"]
        user_stats["nb_comments"] = own_post["comments"]
        user_stats["nb_clicks"] = 1 if own_post["link_clicks"] else 0
    else:
        # Length of the follower list; not read again in the cells visible here.
        n_follow = len(accounts[uid]["id_followers"])
        user_stats["nb_views"] = mean_views
        user_stats["nb_likes"] = mean_likes
        user_stats["nb_comments"] = mean_comments
        user_stats["nb_clicks"] = mean_clicks

# Weighting of the graph

In [6]:
def weight_function(views,likes,comments,reposts,clicks,donations) :
    """Return the weight of an edge as a normalised weighted sum of six engagement terms.

    Each argument is multiplied by a fixed coefficient (1 for views, 6.25 for likes,
    28.01 for comments, 57.03 for reposts, 85.03 for clicks, 332 for donations) and the
    total is divided by the sum of the coefficients, so six arguments equal to 1 give 1.
    For an edge (a, b), the notebook passes a's per-follower stats and the repost weight
    of b towards a.
    """
    weighted_total = (views + 6.25*likes + 28.01*comments
                      + 57.03*reposts + 85.03*clicks + 332*donations)
    coefficient_total = 1 + 6.25 + 28.01 + 57.03 + 85.03 + 332
    return weighted_total / coefficient_total


# weights has the form {(user_id1, user_id2): weight}, where (user_id1, user_id2) is an edge
# of the directed graph and user_id2 is one of the followers listed for user_id1.
weights = {
    (str(followee), str(follower)): weight_function(
        stats[str(followee)]["w_views"],
        stats[str(followee)]["w_likes"],
        stats[str(followee)]["w_comments"],
        stats[str(followee)]["w_repost"][str(follower)],
        stats[str(followee)]["w_clicks"],
        stats[str(followee)]["w_donations"],
    )
    for followee in accounts
    for follower in accounts[followee]["id_followers"]
}

# Ranking of the vertices of the graph with PageRank

In [7]:
def page_rank_score(dico_weights) :
    """Compute the weighted PageRank score of every vertex of the graph.

    dico_weights maps an edge (source, target) to its weight. The edges are loaded
    into a directed igraph graph and PageRank is run with the 'weight' edge attribute.
    Returns a dictionary mapping each vertex name to its PageRank score.
    """
    weighted_edges = [(source, target, weight)
                      for (source, target), weight in dico_weights.items()]
    network = Graph.TupleList(weighted_edges, directed=True, weights=True)
    scores = network.pagerank(weights='weight')
    return dict(zip(network.vs['name'], scores))

### Useful helper functions

In [None]:
# `bijection_bis` maps a row position of accounts_data to the user id found on that row;
# `bijection` is the inverse mapping (user id -> row position). Together they convert
# between user ids and matrix coordinates. User ids are kept as read from the DataFrame
# (they are not converted to strings here).
bijection_bis = dict(enumerate(accounts_data['id_user']))

bijection = {user_id: position for position, user_id in bijection_bis.items()}


def matrice_adjacence(weights,d,accounts) :
    """Build the dense weighted adjacency matrix described by weights and accounts.

    Parameters:
        weights: dictionary mapping (user_id, str(follower_id)) to the edge weight,
            where user_id is a key of accounts.
        d: dictionary mapping int(user_id) and each follower id to a matrix index.
        accounts: dictionary mapping a user id to a dictionary whose "id_followers"
            entry lists that user's followers.

    Returns:
        A float numpy array of shape (len(d), len(d)) where entry [a][b] is the weight
        of the edge from the user at index a to the follower at index b, and 0 elsewhere.
    """
    n = len(d)
    # Allocate the array directly instead of building an n*n nested Python list first:
    # same values, far less memory, and a float dtype even when there are no edges.
    M = np.zeros((n, n))
    for user, info in accounts.items():
        for follower in info["id_followers"]:
            M[d[int(user)], d[follower]] = weights[(user, str(follower))]
    return M


def cout(id_utilisateur):
    """Return the cost of hiring a user to publish the Orizon video.

    The cost is the user's "nb_followers" squared, divided by 90. The user id is
    converted to a string before being looked up in the module-level `accounts`.
    """
    follower_count = accounts[str(id_utilisateur)]["nb_followers"]
    return (follower_count ** 2) / 90


def Tri(C):
    """Return the elements of C sorted by decreasing second component (x[1]).

    Elements with equal x[1] come out in the reverse of their input order, which is
    the order the previous last-element-pivot quicksort produced. A list with fewer
    than two elements is returned as-is (same object); otherwise a new list is returned
    and C is left untouched.

    The built-in stable sort is used instead of a recursive quicksort, so long runs of
    equal keys or already-sorted input no longer hit the recursion limit or take
    quadratic time.
    """
    if len(C) < 2:
        return C
    # Stable ascending sort, then reverse: descending keys, ties in reverse input order.
    return sorted(C, key=lambda x: x[1])[::-1]

# Partitions of the graph

In [9]:
def Louvain(Graph):
    """Determine the clusters of the network graph with the Louvain algorithm.

    Graph is handed to nx.Graph(...); the notebook passes the adjacency matrix.
    Inside this function the parameter name hides the igraph `Graph` class.
    Returns the partition produced by community.best_partition.
    """
    # Build a networkx graph from the adjacency matrix, then run the Louvain algorithm on it.
    return community.best_partition(nx.Graph(Graph))

# Lists of sponsors

In [10]:
def dico_sponsor(C, partition, max_partition, nb_sponsors):
    """Choose the users the company should hire to post the Orizon video.

    Parameters:
        C: list of tuples whose first component is a user, ordered from the most to
            the least desirable candidate. It is not modified.
        partition: mapping from each user to the community it belongs to.
        max_partition: maximum number of sponsors chosen under the
            "one sponsor per community" rule.
        nb_sponsors: total number of sponsors to return.

    Returns:
        A dictionary {user: community} in selection order. The first
        min(max_partition, nb_sponsors) sponsors are the best-ranked candidates of
        distinct communities; any remaining places go to the best-ranked candidates
        not yet selected. If C does not contain enough candidates, fewer sponsors are
        returned instead of raising IndexError.
    """
    sponsors = {}
    covered_communities = set()
    picked_positions = set()

    # Phase 1: best candidate of each not-yet-covered community. Stopping at
    # nb_sponsors gives the same sponsors as selecting max_partition and trimming.
    diverse_quota = min(max_partition, nb_sponsors)
    for position, candidate in enumerate(C):
        if len(sponsors) >= diverse_quota:
            break
        user_community = partition[candidate[0]]
        if user_community in covered_communities:
            continue
        covered_communities.add(user_community)
        sponsors[candidate[0]] = user_community
        picked_positions.add(position)

    # Phase 2: fill the remaining places with the best candidates left, whatever
    # their community.
    for position, candidate in enumerate(C):
        if len(sponsors) >= nb_sponsors:
            break
        if position in picked_positions:
            continue
        sponsors[candidate[0]] = partition[candidate[0]]

    return sponsors

In [25]:
def degre():
    """Return (matrix index, nb_followers) pairs for every account, ordered by Tri.

    The matrix index comes from `bijection`; Tri orders the pairs by decreasing
    follower count.
    """
    return Tri([(bijection[int(user_id)], info["nb_followers"])
                for user_id, info in accounts.items()])
# L: (index, nb_followers) pairs ranked by follower count; LL: the indices only, same order.
L = degre()
LL = [user_index for user_index, _ in L]

# Simulation of the propagation once we have the list of our sponsors

In [12]:
def forecast(set_users, adjacency_matrix):
    '''
    Forecast the results of the campaign by simulating how the Orizon video propagates.

    The propagation is simulated 10 times from the same initial sponsors, and the
    accumulated metrics are divided by 10 to give a per-run average.

    Parameters:
        set_users: list of the hired sponsors, given as indices into adjacency_matrix
            and bijection_bis. It is copied at the start of every run and never modified.
        adjacency_matrix: matrix indexed as [user][j]; a non-zero entry means that j
            can receive the post from user.

    Returns:
        A dictionary with the keys "views", "likes", "comments", "reposts", "clicks"
        and "donations", each holding the average over the 10 runs.

    Reads the module-level dictionaries stats, accounts and bijection_bis, and draws
    random numbers from rd, so two calls generally give different results.
    '''
    # Number of initial sponsors; subtracted from the poster count to obtain the reposts.
    n1 = len(set_users)

    # Metrics of interest, accumulated over all runs and averaged at the end.
    results = {"views": 0, "likes": 0, "comments": 0,
               "reposts": 0, "clicks": 0, "donations": 0}

    n = len(adjacency_matrix)
    # Run the campaign simulation 10 times.
    for k in range(10):
        # Users who have posted or reposted during this run.
        posters = []
        # Users who will post but whose followers have not been examined yet (used as a stack).
        set_users_bis=set_users.copy()
        # Propagate until no pending user is left.
        while len(set_users_bis) != 0:
            user = set_users_bis.pop()
            posters.append(user)

            # Examine every j linked to this poster that is neither a poster nor pending.
            for j in range(n):
                if adjacency_matrix[user][j] != 0 and j not in posters and j not in set_users_bis:
                    # A repost weight of exactly 1 means j reposts for certain.
                    if stats[str(bijection_bis[user])]["w_repost"][str(bijection_bis[j])]==1 :
                        # j becomes pending: it will post, and its own followers will be examined.
                        set_users_bis.append(j)
                    else :
                        # Otherwise j reposts when a uniform draw in 1..100000 is at most
                        # 100000 times the repost weight.
                        p=100000*stats[str(bijection_bis[user])]["w_repost"][str(bijection_bis[j])]
                        c=rd.randint(1,100000)
                        if c<=p :
                            # j becomes pending, as above.
                            set_users_bis.append(j)

        # Add the stats of every post and repost of this run to the campaign totals.
        for poster in posters :
            results["views"] += stats[str(bijection_bis[poster])]["nb_views"]
            results["likes"] += stats[str(bijection_bis[poster])]["nb_likes"]
            results["comments"] += stats[str(bijection_bis[poster])]["nb_comments"]
            results["clicks"] += stats[str(bijection_bis[poster])]["nb_clicks"]
            # w_donations is a per-follower value; multiplying by nb_followers scales it back up.
            results["donations"] += stats[str(bijection_bis[poster])]["w_donations"]*accounts[str(bijection_bis[poster])]["nb_followers"]
        # Every poster beyond the initial sponsors counts as one repost.
        results["reposts"] += len(posters)-n1

    # Divide by 10 to average over the 10 simulated runs.
    for key in results:
        results[key] = results[key]/10
    return results

# Final Simulation

In [None]:
# Computes hired_users, the people the company should hire to post the Orizon video in
# order to maximise the impact of the campaign, then the forecast results of the campaign.

# Rank every vertex by its PageRank score divided by its hiring cost.
# NOTE(review): cout() is 0 for a user with nb_followers == 0, which would make this
# ratio divide by zero - confirm no such vertex exists.
d = page_rank_score(weights)
page_rank_list = [(bijection[int(vertex)], score/(cout(vertex))) for vertex, score in d.items()]
page_rank_list_sorted = Tri(page_rank_list)

# The adjacency matrix is built once and used both for the clustering and the simulation.
matrix = matrice_adjacence(weights, bijection, accounts)
partition = Louvain(matrix)
hired_users = dico_sponsor(page_rank_list_sorted, partition, max(partition.values())+1, 1)
usersList = list(hired_users.keys())

# Hiring cost of the users at indices 0..3, and of the selected sponsors.
cout_sponsors_initiaux = [cout(bijection_bis[i]) for i in range(4)]
cout_sponsors = [cout(bijection_bis[x]) for x in usersList]

# Simulation
results = forecast(usersList, matrix)

# Codes for presenting the results

In [None]:
# Results with the degree method: hire the 4 users ranked first by follower count.
top_degree_users = LL[:4]
res = forecast(top_degree_users, matrix)
print(res)
print(top_degree_users)

# Hiring cost of each of those 4 users, then the total.
somme = list(map(cout, (bijection_bis[x] for x in top_degree_users)))

print(sum(somme))

In [None]:
# Results with random sponsors.
# rd.sample draws 4 distinct matrix indices, so a repeated draw can no longer leave the
# list with fewer than 4 sponsors; len(matrix) replaces the hard-coded upper bound 3046.
hired_users = rd.sample(range(len(matrix)), 4)
forecast(hired_users,matrix)

In [None]:
# Total hiring cost of the users at indices 0 to 3.
somme_originale = [cout(bijection_bis[index]) for index in range(4)]
print(sum(somme_originale))

# Total hiring cost of the users currently held in hired_users.
somme_originale = list(map(cout, (bijection_bis[index] for index in hired_users)))
print(sum(somme_originale))