# main.py

This is the file where we show all of the graphs and analyses we've performed on our datasets.

### Importing Data and Modules

Here, we load all of our data and modules that we're going to use.

In [None]:
import pandas as pd
import numpy as np
from ast import literal_eval
import matplotlib.pyplot as plt
import igraph as ig

In [None]:
# Folder holding the raw CSV exports, and the switch that decides whether the
# cleaned frames are written back to disk later on.
PATH_DATA = "data/"
save_frame = False

# These two columns are parsed with literal_eval while reading, so they arrive
# as Python objects rather than as strings.
literal_columns = {'id_followers': literal_eval, 'department': literal_eval}
accounts_data = pd.read_csv(
    PATH_DATA + "instagram_accounts.csv",
    converters=literal_columns,
)

# The posts come as two separate exports that are stacked row-wise.
posts_data1 = pd.read_csv(PATH_DATA + "instagram_posts.csv")
posts_data2 = pd.read_csv(PATH_DATA + "instagram_posts_1211_1611.csv")
posts_data = pd.concat([posts_data1, posts_data2], axis=0)

In [None]:
# Checking if our duplicate user changes anything: drop every post written
# by user 603282 and renumber the remaining rows from zero.
is_duplicate_author = posts_data['id_user'] == 603282
posts_data = posts_data[~is_duplicate_author].reset_index(drop=True)

### Data Cleanup

Here, we check for duplicates, empty values, etc. in our Accounts and Posts dataframes. We do end up finding one duplicated user in Accounts, so we remove that user from the table, remove them from every followers list, and recompute the follower counts.

Posts data, on the other hand, was far nicer.

In [None]:
# Checking for empty cells across the data 
accounts_data.isnull().values.any()

In [None]:
# Checking for duplicates - and we found one!
print(accounts_data.shape)
repeated_rows = accounts_data['id_user'].duplicated()
duplicated_user_id = accounts_data.loc[repeated_rows, "id_user"].values
print(duplicated_user_id)

# keep=False discards every row of a duplicated id, not just the later copies.
# The leftover "Unnamed: 0" column is dropped as well.
accounts_data = (
    accounts_data
    .drop_duplicates(subset=['id_user'], keep=False)
    .reset_index(drop=True)
    .drop(columns=["Unnamed: 0"])
)
print(accounts_data.head())

# Strip the removed user(s) out of everybody's follower list. The lists are
# edited in place, so the dataframe cells see the change directly.
for followers in accounts_data["id_followers"]:
    for removed_id in duplicated_user_id:
        if removed_id in followers:
            followers.remove(removed_id)

# Follower counts are recomputed from the cleaned lists.
accounts_data["nb_followers"] = accounts_data["id_followers"].map(len)

In [None]:
# Count, for every account, how many accounts it follows: each appearance of
# a user id inside somebody's follower list counts as one "following".
dict_following = {key: 0 for key in accounts_data["id_user"].values}
for idx in range(len(accounts_data)):
    lst = accounts_data.at[idx, "id_followers"]
    for ele in lst:
        # Raises KeyError if a follower id is not itself a row of accounts_data.
        dict_following[ele] += 1

# Index by id_user temporarily so the counts can be written by user id.
accounts_data.set_index("id_user", inplace = True)
for key , value in dict_following.items():
    accounts_data.at[key,"nb_following"] = value

# Turn id_user back into an ordinary column.
accounts_data.reset_index(inplace=True)
accounts_data.head()

In [None]:
# No duplicates across Posts! :)
posts_data[ posts_data['id_user'].duplicated() == True ]

In [None]:
# How our duplicate user fares across posts:
# He's a terminal node on the graph, so there's little to no effect from him.
posts_data[posts_data["id_user"] == 603282]

Exporting our data

In [None]:
# Write the cleaned frames to disk only when save_frame was switched on.
if save_frame:
    posts_data.to_csv('data/new/instagram_posts.csv')
    accounts_data.to_csv('data/new/instagram_accounts.csv')


### Exploratory Data Analysis

In this section, we can take a look at statistical properties of our data.

#### Accounts

In [None]:
## Sex Data
accounts_data['sex'].value_counts()

In [None]:
accounts_data['department'].value_counts().sort_values(ascending=False)

In [None]:
accounts_data["nb_followers"].hist()
plt.show()
accounts_data["nb_followers"].describe()

In [None]:
accounts_data["nb_following"].hist()
plt.show()
accounts_data["nb_following"].describe()

In [None]:
# Age of every user in years, measured from the birth date to today.
birth_dates = pd.to_datetime(accounts_data["birth_date"])
# Recent pandas versions reject np.timedelta64(1, "Y") because a year is not
# a fixed duration, so divide by the equivalent mean year length instead.
age = (pd.to_datetime("today") - birth_dates) / pd.Timedelta(days=365.2425)
# A negative age means the birth date was parsed into the future; shift those
# by a century (presumably two-digit years - TODO confirm against the raw data).
age[age < 0] += 100

# Truncate to whole years.
accounts_data["age"] = age.astype('int32')
print(accounts_data["age"].describe())

accounts_data["age"].hist(histtype="bar", ec="black")
plt.title("Ages of our users")
plt.ylabel("# of users")
plt.xlabel("Binned ages")
plt.show()

#### Posts

In [None]:
## House_Buy Data
posts_data['house_buy'].value_counts()

In [None]:
posts_data["views"].hist()
posts_data["views"].describe()

In [None]:
posts_data['link_clicks'].value_counts()

In [None]:
posts_data["id_post_origin"].value_counts(sort=True, ascending=False)

In [None]:
posts_data[["id_post","reposts"]].sort_values(by="reposts", ascending=False)

In [None]:
posts_data.head()

In [None]:
# Replacing idiotic timestamps: the values are parsed later with the 12-hour
# "%I:%M %p" format, which has no hour "00" or "13", so those hours become
# "12" and "01". The patterns are anchored to the start of the string so only
# the hour field is rewritten, and the vectorised .str accessor avoids a
# lambda whose parameter shadowed the builtin `str`.
posts_data['time'] = (
    posts_data['time']
    .str.replace(r"^00:", "12:", regex=True)
    .str.replace(r"^13:", "01:", regex=True)
)

In [None]:
# Getting the hourly distribution of posts.
# The clock time and the upper-cased half-day marker are glued together and
# parsed as a 12-hour timestamp.
half_day_marker = posts_data['half_day'].map(lambda marker: marker.upper())
clock_text = posts_data['time'] + " " + half_day_marker
posts_data['time24'] = pd.to_datetime(clock_text, format="%I:%M %p")

# One histogram bin per hour of the day.
posts_data['time24'].dt.hour.hist(bins=24, rwidth=0.5)
plt.xticks(rotation=90)

## Graph Construction

This section constructs the graphs of Accounts and Posts that we'll use in the metrics to follow. 

## _Accounts_ Graph Construction

In [None]:
# Graph vertices are numbered 0..n-1 following the row order of accounts_data.
mappingFrNodeToUserId = dict(enumerate(accounts_data['id_user']))
mappingFrUserIdToNode = {v: k for k, v in mappingFrNodeToUserId.items()}
dict_followers = dict(zip(accounts_data['id_user'], accounts_data['id_followers']))

# One directed edge user -> follower for every entry of a follower list.
edges = [
    (mappingFrUserIdToNode[user_id], mappingFrUserIdToNode[follower_id])
    for user_id, follower_ids in dict_followers.items()
    for follower_id in follower_ids
]

# print(edges)
# The vertex count is passed explicitly: when it is inferred from the edge
# list, accounts at the end of the table that have no edges are left out of
# the graph, and vertex ids would no longer cover every row of accounts_data.
accounts = ig.Graph(n=len(accounts_data), edges=edges, directed=True)
accounts.vs["size"] = 1
accounts.layout_lgl()
# igraph.plot(accounts)

In [None]:
acc_diameters = accounts.get_diameter()
print(acc_diameters)

_Conclusion_: Our users have broad connections, not deep ones.

## _Posts_ Graph Construction

Here we construct the graph for the posts. Each node is identified by its index in the dataframe, and then, its attributes are given as node attributes. All edges are added based on this. 
- Note: this has to be a directed graph to respect  the flow of information i.e. A -> B means that information flows from A to B. 

In [None]:
def construct_posts_graph(graph_data):
    """Build the directed repost graph of a posts dataframe.

    Every row of ``graph_data`` becomes one vertex: vertex ``i`` is the
    ``i``-th row and its ``name`` attribute is that row's index label. For
    every row whose ``id_post_origin`` is non-zero, an edge is added from the
    vertex of the originating post to the vertex of that row, so edges follow
    the flow of information (origin -> repost).

    Args:
        graph_data: dataframe with the columns ``id_post``,
            ``id_post_origin`` and ``id_user``.

    Returns:
        A directed ``ig.Graph`` whose vertices carry the attributes
        ``name``, ``id_post``, ``id_post_origin``, ``id_user``, ``size``,
        ``arrow_size`` and ``arrow_width``.

    Raises:
        IndexError: if a row references an ``id_post_origin`` that is not the
            ``id_post`` of any row.
    """
    graph = ig.Graph(directed=True)
    graph.add_vertices(graph_data.index.values)

    # Vertex properties, assigned column-wise in row order.
    post_ids = graph_data['id_post'].tolist()
    origin_ids = graph_data['id_post_origin'].tolist()
    graph.vs['id_post'] = post_ids
    graph.vs['id_post_origin'] = origin_ids
    graph.vs['id_user'] = graph_data['id_user'].tolist()

    # Row position of the first post carrying each id_post. Positions are
    # used instead of index labels so the result does not depend on the
    # dataframe having a 0..n-1 index.
    position_of_post = {}
    for position, post_id in enumerate(post_ids):
        position_of_post.setdefault(post_id, position)

    # Original posts have id_post_origin == 0 and get no incoming edge.
    edges = []
    for position, origin_id in enumerate(origin_ids):
        if origin_id == 0:
            continue
        if origin_id not in position_of_post:
            raise IndexError(
                f"id_post_origin {origin_id!r} does not match any id_post"
            )
        edges.append((position_of_post[origin_id], position))
    # A single batched call instead of one add_edges call per edge.
    graph.add_edges(edges)

    graph.vs['size']=7
    # NOTE(review): these are stored as vertex attributes; if they are meant
    # to style the arrows in ig.plot they probably belong on graph.es - confirm.
    graph.vs['arrow_size']=1
    graph.vs['arrow_width']=1
    return graph
posts = construct_posts_graph(posts_data)
ig.plot(posts)

In [None]:
# Finding out the components of our posts-graph
post_components = posts.clusters(mode='weak')
print(len(post_components))

# and getting the users attributed for each.

# _Metrics_

Let's compute our KPI's!

## Metric 1: Interactivity

Here, we implement the first metric proposed in the first deliverable. This measures the number of likes, clicks, reposts, donations etc. that each post has. We can use this to create a composite ranking, which then provides us a KPI to maximise. 

### Weak Components

We expect 4 components - one associated to each of the 4 original posts used to seed our userbase. And that's what we get ! Of course, we have weak components because we're working in a Directed Acyclic Graph, so we'll need to ignore the direction of our edges to find components.

In [None]:
def interactivity(graph, graph_data):
    """Sum the interactions collected by each original post's repost tree.

    Args:
        graph: posts graph whose vertices carry ``id_post_origin`` and
            ``name`` attributes.
        graph_data: posts dataframe, looked up with ``.loc`` by vertex id,
            with the columns ``likes``, ``comments``, ``reposts``,
            ``link_clicks``, ``donation_tag`` and ``donation_val``.

    Returns:
        A dataframe with one row per metric (``like``, ``comment``,
        ``repost``, ``clicks``, ``donations_tag_count``, ``donations_value``),
        one column per original post (named after the vertex ``name``) and a
        ``Total`` column holding the row sums.
    """
    # Original posts are the vertices whose id_post_origin is 0.
    original_posts = graph.vs(id_post_origin_eq=0)
    root_names = original_posts['name']

    # NOTE(review): roots and weak components are paired by position, which
    # assumes both sequences come back in the same order - confirm.
    components = dict(zip(root_names, graph.clusters(mode='weak')))

    # Output row label -> column of graph_data that feeds it.
    metric_columns = {
        'like': 'likes',
        'comment': 'comments',
        'repost': 'reposts',
        'clicks': 'link_clicks',
        'donations_tag_count': 'donation_tag',
        'donations_value': 'donation_val',
    }

    cumulative_interactions = {
        root: dict.fromkeys(metric_columns, 0) for root in root_names
    }
    for root, members in components.items():
        totals = cumulative_interactions[root]
        for node in members:
            row = graph_data.loc[node]
            # Every metric, donations included, is credited to the component
            # being walked (the donation sums used to go to a stale key).
            for metric, column in metric_columns.items():
                totals[metric] += row[column]

    cumulative_interactions = pd.DataFrame(cumulative_interactions)
    cumulative_interactions['Total'] = cumulative_interactions.sum(axis=1)

    return cumulative_interactions

interactivity_df = interactivity(posts, posts_data)
print(interactivity_df.head())

def get_interactivity_score(interactivity_df):
    """Collapse an interactivity table into one weighted score.

    Args:
        interactivity_df: table with a ``Total`` column, indexed by the
            metric names ``like``, ``comment``, ``repost``, ``clicks``,
            ``donations_tag_count`` and ``donations_value``.

    Returns:
        The weighted sum of the ``Total`` column.
    """
    # Weights are matched to the rows by label rather than by position, so
    # the score does not depend on the row order of the table. The donated
    # amount itself carries weight 0.
    weights = pd.Series({
        'like': 1,
        'comment': 2,
        'repost': 3,
        'clicks': 4,
        'donations_tag_count': 5,
        'donations_value': 0,
    })
    # A metric missing from the table contributes nothing.
    totals = interactivity_df['Total'].reindex(weights.index, fill_value=0)
    return (totals * weights).sum()

print("\nInteractivity Score: ", get_interactivity_score(interactivity_df))

def get_donation_value(interactivity_df):
    """Return the ``Total`` entry of the ``donations_value`` row."""
    return interactivity_df.loc["donations_value", "Total"]

print("\nDonation Value: ", get_donation_value(interactivity_df))
    

## Metric 2 : Reachability

### Metric 2a: Visibility
Here, we implement a metric proposed as part of the first deliverable: visibility. It measures the total number of views that the campaign received.

In [None]:
def visibility(graph, graph_data):
    """Sum the views collected inside each original post's weak component.

    Returns a one-row (``views``) dataframe with one column per original
    post, named after the vertex ``name``, plus a ``Total`` column.
    """
    # Original posts are the vertices whose id_post_origin is 0; they are
    # paired with the weak components position by position.
    roots = graph.vs(id_post_origin_eq=0)
    root_names = roots[:]['name']
    components = dict(zip(root_names, graph.clusters(mode='weak')))

    cumulative_visibility = {name: {'views': 0} for name in root_names}
    for name, members in components.items():
        bucket = cumulative_visibility[name]
        for node in members:
            bucket['views'] += graph_data.loc[node]['views']

    table = pd.DataFrame(cumulative_visibility)
    table['Total'] = table.sum(axis=1)
    return table

print(visibility(posts, posts_data))

def get_total_visibility(visibility_df):
    """Return the first entry of the ``Total`` column of a visibility table."""
    totals = visibility_df["Total"]
    return totals.iloc[0]

print("Total Visibility: ", get_total_visibility(visibility(posts, posts_data)))

### Metric 2b: Virality

Virality is the speed at which the campaign propagated. We use the diameter of each connected component as a proxy for that speed: the diameter is inversely related to the speed of the campaign, since it counts how many degrees of separation lie between the source node and the "furthest" node.

In [None]:
def get_component_diameters(posts):
    """Return the diameter of every weakly connected component of ``posts``."""
    diameters = []
    for component in posts.decompose(mode='weak'):
        diameters.append(component.diameter())
    return diameters

print("Depth of each Original Post:", get_component_diameters(posts))

# diameter_paths = [subgraph.get_diameter() for subgraph in subgraphs]
# print("Actual Path taken by each Depth-y post: ", diameter_paths)

# Conclusion: 

def get_max_diameter(components_diameters):
    """Return the largest of the given component diameters.

    Args:
        components_diameters: iterable of diameters (list or numpy array).

    Returns:
        The maximum diameter, or 0 when there are no components at all.
    """
    # default=0 keeps an empty posts graph from raising ValueError.
    return max(components_diameters, default=0)

print("Longest Diameter: ", get_max_diameter(get_component_diameters(posts)))

# Evaluation and Analysis of Previous Campaign

In [None]:
def node_properties(posts):
    """Tabulate centrality measures of the users behind the original posts.

    Reads the module-level ``accounts`` graph and ``mappingFrUserIdToNode``.
    Returns a dataframe with the columns ``nodes``, ``closeness``,
    ``betweenness`` and ``pagerank``, one row per original poster.
    """
    # Original posts are the vertices whose id_post_origin is 0.
    roots = posts.vs(id_post_origin_eq=0)
    original_posters = roots[:]['id_user']
    print(original_posters)

    # Translate user ids into vertex ids of the accounts graph.
    poster_nodes = [mappingFrUserIdToNode[user] for user in original_posters]

    closeness = np.array(accounts.closeness(poster_nodes, mode="out"))
    # Betweenness is scaled by (n - 1) * (n - 2), n being the vertex count.
    n_vertices = accounts.vcount()
    betweenness = np.array(accounts.betweenness(poster_nodes)) / ((n_vertices - 1) * (n_vertices - 2))
    pagerank = np.array(accounts.pagerank(poster_nodes))

    columns = {
        'nodes': poster_nodes,
        'closeness': closeness,
        'betweenness': betweenness,
        'pagerank': pagerank,
    }
    return pd.DataFrame.from_dict(columns)
print("Node properties of our most important posts: ")
print(node_properties(posts))

## Seeding Strategies

In [None]:
##Network's best K nodes
K = 4

def get_k_nodes_random(nodes=K):
    """Pick ``nodes`` distinct account-graph vertices uniformly at random.

    Args:
        nodes: number of seeds to draw (defaults to the module-level ``K``).

    Returns:
        Array of ``nodes`` distinct row positions of ``accounts_data``.
    """
    # The requested count is honoured instead of always drawing K seeds.
    return np.random.choice(len(accounts_data), nodes, replace=False)

print(get_k_nodes_random(nodes=K))

def get_k_nodes_closeness(nodes=K):
    """Return the ``nodes`` vertices with the highest out-closeness.

    Args:
        nodes: number of seeds to return (defaults to the module-level ``K``).

    Returns:
        Array of vertex ids of the ``accounts`` graph, best first.
    """
    accounts_closeness = np.array(accounts.closeness(mode="out"), dtype=float)
    # igraph may report NaN for vertices that reach nobody; NaN sorts last in
    # numpy and would otherwise be selected as the "best" seeds.
    accounts_closeness[np.isnan(accounts_closeness)] = -np.inf
    # A full argsort gives a genuinely ranked top-k and honours `nodes`.
    return np.argsort(accounts_closeness)[::-1][:nodes]

print(get_k_nodes_closeness(nodes = K))

def get_k_nodes_betweenness(nodes=K):
    """Return the ``nodes`` vertices with the highest betweenness.

    Args:
        nodes: number of seeds to return (defaults to the module-level ``K``).

    Returns:
        Array of vertex ids of the ``accounts`` graph, best first.
    """
    n_vertices = accounts.vcount()
    # Scaled by (n - 1) * (n - 2); the scaling does not affect the ranking.
    accounts_betweenness = np.array(accounts.betweenness()) / ((n_vertices - 1) * (n_vertices - 2))
    # A full argsort gives a genuinely ranked top-k and honours `nodes`.
    return np.argsort(accounts_betweenness)[::-1][:nodes]

print(get_k_nodes_betweenness(nodes = K))

def get_k_nodes_pagerank(nodes=K):
    """Return the ``nodes`` vertices with the highest PageRank.

    Args:
        nodes: number of seeds to return (defaults to the module-level ``K``).

    Returns:
        Array of vertex ids of the ``accounts`` graph, best first.
    """
    accounts_pagerank = np.array(accounts.pagerank())
    # A full argsort gives a genuinely ranked top-k and honours `nodes`.
    return np.argsort(accounts_pagerank)[::-1][:nodes]

print(get_k_nodes_pagerank(nodes= K))

### Probability of Clicks

The probability that a user who has seen a post will click on the link to the site.

In [None]:
# Sum of the link_clicks column divided by the number of rows of posts_data,
# i.e. a per-post click rate (not a per-view rate).
prob_click = posts_data['link_clicks'].sum()/len(posts_data)
print("Probability of clicking on a post: ", prob_click*100, "%")

### Probability of Donation

Evaluated as the number of donors over the number of possible donors (the number of site visitors). `prob_donation` gives the probability that someone donated given that they clicked on the link to the site.

In [None]:
# Rows flagged as donations, rows with a link click, and the flagged rows
# that actually carry a positive amount.
donors = posts_data[posts_data["donation_tag"]]
site_visitors = posts_data[posts_data["link_clicks"]]
paying_donors = donors[donors['donation_val'] > 0]

print( "Number of users who went to the website: ", len(site_visitors) )
print ("Number of donors:", len(paying_donors))
# Share of the site visitors who ended up donating a positive amount.
prob_donation = len(paying_donors) / len(site_visitors)
print("Empirical probabilty of donation: ", prob_donation*100, "%")

# Simulation

Here, we finally begin our simulations!

### Merged Dataset

The `merged_dataset` is a tool for us to quickly access users and posts without excessive querying - nothing to worry about :)

The model starts right after this.

In [None]:
# Every post joined with the account of its author.
merged_dataset_all = posts_data.merge(accounts_data, on=["id_user"], how='left')

kept_columns = [
    "id_user", "views", "reposts", "likes", "comments",
    "link_clicks", "donation_tag", "donation_val", "nb_followers",
]
merged_dataset = merged_dataset_all[kept_columns].copy()

# Views are expressed relative to the follower count; every other
# interaction is expressed relative to the views.
merged_dataset["percent_views"] = merged_dataset["views"] / merged_dataset["nb_followers"]
for rate_column, count_column in (
    ("percent_reposts", "reposts"),
    ("percent_likes", "likes"),
    ("percent_comments", "comments"),
):
    merged_dataset[rate_column] = merged_dataset[count_column] / merged_dataset["views"]
# merged_dataset[merged_dataset["percent_comments"]> 1]

merged_dataset.set_index(['id_user'],inplace=True)
merged_dataset.head()

## Machine Learning Regression Model for Donation Value

In [None]:
from sklearn.compose import make_column_selector as selector 
from sklearn.preprocessing import OneHotEncoder, MinMaxScaler
from sklearn.linear_model import SGDRegressor
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import make_pipeline

In [None]:
# Train only on the rows where a positive donation was recorded.
mlmodel_data = merged_dataset_all[merged_dataset_all["donation_val"] > 0]

# X_variables_columns = ["nb_followers", "nb_following", "time24", "nb_posts", "sex", "department", "age"]
X_variables_columns = ["nb_followers", "nb_following", "time24", "nb_posts", "department", "age"]
X_variables = pd.DataFrame(mlmodel_data[X_variables_columns])
# Reduce the posting timestamp to its hour.
X_variables["time24"] = X_variables["time24"].apply(lambda x : x.hour)
# Keep only the second element of each department value.
X_variables["department"] = X_variables["department"].apply(lambda x : x[1])

# Flatten the single target column into a 1-D array.
target_column = ["donation_val"]
target = mlmodel_data[target_column]
target = target.to_numpy().reshape(len(target),)

# Columns are routed by dtype: object columns are treated as categorical,
# everything else as numerical.
numerical_columns_selector = selector(dtype_exclude=object)
categorical_columns_selector = selector(dtype_include=object)

numerical_columns = numerical_columns_selector(X_variables)
categorical_columns = categorical_columns_selector(X_variables)

# handle_unknown="ignore" lets prediction cope with categories that were not
# seen during fitting.
categorical_preprocessor = OneHotEncoder(handle_unknown="ignore")
numerical_preprocessor = MinMaxScaler()

preprocessor = ColumnTransformer([
    ('one-hot-encoder', categorical_preprocessor, categorical_columns),
    ('minmax_scaler', numerical_preprocessor, numerical_columns)])

# Preprocessing and the SGD regressor are chained into a single estimator.
regr = SGDRegressor(max_iter=10000)
mlmodel = make_pipeline(preprocessor, regr)

_ = mlmodel.fit(X_variables, target)

In [None]:
def get_predicted_donation_value(id_user):
    """Predict the donation amount of ``id_user`` with the fitted ``mlmodel``.

    The features are taken from the rows of ``merged_dataset_all`` belonging
    to the user and prepared the same way as the training data.

    Args:
        id_user: id of the user to predict for.

    Returns:
        The predicted amount truncated to an int and floored at 0. When the
        user has several rows the predictions are averaged; when the user has
        no row at all, 0 is returned.
    """
    X_variables_columns = ["nb_followers", "nb_following", "time24", "nb_posts", "department", "age"]
    is_user = merged_dataset_all["id_user"] == id_user
    X_var = merged_dataset_all.loc[is_user, X_variables_columns].copy()
    if X_var.empty:
        # Nothing to feed the model with.
        return 0

    # Same feature preparation as for the training set.
    X_var["time24"] = X_var["time24"].apply(lambda x : x.hour)
    X_var["department"] = X_var["department"].apply(lambda x : x[1])

    # predict() returns an array; reduce it to a scalar explicitly instead of
    # relying on int() accepting a one-element array.
    predictions = np.asarray(mlmodel.predict(X_var), dtype=float)

    return max(0, int(predictions.mean()))

## Model

Compartment Model under consideration.

Poster ---> Followers ----> 

                            View ----> Reposts

                            View ----> Comment

                            View ----> Like

Poster ---> Link Click ---> Donation

The simulation essentially works as follows. 

We achieve this using a simple Breadth First Traversal of the accounts graph, assuming that the best possible seeds have been chosen to start from. This choice of seeds effectively decides the strategy. 

In case of a repost, we populate a simulated `posts_data` data called `new_posts_data`. 

The number of reposts is given by the outdegree of the graph. This is because each outgoing edge in the `Posts` graph represents a poster->reposter link between posts. 

In [None]:
def rand(n, p):
    """Return a boolean array of length n, True where a uniform draw from [0, 1) is below p."""
    draws = np.random.uniform(0, 1, n)
    return draws < p

def _draw_post_outcome(id_user):
    """Draw the audience reaction to one post published by ``id_user``.

    Returns the tuple ``(viewers, n_likes, n_comments, clicked, donated,
    donation_value)`` where ``viewers`` is the array of follower ids that saw
    the post.
    """
    account = accounts_data[accounts_data['id_user'] == id_user]
    rates = merged_dataset.loc[id_user]
    followers = np.array(account['id_followers'].values[0])

    # Followers who view the post, then the viewers who like / comment.
    viewers = followers[rand(followers.size, rates['percent_views'])]
    likers = viewers[rand(viewers.size, rates['percent_likes'])]
    commenters = viewers[rand(viewers.size, rates['percent_comments'])]

    # A donation can only follow a link click, and only counts when the
    # predicted amount is positive.
    clicked = np.random.random() < prob_click
    donated = False
    donation_value = 0
    if clicked:
        donated = np.random.random() < prob_donation
    if donated:
        donation_value = get_predicted_donation_value(id_user)
        if donation_value <= 0:
            donated = False

    return viewers, len(likers), len(commenters), clicked, donated, donation_value


def simulation(strategy, nodes):
    """Simulate one campaign spreading breadth-first through the accounts graph.

    Args:
        strategy: seeding strategy, one of ``"random"``, ``"closeness"``,
            ``"betweenness"`` or ``"pagerank"``.
        nodes: number of seed accounts handed to the strategy.

    Returns:
        ``(new_posts_data, new_posts)``: the simulated posts dataframe (same
        columns as the real posts data, ``reposts`` filled with the vertex
        out-degree) and the posts graph built from it.

    Raises:
        KeyError: for an unknown ``strategy``, or when a poster has no row in
            ``merged_dataset``.
    """
    from collections import deque

    strategies = {"random": get_k_nodes_random, "closeness": get_k_nodes_closeness, "betweenness": get_k_nodes_betweenness, "pagerank": get_k_nodes_pagerank}
    # Select the initial seeds, honouring the requested number of seeds.
    seeded_nodes = strategies[strategy](nodes=nodes)
    # seeded_nodes = [0, 1, 2, 3] # seeds from previous campaign
    seed_users = [accounts_data.at[node, 'id_user'] for node in seeded_nodes]

    columns = ['id_user', 'id_post', 'views',
        'reposts', 'likes', 'comments', 'id_post_origin', 'link_clicks',
        'donation_tag', 'donation_val']
    # Rows are collected as dicts and turned into a dataframe once at the end
    # (DataFrame.append no longer exists in pandas 2).
    records = []
    post_of_user = {}

    def add_post(id_user, id_post_origin, outcome):
        viewers, n_likes, n_comments, clicked, donated, donation_value = outcome
        id_post = len(records) + 1
        records.append({
            'id_user': id_user,
            'id_post': id_post,
            'id_post_origin': id_post_origin,
            'views': len(viewers),
            'likes': n_likes,
            'comments': n_comments,
            'reposts': 0,  # A posteriori
            'link_clicks': clicked,
            'donation_tag': donated,
            'donation_val': donation_value,
        })
        post_of_user.setdefault(id_user, id_post)

    # Seeding the table: every seed publishes an original post, whose
    # followers are now looked up by the seed's user id.
    for id_user in seed_users:
        add_post(id_user, 0, _draw_post_outcome(id_user))

    # Initialisation of our long walk
    frontier = deque(seed_users)
    seen_list = {key: False for key in accounts_data['id_user']}
    for id_user in seed_users:
        seen_list[id_user] = True

    # The long walk ...
    while frontier:
        node = frontier.popleft()
        # The donation prediction is made for the poster being expanded.
        outcome = _draw_post_outcome(node)
        viewers = outcome[0]

        # NOTE(review): every viewer that was not reached before reposts;
        # percent_reposts is not applied (the draw based on it used to be
        # overwritten immediately). Confirm this is the intended model.
        # NOTE(review): a repost row carries the outcome drawn for the post it
        # was reposted from, as before - confirm.
        for nbor in viewers:
            if seen_list[nbor]:
                continue
            add_post(nbor, post_of_user[node], outcome)
            seen_list[nbor] = True
            frontier.append(nbor)

    new_posts_data = pd.DataFrame(records, columns=columns)

    # The graph needs the default 0..n-1 index, so id_user stays a column.
    new_posts = construct_posts_graph(new_posts_data)
    new_posts_data['reposts'] = pd.Series( new_posts.degree(mode='out') )

    return new_posts_data, new_posts

#### KPI Evaluation: Simulation Edition

Let's see how well we did, as compared to the original campaign.

In [None]:
new_posts_data, new_posts = simulation(strategy="closeness", nodes=K)
ig.plot(new_posts)

In [None]:
new_posts_data.head()

In [None]:
visibility(new_posts, new_posts_data)

In [None]:
new_components = new_posts.clusters(mode='weak')
print("Number of components in the NewPosts Graph:", len(new_components))

In [None]:
interactivity(new_posts, new_posts_data)

In [None]:
print("The diameters of each component are: ", get_component_diameters(new_posts) )
print('Plotting the smallest subgraph for visual understanding ... ')

subgr = new_posts.decompose(mode='weak')
ig.plot(subgr[1])

# Monte Carlo Simulation

In [None]:
NUM_OF_SIMULATIONS = 50
strategy = "pagerank"

# One (posts dataframe, posts graph) pair per simulated campaign.
new_posts_data_and_graph_monte_carlo = [
    tuple(simulation(strategy=strategy, nodes=K))
    for _ in range(NUM_OF_SIMULATIONS)
]

In [None]:
# Visibility table of every run, then their element-wise mean.
visibility_monte_carlo = [
    visibility(run_graph, run_data)
    for run_data, run_graph in new_posts_data_and_graph_monte_carlo
]
# print(visibility_monte_carlo)
average_visibility = sum(visibility_monte_carlo) / len(visibility_monte_carlo)
print(average_visibility)

In [None]:
# Interactivity table of every run, then their element-wise mean.
interactivity_monte_carlo = [
    interactivity(run_graph, run_data)
    for run_data, run_graph in new_posts_data_and_graph_monte_carlo
]
# print(interactivity_monte_carlo)
average_interactivity = sum(interactivity_monte_carlo) / len(interactivity_monte_carlo)
print(average_interactivity)

# Both KPIs are derived from the averaged table.
average_interactivity_score = get_interactivity_score(average_interactivity)
print("\nAverage Interactivity Score: ", average_interactivity_score)

average_donation_value = get_donation_value(average_interactivity)
print("\nAverage Donation Value: ", average_donation_value)

In [None]:
# Component diameters of every run, averaged component by component.
virality_monte_carlo = [
    get_component_diameters(run_graph)
    for _, run_graph in new_posts_data_and_graph_monte_carlo
]
average_virality = np.array(virality_monte_carlo).mean(axis=0)
print(average_virality)

print("Longest Diameter: ", get_max_diameter(average_virality))

## Exporting Simulation Data

In [None]:
import os

# One row per Monte Carlo run, holding the KPIs of that run.
simulation_runs = list(range(1, NUM_OF_SIMULATIONS + 1))
visibility_data = [get_total_visibility(table) for table in visibility_monte_carlo]
interactivity_score_data = [get_interactivity_score(table) for table in interactivity_monte_carlo]
donation_value_data = [get_donation_value(table) for table in interactivity_monte_carlo]
virality_data = [get_max_diameter(diameters) for diameters in virality_monte_carlo]

simulation_df_inputs_dict = {"runs": simulation_runs, "visibility": visibility_data, "interactivity score": interactivity_score_data, "donation_value": donation_value_data, "diameter": virality_data}
simulation_df = pd.DataFrame(simulation_df_inputs_dict)
simulation_df.set_index(["runs"], inplace=True)
# to_csv does not create missing folders, so make sure the target exists.
os.makedirs("./simulation_data", exist_ok=True)
simulation_df.to_csv("./simulation_data/{}{}.csv".format(strategy, NUM_OF_SIMULATIONS))
simulation_df.head()