In [1]:
import pandas as pd
import csv
import pickle
import numpy as np
from IPython.display import display
import multiprocessing
from multiprocessing import Pool
import math
from tqdm import tqdm
import time
import traceback
from sklearn.externals import joblib
import xgboost as xgb
import json
import time

In [2]:
path = "/Users/jay/MSC_WSBDA/MSc_Thesis/Msc_project/Data/"
model = joblib.load(path+"/xgb_model_GIVENCHY (2).dat")
#model = joblib.load(path+"xgb_model_15_hrs.dat")

In [3]:
def load_pickle_file(pickled_file):
    print(f'Loading data file from {pickled_file}')
    infile = open(pickled_file,'rb')
    unpickled_file = pickle.load(infile)
    print(f'Loaded {len(unpickled_file)} entries')
    infile.close()
    return unpickled_file
          
    
def save_pickle_file(path, data):
    print('Dumping data to path {}'.format(path))
    with open(path, 'wb') as file:
        pickle.dump(data, file)
    print('Finished dumping data to path {}'.format(path))


def mean(numbers):
    return float(sum(numbers)) / max(len(numbers), 1)


def safe_division(x, y):
    if y == 0:
        return 0
    else:
        return x/y




In [4]:
initial_features = load_pickle_file(path+'0_hrs_data.pkl')
initial_dataset = load_pickle_file(path+'network_simulation_0_hrs.pkl')
users = load_pickle_file(path+"users.dat")
users.reset_index(drop =True , inplace =True)

Loading data file from /Users/jay/MSC_WSBDA/MSc_Thesis/Msc_project/Data/0_hrs_data.pkl
Loaded 5973 entries
Loading data file from /Users/jay/MSC_WSBDA/MSc_Thesis/Msc_project/Data/network_simulation_0_hrs.pkl
Loaded 5973 entries
Loading data file from /Users/jay/MSC_WSBDA/MSc_Thesis/Msc_project/Data/users.dat
Loaded 5973 entries


In [5]:
in_degree = list(initial_dataset.friends_count)
out_degree = list(initial_dataset.followers_count)
degree = in_degree + out_degree

In [6]:
def process_data(source_id,target_id,features,network_simulation,current_time):
    
    if isinstance(network_simulation.loc[target_id,'source_candidates'],list):
        
        source_candidates = sorted(network_simulation.loc[target_id,'source_candidates'])
        sources = source_candidates

        first_source_index = source_candidates[0]
        first_source_row = network_simulation.loc[first_source_index]
        first_source_seed_row = network_simulation.loc[first_source_row['seed_index']]

        sources_dataframe = network_simulation.loc[sources]
        degreeList = list(degree[i] for i in sources)
        inDegreeList = list(in_degree[i] for i in sources)
        outDegreeList = list(out_degree[i] for i in sources)
        degreeList = list(network_simulation.loc[i, 'followers_count'] + network_simulation.loc[i, 'friends_count']  for i in sources)
        timeList = [current_time - network_simulation.loc[x,'time_lapsed'] for x in sources]


        last_source_index = sources[-1]    
        try:
            last_source_row = network_simulation.loc[last_source_index]
            last_source_seed_row = network_simulation.loc[last_source_row['seed_index']]
        except:
            print(f"target_index:{target_id}")
            print(f"last_source_index:{last_source_index}")
            

        #Extraction
        #Columns which are added for simulation, but they are not used as features for model prediction

        user_row = network_simulation.loc[target_id]


        # UsM: User metadata                    

        features.loc[target_id,'UsM_deltaDays0'] = first_source_row.user_created_days
        features.loc[target_id,'UsM_statusesCount0'] = first_source_row.statuses_count
        features.loc[target_id,'UsM_followersCount0'] = first_source_row.followers_count
        features.loc[target_id,'UsM_favouritesCount0'] = first_source_row.favourites_count
        features.loc[target_id,'UsM_friendsCount0'] = first_source_row.friends_count
        features.loc[target_id,'UsM_listedCount0'] = first_source_row.listed_count
        features.loc[target_id,'UsM_normalizedUserStatusesCount0'] = first_source_row.normalized_statuses_count
        features.loc[target_id,'UsM_normalizedUserFollowersCount0'] = first_source_row.normalized_followers_count
        features.loc[target_id,'UsM_normalizedUserFavouritesCount0'] = first_source_row.normalized_favourites_count
        features.loc[target_id,'UsM_normalizedUserListedCount0'] = first_source_row.normalized_listed_count
        features.loc[target_id,'UsM_normalizedUserFriendsCount0'] = first_source_row.normalized_friends_count
        features.loc[target_id,'UsM_deltaDays-1'] = last_source_row.user_created_days
        features.loc[target_id,'UsM_statusesCount-1'] = last_source_row.statuses_count
        features.loc[target_id,'UsM_followersCount-1'] = last_source_row.followers_count
        features.loc[target_id,'UsM_favouritesCount-1'] = last_source_row.favourites_count
        features.loc[target_id,'UsM_friendsCount-1'] = last_source_row.friends_count
        features.loc[target_id,'UsM_listedCount-1'] = last_source_row.listed_count
        features.loc[target_id,'UsM_normalizedUserStatusesCount-1'] = last_source_row.normalized_statuses_count
        features.loc[target_id,'UsM_normalizedUserFollowersCount-1'] = last_source_row.normalized_followers_count
        features.loc[target_id,'UsM_normalizedUserFavouritesCount-1'] = last_source_row.normalized_favourites_count
        features.loc[target_id,'UsM_normalizedUserListedCount-1'] = last_source_row.normalized_listed_count
        features.loc[target_id,'UsM_normalizedUserFriendsCount-1'] = last_source_row.normalized_friends_count
        # TwM: Tweet metadata
        features.loc[target_id,'TwM_t0'] = round(timeList[0], 1)
        features.loc[target_id,'TwM_tSeed0'] = round(current_time - first_source_seed_row['time_lapsed'], 1)
        features.loc[target_id,'TwM_t-1'] = round(timeList[-1], 1)
        features.loc[target_id,'TwM_tSeed-1'] = round(current_time - last_source_seed_row['time_lapsed'], 1)
        features.loc[target_id,'TwM_tCurrent'] = current_time
        # Nw: Network
        features.loc[target_id,'Nw_degree'] = degree[target_id]
        features.loc[target_id,'Nw_inDegree'] = in_degree[target_id]
        features.loc[target_id,'Nw_outDegree'] = out_degree[target_id]
        features.loc[target_id,'Nw_degree0'] = degree[first_source_index]
        features.loc[target_id,'Nw_inDegree0'] = in_degree[first_source_index]
        features.loc[target_id,'Nw_outDegree0'] = out_degree[first_source_index]
        features.loc[target_id,'Nw_degree-1'] = degree[last_source_index]
        features.loc[target_id,'Nw_inDegree-1'] = in_degree[last_source_index]
        features.loc[target_id,'Nw_outDegree-1'] = out_degree[last_source_index]
        features.loc[target_id,'Nw_degreeSeed0'] = degree[int(first_source_row['seed_index'])]
        features.loc[target_id,'Nw_inDegreeSeed0'] = in_degree[int(first_source_row['seed_index'])]
        features.loc[target_id,'Nw_outDegreeSeed0'] = out_degree[int(first_source_row['seed_index'])]
        features.loc[target_id,'Nw_degreeSeed-1'] = degree[int(last_source_row['seed_index'])]
        features.loc[target_id,'Nw_inDegreeSeed-1'] = in_degree[int(last_source_row['seed_index'])]
        features.loc[target_id,'Nw_outDegreeSeed-1'] = out_degree[int(last_source_row['seed_index'])]
        # SNw: Spreading Network
        features.loc[target_id,'SNw_nFriendsInfected'] = len(sources)
        features.loc[target_id,'SNw_friendsInfectedRatio'] = safe_division(len(sources), user_row['friends_count'])
        features.loc[target_id,'SNw_generation0'] = first_source_row['generation']
        features.loc[target_id,'SNw_generation-1'] = last_source_row['generation']
        features.loc[target_id,'SNw_timeSinceSeed0'] = first_source_row['time_since_seed']
        features.loc[target_id,'SNw_timeSinceSeed-1'] = last_source_row['time_since_seed']

        infected_dataframe = network_simulation[network_simulation.time_lapsed <= current_time]
        total_nodes_infected = infected_dataframe.shape[0]
        total_in_degree = sum(infected_dataframe.friends_count)
        total_out_degree = sum(infected_dataframe.followers_count)

        features.loc[target_id,'SNw_totalNodesInfected'] = total_nodes_infected
        features.loc[target_id,'SNw_nodeInfectedCentrality'] = len(sources)/total_nodes_infected
        features.loc[target_id,'SNw_totalInDegree'] = total_in_degree
        features.loc[target_id,'SNw_totalOutDegree'] = total_out_degree
        features.loc[target_id,'SNw_inDegreeCentrality'] = in_degree[target_id]/total_in_degree
        features.loc[target_id,'SNw_inDegreeCentrality0'] = in_degree[first_source_index]/total_in_degree
        features.loc[target_id,'SNw_inDegreeCentrality-1'] = in_degree[last_source_index]/total_in_degree
        features.loc[target_id,'SNw_outDegreeCentrality'] = out_degree[target_id]/total_out_degree
        features.loc[target_id,'SNw_outDegreeCentrality0'] = out_degree[first_source_index]/total_out_degree
        features.loc[target_id,'SNw_outDegreeCentrality-1'] = out_degree[last_source_index]/total_out_degree
        features.loc[target_id,'SNw_inDegreeCentralitySeed0'] = in_degree[int(first_source_row['seed_index'])]/total_in_degree
        features.loc[target_id,'SNw_outDegreeCentralitySeed0'] = out_degree[int(first_source_row['seed_index'])]/total_out_degree
        features.loc[target_id,'SNw_inDegreeCentralitySeed-1'] = in_degree[int(last_source_row['seed_index'])]/total_in_degree
        features.loc[target_id,'SNw_outDegreeCentralitySeed-1'] = out_degree[int(last_source_row['seed_index'])]/total_out_degree
        # Stat: Statistical
        features.loc[target_id,'Stat_average_kOut'] = round(mean(degreeList), 1)
        features.loc[target_id,'Stat_average_t'] = round(mean(timeList), 1)
        features.loc[target_id,'Stat_average_deltaDays'] = sources_dataframe.user_created_days.mean()
        features.loc[target_id,'Stat_average_statusesCount'] = sources_dataframe.statuses_count.mean()
        features.loc[target_id,'Stat_average_followersCount'] = sources_dataframe.followers_count.mean()
        features.loc[target_id,'Stat_average_favouritesCount'] = sources_dataframe.favourites_count.mean()
        features.loc[target_id,'Stat_average_friendsCount'] = sources_dataframe.friends_count.mean()
        features.loc[target_id,'Stat_average_listedCount'] = sources_dataframe.listed_count.mean()
        features.loc[target_id,'Stat_average_normalizedUserStatusesCount'] = sources_dataframe.normalized_statuses_count.mean()
        features.loc[target_id,'Stat_average_normalizedUserFollowersCount'] = sources_dataframe.normalized_followers_count.mean()
        features.loc[target_id,'Stat_average_normalizedUserFavouritesCount'] = sources_dataframe.normalized_favourites_count.mean()
        features.loc[target_id,'Stat_average_normalizedUserListedCount'] = sources_dataframe.normalized_listed_count.mean()
        features.loc[target_id,'Stat_average_normalizedUserFriendsCount'] = sources_dataframe.normalized_friends_count.mean()
        features.loc[target_id,'Stat_max_kOut'] = max(degreeList)
        features.loc[target_id,'Stat_min_kOut'] = min(degreeList)
    
    #processed_dataframe = pd.DataFrame(features)        
    return features


In [7]:
def simulation(features,dataset,network_simulation,current_time):
    uninfected_users_indices = network_simulation[network_simulation['time_lapsed'].isnull() == True].index.values
    for i in uninfected_users_indices:      
        src_candidates = network_simulation.loc[i].source_candidates
        if isinstance(src_candidates,list):
            last_source_index = src_candidates[-1]
            source_index = last_source_index
        else:
            source_index = None
            
        target_index = i
        processed_dataframe = process_data(source_index,target_index,features,network_simulation,current_time)
        #print(f"len processed_dataframe: {processed_dataframe[0,:]}")

        valid_row = processed_dataframe.loc[[target_index]]
        #print(f"(valid_row.columns):{list(valid_row.columns.values)}")

        #for i in range(valid_row.columns):
            #print(f"valid_row:{i}")
        valid = valid_row.drop(['user_id','infected_status','infection_time','followers_list','Nw_inDegree','Nw_outDegree'],axis=1)
        #valid = valid_row.drop(['user_id','infected_status','infection_time','followers_list'],axis=1)
        #print(f"list(valid.columns):{list(valid.columns)}")
        valid = valid.astype('float64')

        columns = list(valid.columns)
        X = valid[columns]
        pre_data = xgb.DMatrix(X)
        infec = model.predict(pre_data)
        if infec > 0.6:
            print("Infected")
            if target_index == 0:
                print(f"target_index:{target_index}")
            network_simulation.loc[target_index,'time_lapsed'] = current_time
            network_simulation.loc[target_index,'source_index'] = source_index
            if network_simulation.loc[target_index,'source_index'] == None:
                network_simulation.loc[target_index,'seed_index'] = target_index
            else:
                network_simulation.loc[target_index,'seed_index'] = network_simulation.loc[source_index,'seed_index']
            
            if source_index == None or network_simulation.loc[source_index,'generation'] == None:
                network_simulation.loc[target_index,'generation'] = 0
            else:
                network_simulation.loc[target_index,'generation'] = network_simulation.loc[source_index,'generation'] + 1
            #network_simulation.loc[target_index,'generation'] = np.where(network_simulation.loc[source_index,'generation'] == None,0,network_simulation.loc[source_index,'generation'] + 1)
            vseed_index = network_simulation.loc[target_index,'seed_index']
            if seed_index == None:
                network_simulation.loc[target_index,'time_since_seed'] = 0
            else:
                network_simulation.loc[target_index,'time_since_seed'] = current_time - network_simulation.loc[seed_index,'time_lapsed']
                
            #network_simulation.loc[target_index,'time_since_seed'] = np.where(network_simulation.loc[target_index,'seed_index'] == None,0,(current_time - network_simulation.loc[seed_index,'time_lapsed']))
            followers_of_node = network_simulation.loc[target_index,'followers_list'] 
            if target_index == 0:
                print(f"followers_of_node:{followers_of_node}")
            if isinstance(followers_of_node,list):
                if target_index == 0:
                    print(f"inside isinstance followers_of_node:")
                for f in followers_of_node:
                    if target_index == 0:
                        print(f"f:{f}")
                    if network_simulation[network_simulation['id'] == f]['time_lapsed'].values == None:
                        if target_index == 0:
                            print(f"inside  network_simulation[network_simulation['id'] == f]['time_lapsed'].values == None")
                        follower_index = network_simulation[network_simulation['id'] == f].index.values.item()
                        if target_index == 0:
                            print(f"follower_index:{follower_index}")
                        if network_simulation.loc[follower_index,'source_candidates'] == None:
                            source_list = []
                            source_list.append(target_index)
                            network_simulation.at[follower_index,'source_candidates'] = source_list
                        else:
                            network_simulation.loc[follower_index,'source_candidates'].append(target_index)
                      
    return network_simulation

In [8]:
total_time_duration = 24*60
interval = 30
#current_time = 360
#current_time = 420
#current_time = 480
#current_time = 540
#current_time = 720
current_time = 0



features = initial_features
network_simulation = initial_dataset

print("Simulation started")
start_time = time.time()



while current_time < total_time_duration:
    print(f"current_time:{current_time}")
    network_simulation = simulation(features,initial_dataset,network_simulation,current_time)
    #print(f"current_time:{current_time}")
    current_time += interval
    
print(f"Simulation finished after {round((time.time() - start_time)/60,2)} minutes")
    

Simulation started
current_time:0
Infected
target_index:0
followers_of_node:[3186545203, 1955472014, 2938214163, 89805327, 342931336, 27220084, 97360017, 357198527, 523160325, 17431232, 3646253717, 291712766, 1648735412, 942362499923566592, 771201035469586432, 396320458, 1452119965, 905214899458068480, 737657348, 2787356547, 959903975716851717, 128980163, 713058981141327872, 302782926, 164311253, 919861873, 972514460190625794, 19926495, 19420302, 89594038, 210199365, 2974819208, 188612425, 16506769, 894380497874432001, 2213622360, 875928744325730305, 749975564, 2889413549, 153464869, 102134228, 782653285534543873, 539069822, 2881487933, 91693977, 2893658458, 765961228711780352, 143511562, 2239165062, 498147861, 176074609, 4578594706, 2299782847, 966556959053787137, 86655586, 34641762, 178566013, 712766459852767233, 150787461, 719864872767463424, 750946574335893504, 1876595412, 3140722654, 735674370283208704, 1064594953, 283391500, 1551584370, 2849428151, 398092749, 967669056, 720792291

inside  network_simulation[network_simulation['id'] == f]['time_lapsed'].values == None
follower_index:3367
f:443356624
inside  network_simulation[network_simulation['id'] == f]['time_lapsed'].values == None
follower_index:3386
f:3173731184
inside  network_simulation[network_simulation['id'] == f]['time_lapsed'].values == None
follower_index:3390
f:925255366086082560
inside  network_simulation[network_simulation['id'] == f]['time_lapsed'].values == None
follower_index:3500
f:859184118856204289
inside  network_simulation[network_simulation['id'] == f]['time_lapsed'].values == None
follower_index:3587
f:753137058
inside  network_simulation[network_simulation['id'] == f]['time_lapsed'].values == None
follower_index:3594
f:2857473117
inside  network_simulation[network_simulation['id'] == f]['time_lapsed'].values == None
follower_index:3635
f:872515843577589760
inside  network_simulation[network_simulation['id'] == f]['time_lapsed'].values == None
follower_index:3788
f:378612836
inside  net

Infected
Infected
Infected
Infected
Infected
Infected
Infected
Infected
Infected
Infected
Infected
Infected
Infected
Infected
Infected
Infected
Infected
Infected
Infected
Infected
Infected
Infected
Infected
Infected
Infected
Infected
Infected
Infected
Infected
Infected
Infected
Infected
Infected
Infected
Infected
Infected
Infected
Infected
Infected
Infected
Infected
Infected
Infected
Infected
Infected
Infected
Infected
Infected
Infected
Infected
Infected
Infected
Infected
Infected
Infected
Infected
Infected
Infected
Infected
Infected
Infected
Infected
Infected
Infected
Infected
Infected
Infected
Infected
Infected
Infected
Infected
Infected
Infected
Infected
Infected
Infected
Infected
Infected
Infected
Infected
Infected
Infected
Infected
Infected
Infected
Infected
Infected
Infected
Infected
Infected
Infected
Infected
Infected
Infected
Infected
Infected
Infected
Infected
Infected
Infected
Infected
Infected
Infected
Infected
Infected
Infected
Infected
Infected
Infected
Infected
Infected
I

Infected
Infected
Infected
Infected
Infected
Infected
Infected
Infected
Infected
Infected
Infected
Infected
Infected
Infected
Infected
Infected
Infected
Infected
Infected
Infected
Infected
Infected
Infected
Infected
Infected
Infected
Infected
Infected
Infected
Infected
Infected
Infected
Infected
Infected
Infected
Infected
Infected
Infected
Infected
Infected
Infected
Infected
Infected
Infected
Infected
Infected
Infected
Infected
Infected
Infected
Infected
Infected
Infected
Infected
Infected
Infected
Infected
Infected
Infected
Infected
Infected
Infected
Infected
Infected
Infected
Infected
Infected
Infected
Infected
Infected
Infected
Infected
Infected
Infected
Infected
Infected
Infected
Infected
Infected
Infected
Infected
Infected
Infected
Infected
Infected
Infected
Infected
Infected
Infected
Infected
Infected
Infected
Infected
Infected
Infected
Infected
Infected
Infected
Infected
Infected
Infected
Infected
Infected
Infected
Infected
Infected
Infected
Infected
Infected
Infected
Infected
I

Infected
Infected
Infected
Infected
Infected
Infected
Infected
Infected
Infected
Infected
Infected
Infected
Infected
Infected
Infected
Infected
Infected
Infected
Infected
Infected
Infected
Infected
Infected
Infected
Infected
Infected
Infected
Infected
Infected
Infected
Infected
Infected
Infected
Infected
Infected
Infected
Infected
Infected
Infected
Infected
Infected
Infected
Infected
Infected
Infected
Infected
Infected
Infected
Infected
Infected
Infected
Infected
Infected
Infected
Infected
Infected
Infected
Infected
Infected
Infected
Infected
Infected
Infected
Infected
Infected
Infected
Infected
Infected
Infected
Infected
Infected
Infected
Infected
Infected
Infected
Infected
Infected
Infected
Infected
Infected
Infected
Infected
Infected
Infected
Infected
Infected
Infected
Infected
Infected
Infected
Infected
Infected
Infected
Infected
Infected
Infected
Infected
Infected
Infected
Infected
Infected
Infected
Infected
Infected
Infected
Infected
Infected
Infected
Infected
Infected
Infected
I

Infected
Infected
Infected
Infected
Infected
Infected
Infected
Infected
Infected
Infected
Infected
Infected
Infected
Infected
Infected
Infected
Infected
Infected
Infected
Infected
Infected
Infected
Infected
Infected
Infected
Infected
Infected
Infected
Infected
Infected
Infected
Infected
Infected
Infected
Infected
Infected
Infected
Infected
Infected
Infected
Infected
Infected
Infected
Infected
Infected
Infected
Infected
Infected
Infected
Infected
Infected
Infected
Infected
Infected
Infected
Infected
Infected
Infected
Infected
Infected
Infected
Infected
Infected
Infected
Infected
Infected
Infected
Infected
Infected
Infected
Infected
Infected
Infected
Infected
Infected
Infected
Infected
Infected
Infected
Infected
Infected
Infected
Infected
Infected
Infected
Infected
Infected
Infected
Infected
Infected
Infected
Infected
Infected
Infected
Infected
Infected
Infected
Infected
Infected
Infected
Infected
Infected
Infected
Infected
Infected
Infected
Infected
Infected
Infected
Infected
Infected
I

Infected
Infected
Infected
Infected
Infected
Infected
Infected
Infected
Infected
Infected
Infected
Infected
Infected
Infected
Infected
Infected
Infected
Infected
Infected
Infected
Infected
Infected
Infected
Infected
Infected
Infected
Infected
Infected
Infected
Infected
Infected
Infected
Infected
Infected
Infected
Infected
Infected
Infected
Infected
Infected
Infected
Infected
Infected
Infected
Infected
Infected
Infected
Infected
Infected
Infected
Infected
Infected
Infected
Infected
Infected
Infected
Infected
Infected
Infected
Infected
Infected
Infected
Infected
Infected
Infected
Infected
Infected
Infected
Infected
Infected
Infected
Infected
Infected
Infected
Infected
Infected
Infected
Infected
Infected
Infected
Infected
Infected
Infected
Infected
Infected
Infected
Infected
Infected
Infected
Infected
Infected
Infected
Infected
Infected
Infected
Infected
Infected
Infected
Infected
Infected
Infected
Infected
Infected
Infected
Infected
Infected
Infected
Infected
Infected
Infected
Infected
I

Infected
Infected
Infected
Infected
Infected
Infected
current_time:180
Infected
Infected
Infected
Infected
Infected
Infected
Infected
Infected
Infected
Infected
Infected
Infected
Infected
Infected
Infected
Infected
Infected
Infected
Infected
Infected
Infected
Infected
Infected
Infected
Infected
Infected
Infected
Infected
Infected
Infected
Infected
Infected
Infected
Infected
Infected
Infected
Infected
Infected
Infected
Infected
Infected
Infected
Infected
Infected
Infected
Infected
Infected
Infected
Infected
Infected
Infected
Infected
Infected
Infected
Infected
Infected
Infected
Infected
Infected
Infected
Infected
Infected
Infected
Infected
Infected
Infected
Infected
Infected
Infected
Infected
Infected
Infected
Infected
Infected
Infected
Infected
Infected
Infected
Infected
Infected
Infected
current_time:210
Infected
Infected
Infected
Infected
Infected
Infected
Infected
Infected
Infected
Infected
Infected
Infected
Infected
Infected
Infected
Infected
Infected
Infected
Infected
Infected
Inf

Infected
current_time:930
Infected
Infected
Infected
Infected
Infected
Infected
Infected
Infected
Infected
Infected
Infected
Infected
Infected
Infected
Infected
current_time:960
Infected
Infected
Infected
Infected
Infected
Infected
Infected
current_time:990
Infected
Infected
Infected
Infected
Infected
Infected
Infected
current_time:1020
Infected
current_time:1050
Infected
Infected
Infected
Infected
Infected
current_time:1080
Infected
Infected
Infected
Infected
Infected
Infected
Infected
Infected
Infected
Infected
Infected
Infected
Infected
Infected
Infected
Infected
Infected
Infected
Infected
Infected
Infected
Infected
Infected
Infected
Infected
current_time:1110
Infected
Infected
Infected
Infected
Infected
Infected
Infected
Infected
Infected
Infected
Infected
Infected
Infected
Infected
Infected
current_time:1140
Infected
Infected
Infected
Infected
Infected
Infected
Infected
Infected
Infected
Infected
current_time:1170
Infected
current_time:1200
Infected
Infected
Infected
Infected
Infe

In [9]:
network_simulation.to_csv(path+'simulation_result_0hrs_24hrs_model.csv')

In [10]:
simulation_comparison = pd.concat([network_simulation['time_lapsed'],users['time_lapsed']],axis = 1)
column_names = ['nw_time_lapsed','usr_time_lapsed']
simulation_comparison.columns = column_names

simulation_comparison['both_infected'] = simulation_comparison.apply(lambda x: 1 if (x['nw_time_lapsed'] != None)
                                                                          & (np.isnan(x['usr_time_lapsed']) == False )
                                                                     else 0,axis = 1)



true_positive = simulation_comparison['both_infected'].sum()
total_predicted_positive = len(simulation_comparison[simulation_comparison['nw_time_lapsed'] != None])
precision = true_positive/total_predicted_positive
total_actual_positive = len(simulation_comparison[simulation_comparison['usr_time_lapsed'] != None])
recall = true_positive/total_actual_positive
f1_score = (2*(precision * recall))/(precision + recall)

In [11]:
accuracy = {
    
    'simulation_start_time':[],
    'precision':[],
    'recall':[],
    'f1_score':[]
    }

accuracy['simulation_start_time'].append(15)
accuracy['precision'].append(precision)
accuracy['recall'].append(recall)

accuracy['f1_score'].append(f1_score)
result = pd.DataFrame(accuracy)
result.to_csv(path+'simulation_accuracy_0hrs_24hrs_model.csv')

In [12]:
simulation_comparison['both_infected'] = simulation_comparison.apply(lambda x: 1 if (x['nw_time_lapsed'] != None)
                                                                          & np.isnan(x['usr_time_lapsed'] == False )
                                                                     else 0,axis = 1)


In [13]:
true_positive

4724

In [14]:
total_predicted_positive

5973

In [15]:
users[users['time_lapsed'] != None]

Unnamed: 0,text,time_lapsed,order,created_at,description,favourites_count,followers_count,followers_json,friends_count,friends_json,...,source_index,seed_index,generation,time_since_seed,user_created_days,normalized_statuses_count,normalized_followers_count,normalized_favourites_count,normalized_listed_count,normalized_friends_count
0,So sad to hear that fashion designer Hubert de...,0.00,1.0,2009-12-31 13:25:24,"Editor, BEAUTY the guide magazine & beauty on-...",11938,143119,"[999926283558535168, 3660960976, 36806897, 800...",4643,"[4675800379, 15030907, 986141250976960513, 156...",...,,0,0,0,2993,60.605747,47.817908,3.988640,0.185433,1.551286
1,RT @davelackie: So sad to hear that fashion de...,0.15,2.0,2015-05-06 10:51:03,I'm just a friendly guy that loves to meet and...,34186,753,"[174020306, 2476090092, 1013804791741468678, 9...",3428,[],...,0,0,1,0.15,1042,12.334933,0.722649,32.808061,0.007678,3.289827
2,RT @davelackie: So sad to hear that fashion de...,0.23,3.0,2015-11-07 15:12:52,"fashion, art, movies, politics, music, sport, ...",78797,2161,"[4370331314, 1004075138, 1014932554032574467, ...",396,"[888571364092858368, 3079405703, 3723675076, 3...",...,0,0,1,0.23,856,54.829439,2.524533,92.052570,0.042056,0.462617
3,RT @davelackie: So sad to hear that fashion de...,0.63,4.0,2013-10-12 01:47:56,"hi im Bree and I cry a lot. gamer, fan of japa...",28995,242,"[989922998147862529, 2444801904, 1006606684440...",642,"[27252380, 289383975, 4695197232, 441485258, 2...",...,0,0,1,0.63,1613,14.370118,0.150031,17.975821,0.034098,0.398016
4,Today we mourn the death of a #fashion legend;...,1.13,5.0,2011-11-06 14:36:28,"#Gamer, #Disney Aficionado, #pansexual, #Music...",1058,116,"[1006899676778336258, 3804949593, 982605650399...",239,"[52966596, 819660767175667712, 395449016, 1020...",...,,4,0,0,2318,1.038395,0.050043,0.456428,0.005177,0.103106
5,RT @davelackie: So sad to hear that fashion de...,1.18,6.0,2014-12-23 12:47:50,,13390,250,"[709984468367482881, 17132736, 498598756, 9948...",493,"[960952916256444417, 2872877470, 9134127935415...",...,0,0,1,1.18,1176,16.152211,0.212585,11.386054,0.017007,0.419218
6,RT @davelackie: So sad to hear that fashion de...,1.32,7.0,2009-11-13 22:07:38,"Beauty, style and lifestyle influencer. I’m th...",20885,39697,"[1017039102473056256, 2886197205, 100858486534...",1924,"[251918778, 40907212, 12, 19910041, 221904033,...",...,0,0,1,1.32,3041,11.392305,13.053930,6.867807,0.173627,0.632687
7,RT @davelackie: So sad to hear that fashion de...,1.83,8.0,2011-07-26 20:13:57,"Football fan, love Alabama Crimson Tide. Favo...",54298,672,"[2786810689, 977670839880634368, 8782691576781...",353,"[3313346954, 912942194, 490900538, 92249107877...",...,0,0,1,1.83,2421,6.464684,0.277571,22.427922,0.000413,0.145808
8,RT @davelackie: So sad to hear that fashion de...,2.15,9.0,2009-03-28 12:30:36,Saving the world before bedtime since 2009,13821,275,"[898979718460035073, 210443333, 82865329, 3728...",440,"[979814420380618752, 730761481, 1522502707, 47...",...,0,0,1,2.15,3272,7.929095,0.084046,4.224022,0.006418,0.134474
9,RT @davelackie: So sad to hear that fashion de...,2.42,10.0,2009-12-17 04:11:02,The Motion..... VCU alumna #NYGiants 💙❤️ #BLAC...,3810,1911,"[175233711, 26642969, 158809028, 54855636, 307...",506,"[950533645361668096, 220736796, 126408146, 811...",...,0,0,1,2.42,3008,58.402593,0.635306,1.266622,0.026596,0.168218


In [16]:
len(network_simulation)

5973

In [17]:
type(network_simulation.loc[1,'source_candidates'])

list

In [18]:
# if network_simulation.loc[0,'source_candidates'] == None:
#     source_list = []
#     source_list.append(999)
#     print(type(source_list))
#     network_simulation.at[0,'source_candidates'] = source_list
#     print(type(network_simulation.loc[0,'source_candidates']))

In [19]:
network_simulation

Unnamed: 0,id,time_lapsed,favourites_count,followers_count,friends_count,listed_count,statuses_count,source_candidates,source_index,seed_index,generation,time_since_seed,user_created_days,normalized_statuses_count,normalized_followers_count,normalized_favourites_count,normalized_listed_count,normalized_friends_count,friends_list,followers_list
0,100766356,0,11938,143119,4643,555,181393,,,0,0,0,2993,60.605747,47.817908,3.988640,0.185433,1.551286,"[765961228711780352, 720792291649171456, 89129...","[3186545203, 1955472014, 2938214163, 89805327,..."
1,3186545203,30,34186,753,3428,8,12853,"[0, 21, 198, 2576]",2576,2576,1,30,1042,12.334933,0.722649,32.808061,0.007678,3.289827,"[16312576, 17431232, 32469566, 605833905, 4961...",[17431232]
2,4134992843,720,78797,2161,396,36,46934,"[12, 972, 48, 748, 2778]",2778,4874,7,720,856,54.829439,2.524533,92.052570,0.042056,0.462617,"[503988356, 595997584, 2840242553, 257482364, ...","[324873471, 2840242553]"
3,1955472014,60,28995,242,642,55,23179,"[0, 577]",577,1953,3,60,1613,14.370118,0.150031,17.975821,0.034098,0.398016,"[100766356, 2889413549, 42921639]","[100766356, 859184118856204289, 1034284633]"
4,406301175,0,1058,116,239,12,2407,,,4,0,0,2318,1.038395,0.050043,0.456428,0.005177,0.103106,[],
5,2938214163,30,13390,250,493,20,18995,"[0, 1953]",1953,1953,1,30,1176,16.152211,0.212585,11.386054,0.017007,0.419218,"[719864872767463424, 17431232, 859184118856204...","[100766356, 17431232, 942362499923566592, 9725..."
6,89805327,30,20885,39697,1924,528,34644,"[0, 21]",21,21,1,30,3041,11.392305,13.053930,6.867807,0.173627,0.632687,"[16312576, 134953223, 55678606, 158128894, 100...","[100766356, 942362499923566592, 1876595412, 55..."
7,342931336,60,54298,672,353,1,15651,[0],0,0,1,60,2421,6.464684,0.277571,22.427922,0.000413,0.145808,"[424690337, 100766356, 572240095]","[100766356, 972514460190625794, 378612836, 572..."
8,27220084,30,13821,275,440,21,25944,[0],0,0,1,30,3272,7.929095,0.084046,4.224022,0.006418,0.134474,[100766356],
9,97360017,30,3810,1911,506,80,175675,[0],0,0,1,30,3008,58.402593,0.635306,1.266622,0.026596,0.168218,"[35036242, 100766356]",[35036242]
