In [1]:
import pickle
from dataclasses import dataclass
from typing import Dict


In [2]:

@dataclass
class CommunityStat:
    """Partition of page ids into Trump, Clinton and other communities for one week.

    Attributes:
        week: label of the week this partition belongs to.
        trump_community: page ids assigned to the Trump community.
        clinton_community: page ids assigned to the Clinton community.
        other_community: page ids assigned to any other community.
    """
    week: str
    trump_community:list
    clinton_community:list
    other_community:list

    def filter_by_centrality(self, centrality_list):
        """Drop every id that does not also appear in ``centrality_list``.

        The three community lists are replaced in place; the order of the
        surviving ids is preserved.

        Args:
            centrality_list: iterable of hashable ids to keep.

        Returns:
            ``self``, so the call can be chained onto the constructor.
        """
        # Build the lookup once: a set gives O(1) membership tests instead of
        # a linear scan of the list for every community member.
        allowed = set(centrality_list)
        self.trump_community = [page_id for page_id in self.trump_community if page_id in allowed]
        self.clinton_community = [page_id for page_id in self.clinton_community if page_id in allowed]
        self.other_community = [page_id for page_id in self.other_community if page_id in allowed]
        return self

    @property
    def dict(self):
        """Map each id to its community code: 1 = Trump, 2 = Clinton, 3 = other.

        The mapping is rebuilt on every access. If an id is listed in more
        than one community, the later community (higher code) wins.
        """
        labels = {}
        labels.update({t: 1 for t in self.trump_community})
        labels.update({c: 2 for c in self.clinton_community})
        labels.update({o: 3 for o in self.other_community})
        return labels

    def average_centrality(self, week_centrality:Dict):
        """Average a centrality score over the members of each community.

        Args:
            week_centrality: mapping from id to centrality value, e.g.
                ``{182739503862: 0.73, 9672727986293: 0.23, ...}``.

        Returns:
            Tuple ``(trump_mean, clinton_mean, other_mean)``. A community
            with no members yields NaN.

        Raises:
            KeyError: if a community member has no entry in ``week_centrality``.
        """
        import numpy as np

        def community_mean(members):
            scores = [week_centrality[page_id] for page_id in members]
            # np.mean([]) also returns NaN but emits a RuntimeWarning; return
            # the same value explicitly so empty communities stay quiet.
            return np.mean(scores) if scores else np.float64(np.nan)

        return (
            community_mean(self.trump_community),
            community_mean(self.clinton_community),
            community_mean(self.other_community),
        )


In [3]:
from typing import List
import pandas as pd


# Centrality table covering every week; the CSV's first column becomes the index.
all_centrality = pd.read_csv('Full_centrality.csv', index_col=0)
weeks = sorted(all_centrality["week"].unique())
# Keep only the centrality measures: per the original note, the first remaining
# column is the page id and the last one is the week, so both are sliced off.
centrality_types = all_centrality.columns[1:-1]

# Page ids used to locate the Trump and Clinton communities.
trump_page_id = 153080620724
clinton_page_id = 889307941125736

# Ids listed in the page-name map; used to restrict the communities.
page_with_names = pd.read_csv("page_name_map.csv")
filter_list = list(page_with_names["page_id"])


In [4]:
# One CommunityStat per entry of `weeks`, appended in the same order.
community_week_change:List[CommunityStat] = []

# Convert the filter list once so every weekly filter uses O(1) lookups
# instead of re-scanning the whole list for each community member.
filter_set = set(filter_list)

for week in weeks:
    week_string = str(week)[:10]

    # Load the community detection result for this week.
    # NOTE(security): pickle.load can execute arbitrary code while
    # deserialising; only load files produced by this project.
    with open(f"Result/{week_string}_community_detection.pkl", 'rb') as f:
        community = pickle.load(file = f)

    trump_community = []
    clinton_community = []
    other = []

    for layer in community:
        if trump_page_id in layer:
            # The layer holding the Trump page is the Trump community.
            trump_community = list(layer)
        elif clinton_page_id in layer:
            # Only reached when the layer does not also hold the Trump page.
            clinton_community = list(layer)
        else:
            # Every remaining layer is pooled into a single "other" group.
            other += layer

    community_week_change.append(
        CommunityStat(
            week_string,
            trump_community,
            clinton_community,
            other).filter_by_centrality(filter_set)
        )

    
    

In [5]:
import pandas as pd
# Page-name table, indexed by the CSV's first column.
pn_df = pd.read_csv("page_name_map.csv", index_col=0)

# Seed the panel with the ids present in the first week's partition. Each
# later week is left-joined onto it, so ids absent from week 0 never appear.
first_week_labels = community_week_change[0].dict
community_df = pd.DataFrame({'page_id': list(first_week_labels)})

c:CommunityStat
for i,c in enumerate(community_week_change):
    # Evaluate the property once; every access rebuilds the mapping.
    labels = c.dict
    temp_df = pd.DataFrame({
        'page_id': list(labels),
        f'week_{i}_community': list(labels.values()),
    })
    # Adds one column per week holding the community code of each page id.
    community_df = community_df.join(temp_df.set_index('page_id'), on='page_id')

In [6]:
## Average centrality within each community: three rows (Trump, Clinton, others) per week

# Collect one small frame per week and concatenate once at the end.
# DataFrame.append was deprecated in pandas 1.4 and removed in 2.0, and
# appending inside the loop re-copied the accumulated rows on every iteration.
weekly_frames = []

for i, week in enumerate(weeks):
    week_str = str(week)[:10]
    this_week_df = all_centrality[all_centrality.week == week]
    dict_for_df = {
        'week': [week_str]*3,
        'community':['Trump', 'Clinton', 'others']
    }

    for centrality_string in centrality_types:
        # page id -> score for this centrality measure in this week
        centrality_dict = dict(
            zip(this_week_df.page_id, this_week_df[centrality_string]))
        # average_centrality returns (trump, clinton, other), matching the
        # order of the 'community' labels above.
        dict_for_df[centrality_string] = community_week_change[i].average_centrality(centrality_dict)
    weekly_frames.append(pd.DataFrame(dict_for_df))

# pd.concat raises on an empty list; fall back to an empty frame, which is
# what the original accumulator held when there were no weeks.
avg_centrality_panel = pd.concat(weekly_frames) if weekly_frames else pd.DataFrame()

In [7]:


# Write the average-centrality panel to CSV, leaving out the DataFrame's row index.
avg_centrality_panel.to_csv("avg_centrality_panel.csv", index = False)

In [21]:
# NOTE(review): leftover scratch cell (execution count 21 is out of sequence with
# the cells above). It only checks that list repetition yields three copies of
# the element; nothing else in the notebook depends on it, so it can be deleted.
['a']*3

['a', 'a', 'a']