In [1]:
import pandas as pd
import networkx as nx

In [2]:
import itertools

In [3]:
import tqdm

from collections import Counter

In [4]:
from importlib.machinery import SourceFileLoader

tools = SourceFileLoader("tools", "../tools.py").load_module()

In [5]:
df = pd.read_csv('../../data/prisoners_dilemma_articles_meta_data_clean.csv')

In [6]:
names = df['author']

In [7]:
names = [tools.normalise_name(name) for name in names]

In [8]:
df['author_standarized'] = names

In [9]:
groups = df.groupby('unique_key')

In [10]:
edges = []
authors = []
for i, group in groups:
    authors_in_paper = group['author_standarized'].drop_duplicates()
    for author in authors_in_paper:
        authors.append(author)
    edges += itertools.combinations(authors_in_paper, 2) 

In [11]:
G = nx.Graph()
G.add_nodes_from(authors)
G.add_edges_from(edges)

In [12]:
nx.write_gml(G, '../../data/networks/pd_new_graph.gml')
nx.write_gml(max(nx.connected_component_subgraphs(G), key=len),
             '../../data/networks/pd_cluster_graph.gml')

In [13]:
counter = Counter(edges)

In [14]:
edges_with_weights = []
for edge in counter:
    edges_with_weights.append((edge[0], edge[1], counter[edge]))

In [15]:
G = nx.Graph()
G.add_nodes_from(authors)
G.add_weighted_edges_from(edges_with_weights)

In [16]:
nx.write_gml(G, '../../data/networks/pd_new_graph_with_weights.gml')
nx.write_gml(max(nx.connected_component_subgraphs(G), key=len),
             '../../data/networks/pd_cluster_graph_with_weights.gml')

**Cumulitative**

In [17]:
years = sorted(df.date.unique())

In [18]:
years = sorted(df.date.unique())

for year in tqdm.tqdm(years[:-1]):
    yearly_data = df[df['date'] <= year]
    
    edges = []
    authors = []
    groups = yearly_data.groupby('unique_key')
    
    for i, group in groups:
        authors_in_paper = group['author_standarized'].drop_duplicates()
        for author in authors_in_paper:
            authors.append(author)
        edges += itertools.combinations(authors_in_paper, 2) 

    G = nx.Graph()
    G.add_nodes_from(authors)
    G.add_edges_from(edges)
    
    nx.write_gml(G, f'../../data/networks/yearly/G_pd_{int(year)}.gml')

100%|██████████| 57/57 [00:07<00:00,  7.21it/s] 


**Topics**

In [19]:
data_with_topics = pd.read_csv('../../data/prisoners_dilemma_meta_data_with_topics.csv')

In [20]:
names = data_with_topics['author']

In [21]:
names = [tools.normalise_name(name) for name in names]

In [22]:
data_with_topics['author_standarized'] = names

In [23]:
num_of_topics = len(data_with_topics['Dominant_Topic'].unique())

In [24]:
for number in tqdm.tqdm_notebook(range(num_of_topics)):
    topic_data = data_with_topics[data_with_topics['Dominant_Topic'] == number]
    
    
    edges = []
    authors = []
    groups = topic_data.groupby('unique_key')
    
    for i, group in groups:
        authors_in_paper = group['author_standarized'].drop_duplicates()
        for author in authors_in_paper:
            authors.append(author)
        edges += itertools.combinations(authors_in_paper, 2) 

    G = nx.Graph()
    G.add_nodes_from(authors)
    G.add_edges_from(edges)
    
    nx.write_gml(G, f'../../data/networks/topic_{int(number)}_pd_.gml')    

Please use `tqdm.notebook.tqdm` instead of `tqdm.tqdm_notebook`
  """Entry point for launching an IPython kernel.


HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=5.0), HTML(value='')))




# Auction and Price of anarchy networks

In [25]:
auction = pd.read_csv('../../data/auction_theory_articles_meta_data.csv')
anarchy = pd.read_csv('../../data/price_of_anarchy_articles_meta_data.csv')

In [26]:
labels = ['auction', 'anarchy']

In [27]:
for data, label in tqdm.tqdm_notebook(zip([auction, anarchy], labels)):
    names = data['author']
    names = [tools.normalise_name(name) for name in names]
    
    data['author_standarized'] = names
    
    groups = data.groupby('unique_key')
    
    edges = []
    authors = []
    for i, group in groups:
        authors_in_paper = group['author_standarized'].drop_duplicates()
        for author in authors_in_paper:
            authors.append(author)
        edges += itertools.combinations(authors_in_paper, 2) 
        
    G = nx.Graph()
    G.add_nodes_from(authors)
    G.add_edges_from(edges)
    
    nx.write_gml(G, f'../../data/networks/{label}_graph.gml')
    nx.write_gml(max(nx.connected_component_subgraphs(G), key=len),
             f'../../data/networks/{label}_cluster_graph.gml')

Please use `tqdm.notebook.tqdm` instead of `tqdm.tqdm_notebook`
  """Entry point for launching an IPython kernel.


HBox(children=(HTML(value=''), FloatProgress(value=1.0, bar_style='info', layout=Layout(width='20px'), max=1.0…


