In [3]:
import csv
import numpy as np
from collections import defaultdict
import networkx as nx
import igraph as ig
import matplotlib.pyplot as plt
import numpy as np

# create all graphs
# Each entry names a folder under ./Papers/ that contains '<name>_joined.csv'.
papers = ['Baghizadhe2019', 'Darcy2011', 'Günther2017','Moeini2019', 
          'Oehlhorn2020', 'Peireira2020', 'Picollo2005', 'Schneider2014',
          'Siponen2004', 'Siponen2020', 'Teubner2020', 'Tsai2017',
          'Wiener2020', 'XiaoXiao2013']


def alphanumeric_filter_lower(string):
    """Return ``string`` lower-cased with all non-alphanumeric characters removed.

    Defined once, ahead of the loop: it does not depend on any per-paper
    state, so there is no reason to re-create it on every iteration.
    """
    return "".join(filter(str.isalnum, string)).lower()


for name in papers:
    print(name + " - loading")
    # Maps a core (PARENT) node key to the list of its referenced (CHILD) keys.
    group_matches_dictionary = defaultdict(list)
    # Keys of all PARENT rows, in file order.
    core_nodes = []
    # NOTE(review): not used anywhere in the visible code -- kept in case a
    # later cell relies on it.
    all_nodes = []


    # Read the joined export and derive a normalised key for every row:
    # title (+ one author name for short titles) + year, stripped to lower-case
    # alphanumerics. PARENT rows become core nodes, CHILD rows are recorded as
    # references of the most recent PARENT row.
    #
    # newline='' is what the csv module documentation asks for, so that line
    # breaks embedded in quoted fields are parsed correctly.
    # NOTE(review): no explicit encoding is passed, so the platform default is
    # used -- confirm the encoding of the exports and pin it here.
    with open('./Papers/' + name + '/' + name + '_joined.csv', newline='') as csvfile:
        reader = csv.DictReader(csvfile)
        core_node = None
        for row in reader:
            node = row['Title']
            if node == "":
                # rows without a title cannot be keyed
                continue
            if len(node.split()) < 5:
                # Short titles are disambiguated by appending one author name.
                author = row['Author'].split(",")[0]
                if author == '[No author name available]':
                    # This check has to run BEFORE the longest-word reduction
                    # below; otherwise the placeholder is reduced to
                    # 'available]' and this branch can never be taken.
                    author = 'unknownauthor'
                elif len(author.split()) > 1:
                    # no comma more than one word, find max word since lastname
                    # the most likely to be fully appended
                    author = max(author.split(), key=len)
                node += author
            node += row['Year']
            node = alphanumeric_filter_lower(node)
            if row['FAMILY_TYPE'] == 'PARENT':
                core_node = node
                core_nodes.append(node)
            elif row['FAMILY_TYPE'] == 'CHILD':
                group_matches_dictionary[core_node].append(node)

    # Core nodes without any references are not part of the graph.
    # Build a filtered list rather than calling remove() while iterating over
    # the same list: removing during iteration shifts the remaining items, so
    # the element following each removed one is never examined and can wrongly
    # stay in core_nodes.
    core_nodes = [core_node for core_node in core_nodes
                  if core_node in group_matches_dictionary]
            
    # direct citation graph: one directed edge from every core node to each of
    # the references recorded for it
    dc_graph = nx.DiGraph()

    for key, references in group_matches_dictionary.items():
        for reference in references:
            dc_graph.add_edge(key, reference)

    # Default 'viz' attributes (colour and size) for every node ...
    # `Graph.nodes[...]` is used because the `Graph.node` alias was removed in
    # NetworkX 2.4.
    for node_id in dc_graph.nodes():
        dc_graph.nodes[node_id]['viz'] = {'color': {'r': 105, 'g': 105, 'b': 105, 'a': 0}, 'size': 29}

    # ... overridden with a distinct colour and size for the core nodes.
    for core_node_id in core_nodes:
        dc_graph.nodes[core_node_id]['viz'] = {'color': {'r': 69, 'g': 91, 'b': 150, 'a': 0}, 'size': 30}

    # Disabled: would set a 'viz' colour on edges that connect two core nodes.
    #edges = dc_graph.edges()
    #for edge in edges:
    #    source = edge[0]
    #    target = edge[1]
    #    if source in core_nodes:
    #        if target in core_nodes:
    #            dc_graph[source][target]['viz'] = {'color':  {'r': 0, 'g': 0, 'b': 0, 'a': 1}}

    nx.write_gexf(dc_graph, "./graphs/" + name + "_dc.gexf", version="1.2draft")

    # bibliographic coupling
    # Dense adjacency matrix of the citation graph (rows: edge source, columns:
    # edge target, both in dc_graph.nodes() order). toarray() returns a plain
    # ndarray regardless of whether NetworkX hands back a sparse matrix or a
    # sparse array, which avoids the discouraged np.matrix type.
    adjazenz_matrix = nx.adjacency_matrix(dc_graph).toarray()

    # delete irrelevant columns: only columns with a sum greater than 1 are
    # kept. A single boolean mask selects them in one pass instead of copying
    # the whole matrix with np.delete once per dropped column.
    column_sums = adjazenz_matrix.sum(axis=0)
    adjazenz_matrix = adjazenz_matrix[:, column_sums > 1]
    adjazenz_matrix_transpose = adjazenz_matrix.transpose()

    # Diagonal weighting 1 / (column sum - 1). Every kept column has a sum
    # greater than 1, so the denominator is strictly positive.
    diagonal = np.diag(1.0 / (adjazenz_matrix.sum(axis=0) - 1))

    bc_adjazenz_matrix = np.matmul(np.matmul(adjazenz_matrix, diagonal), adjazenz_matrix_transpose)

    # from_numpy_array replaces from_numpy_matrix, which was removed in
    # NetworkX 3.0.
    # NOTE(review): the nodes of bc_graph are integer row indices, not the
    # paper keys used in dc_graph -- confirm that this is intended.
    bc_graph = nx.from_numpy_array(bc_adjazenz_matrix, create_using = nx.Graph())
    # The diagonal of the product only yields self-loops; drop them, then drop
    # every node that is left without any edge.
    bc_graph.remove_edges_from(list(nx.selfloop_edges(bc_graph)))
    bc_graph.remove_nodes_from(list(nx.isolates(bc_graph)))


    nx.write_gexf(bc_graph, "./graphs/" + name + "_bc.gexf", version="1.2draft")

Baghizadhe2019 - loading
Darcy2011 - loading
Günther2017 - loading
Moeini2019 - loading
Oehlhorn2020 - loading
Peireira2020 - loading
Picollo2005 - loading
Schneider2014 - loading
Siponen2004 - loading
Siponen2020 - loading
Teubner2020 - loading
Tsai2017 - loading
Wiener2020 - loading
XiaoXiao2013 - loading
