<font color='yellow'>Run everything in order. (pip -> import -> drive mount -> 'Select what to run' -> program itself)

In [1]:
!pip install networkx
!pip install pandas
!pip install matplotlib



In [2]:
import pandas as pd
import networkx as nx
import matplotlib.pyplot as plt
from google.colab import drive

Requires drive login in order to fetch the dataset from my drive.

In [3]:
drive.mount('/content/drive')
file_id = '1RaMoJKCxdSBpf0jYXBD-3T19x9Y8CKme'
link_csv = f'https://drive.google.com/uc?id={file_id}'

Mounted at /content/drive


In [4]:
# @title <font color='yellow'> Select what to run (note that network_visualization runs slow (notes below)) { display-mode: "form" }
network_visualization = True  # @param {type:"boolean"}
network_analysis = True  # @param {type:"boolean"}
override_old_metrics = True

If network_visualization is checked,

the program will both create a .png and show appropriate network graphs.

Images are saved to /contents.

<font color='yellow'> Runtime for network_visualization in colab is approxiamtely: 33min (with CPU), 30min (with GPU), 45min (with TPU) </font>

I highly recommend running this locally (if you have a powerful PC) as it cuts the waiting time substantially.

------------------------------------------------------------------------------

If network_analysis is checked,

the program will try to find file created by network_metrics(). If it can't be found it will run network_metrics() and save the dataframe for later use. In network_analysis() it will both create a .png and show appropriate charts.

Images are saved to /contents.

<font color='yellow'>Runtime for first run in colab is approximately: 4min 30sec

Runtime after first run in colab is approximately: 5sec </font>

In [5]:
# Starts the program
def run():
    # Creates graph and a tuple list of weights (ratings)
    G, weights = create_graph()

    if network_visualization or network_analysis or override_old_metrics:
        degree_centrality, closeness_centrality, betweenness_centrality, eigenvector_centrality = centralization(G)

    # Runs visualization (depending on your selection above)
    if network_visualization:
        visualization(G, degree_centrality, closeness_centrality, betweenness_centrality, eigenvector_centrality)

    # Forced override of .pkl file. Variable can be found below the checkboxes
    if override_old_metrics:
        node_df = network_metrics(G, weights, degree_centrality, closeness_centrality,
                                  betweenness_centrality, eigenvector_centrality)
        node_df.to_excel('node_df.xlsx', index=False)
        node_df.to_pickle('node_df.pkl')

    # Runs the network analysis. Will read .pkl file if possible, otherwise it runs network_metrics and creates one.
    if network_analysis:
        try:
            node_df = pd.read_pickle('node_df.pkl')
        except:
            node_df = network_metrics(G, weights, degree_centrality, closeness_centrality,
                                      betweenness_centrality, eigenvector_centrality)
            node_df.to_pickle('node_df.pkl')
        network_analysis(node_df)


# create_graph() is lightweight so it runs everytime.
def create_graph():
    # We load the data
    col_names = ['Source', 'Target', 'Rating', 'Time']
    df = pd.read_csv(link_csv, names=col_names, header=None)
    # We drop 'Time' as it's irrelevant for us
    df.drop(['Time'], axis=1, inplace=True)

    # We create directed graph
    G = nx.DiGraph()

    # Adding nodes manually as the data didn't have a specified nodes csv.
    # IDs in Source and Target range from 1 to 6005, so we create them here.
    for node in range(6005):
        G.add_node(int(node + 1))

    # Adding edges from the data we got in the dataframe
    for edge in range(len(df)):
        G.add_edge(int(df._get_value(edge, 'Source', takeable=False)),
                   int(df._get_value(edge, 'Target', takeable=False)),
                   weight=int(df._get_value(edge, 'Rating', takeable=False)))

    # We create a tuple list of ratings and which node the rating is targeted at
    weights = list(zip(list(df['Target']), list(df['Rating'])))

    # Set the graph to weighted
    nx.is_weighted(G)

    return G, weights


# Calculates centrality as it's required in both visualization and analysis
def centralization(G):
    degree_centrality = nx.degree_centrality(G)
    closeness_centrality = nx.closeness_centrality(G)
    betweenness_centrality = nx.betweenness_centrality(G)
    eigenvector_centrality = nx.eigenvector_centrality(G)
    return degree_centrality, closeness_centrality, betweenness_centrality, eigenvector_centrality


# Metrics used in network analysis for each node
class Metrics:
    def __init__(self, node_id: int, edges: int, average_rating: float,
                 degree_centrality: float, closeness_centrality: float,
                 betweenness_centrality: float, eigenvector_centrality: float):
        self.node_id = node_id
        self.edges = edges
        self.average_rating = average_rating
        self.degree_centrality = degree_centrality
        self.closeness_centrality = closeness_centrality
        self.betweenness_centrality = betweenness_centrality
        self.eigenvector_centrality = eigenvector_centrality


# Creates dataframe containing node ID and metrics
def network_metrics(G, weights, degree_centrality, closeness_centrality,
                    betweenness_centrality, eigenvector_centrality):
    node_metrics = []

    # Calculates average rating for each node
    weighted_total = [0] * 6005
    weighted_edges = [0] * 6005
    weighted_averages = [0] * 6005

    for edge in weights:
        weighted_total[(edge[0]) - 1] += edge[1]
        weighted_edges[(edge[0]) - 1] += 1

    for i in range(len(weighted_averages)):
        if weighted_edges[i] != 0:
            weighted_averages[i] = weighted_total[i]/weighted_edges[i]

    # Adds all metrics gathered into node_metrics
    for node_id in degree_centrality:
        this_node = Metrics(int(node_id), int(weighted_edges[node_id-1]), int(weighted_averages[node_id-1]),
                            float(degree_centrality[node_id]), float(closeness_centrality[node_id]),
                            float(betweenness_centrality[node_id]), float(eigenvector_centrality[node_id]))
        node_metrics.append(this_node)

    # Creates dataframe out of node_metrics for later use
    node_df = pd.DataFrame([vars(t) for t in node_metrics])

    return node_df


# Uses the dataframe created in network_metrics to create charts
def network_analysis(node_df):
    node_df.plot.scatter(x="average_rating", y="degree_centrality", alpha=0.1)
    plt.title('Average Rating vs. Degree Centrality')
    plt.savefig("analyze0.png")
    plt.show()

    node_df.plot.scatter(x="average_rating", y="closeness_centrality", alpha=0.1)
    plt.title('Average Rating vs. Closeness Centrality')
    plt.savefig("analyze1.png")
    plt.show()

    node_df.plot.scatter(x="average_rating", y="betweenness_centrality", alpha=0.1)
    plt.title('Average Rating vs. Betweenness Centrality')
    plt.savefig("analyze2.png")
    plt.show()

    node_df.plot.scatter(x="average_rating", y="eigenvector_centrality", alpha=0.1)
    plt.title('Average Rating vs. Eigenvector Centrality')
    plt.savefig("analyze3.png")
    plt.show()

    box_df = node_df.copy()
    box_df.drop(['node_id', 'edges', 'average_rating'], axis=1, inplace=True)
    box_df.plot.box()
    plt.title('Box Plot for Centralities')
    plt.savefig("analyze4.png")
    plt.show()

    test_df = node_df.copy()
    fig, ax = plt.subplots()
    test_df['average_rating'].value_counts().plot(ax=ax, kind='bar')
    plt.title('Frequency of Average Rating')
    plt.savefig("analyze5.png")
    plt.show()


# Creates network graphs
def visualization(G, degree_centrality, closeness_centrality, betweenness_centrality, eigenvector_centrality):
    # Network Visualization
    pos = nx.circular_layout(G)
    plt.figure(figsize=(10, 10))
    nx.draw(G, pos, with_labels=False, node_size=0.5, alpha=1, arrowsize=1, width=0.01)
    plt.title('Bitcoin OTC Trust Weighted Signed Network')

    plt.savefig("visualize0.png")
    plt.show()

    # Visualize Centrality Measures
    plt.figure(figsize=(16, 4))

    plt.subplot(141)
    nx.draw(G, pos, with_labels=False, node_size=[v * 1000 for v in degree_centrality.values()],
            node_color=[v * 1000 for v in degree_centrality.values()], cmap="seismic", alpha=1, arrowsize=1, width=0.01)
    plt.title('Degree Centrality')

    plt.subplot(142)
    nx.draw(G, pos, with_labels=False, node_size=[v * 100 for v in closeness_centrality.values()],
            node_color=[v * 10000 for v in closeness_centrality.values()], cmap="hsv", alpha=1, arrowsize=1, width=0.01)
    plt.title('Closeness Centrality')

    plt.subplot(143)
    nx.draw(G, pos, with_labels=False, node_size=[v * 1000 for v in betweenness_centrality.values()],
            node_color=[v * 1000 for v in betweenness_centrality.values()], cmap="seismic", alpha=1, arrowsize=1, width=0.01)
    plt.title('Betweenness Centrality')

    plt.subplot(144)
    nx.draw(G, pos, with_labels=False, node_size=[v * 1000 for v in eigenvector_centrality.values()],
            node_color=[v * 1000 for v in eigenvector_centrality.values()], cmap="seismic", alpha=1, arrowsize=1, width=0.01)
    plt.title('Eigenvector Centrality')

    plt.savefig("visualize1.png")
    plt.show()

    # Community Detection
    communities = nx.algorithms.community.greedy_modularity_communities(G)

    # Visualize Community Structure
    plt.figure(figsize=(8, 6))
    colors = [i for i, comm in enumerate(communities) for _ in comm]
    nx.draw(G, pos, node_color=colors, with_labels=False, node_size=5, cmap=plt.colormaps.get_cmap("tab20")
            , alpha=1, arrowsize=1, width=0.01)
    plt.title('Community Detection')

    plt.savefig("visualize2.png")
    plt.show()

    # Trust Analysis
    positive_edges = [(u, v) for u, v, w in G.edges(data=True) if w['weight'] > 0]
    negative_edges = [(u, v) for u, v, w in G.edges(data=True) if w['weight'] < 0]

    # Visualize Trust Relationships
    plt.figure(figsize=(12, 4))

    plt.subplot(121)
    nx.draw(G, pos, edgelist=positive_edges, with_labels=False, node_size=0.5, node_color='green'
            , alpha=1, arrowsize=1, width=0.01)
    plt.title('Positive Trust Relationships')

    plt.subplot(122)
    nx.draw(G, pos, edgelist=negative_edges, with_labels=False, node_size=0.5, node_color='red'
            , alpha=1, arrowsize=1, width=0.01)
    plt.title('Negative Trust Relationships')

    plt.savefig("visualize3.png")
    plt.show()


run()


KeyError: 0