In [None]:
import position_communities  # eigenes Skript

import networkx as nx
from csv import reader
import matplotlib.pyplot as plt
from statistics import median
from collections import Counter
from community import best_partition
import itertools

In [None]:
# Read the node list: the first column of every non-empty row in nodes.csv.
# newline='' is the mode the csv module asks for when it is handed a file object.
with open('nodes.csv', 'r', newline='') as f:
    r = reader(f)
    nodes = [n[0] for n in r if n]  # csv.reader yields [] for blank lines; skip them

# Read the edge list: every non-empty row of edges.csv, kept as a list of strings.
with open('edges.csv', 'r', newline='') as f:
    r = reader(f)
    edges_with_tags = [row for row in r if row]  # a blank row cannot be unpacked into (src, dest)

In [None]:
# Directed multigraph with one edge per row of edges_with_tags.
mdg = nx.MultiDiGraph()
mdg.add_nodes_from(nodes)
for src, dest, *tags in edges_with_tags:
    # Edges carry the tags of the question
    mdg.add_edge(src, dest, tags=tags)

# Drop users who posted neither questions nor answers
isolated = list(nx.isolates(mdg))
mdg.remove_nodes_from(isolated)

# For algorithms that need a simple undirected graph
g = nx.Graph(mdg)

In [None]:
# Node and edge counts of the multigraph.
print(mdg.number_of_nodes())
print(mdg.number_of_edges())

In [None]:
# Connected components of the simple graph, largest first.
components = list(nx.connected_components(g))
components.sort(key=len, reverse=True)
big_component = components[0]
print(len(components))
print(len(big_component))  # One giant component
len(components[1])  # All the others are tiny and can be ignored

In [None]:
# Restrict the multigraph to the largest component and derive a simple graph from it.
connected_mdg = mdg.subgraph(big_component)
connected_g = nx.Graph(connected_mdg)
# nx.diameter(subg)  # long runtime

In [None]:
partition = best_partition(connected_g)  # Partitioning with the highest modularity

# Invert the node -> community mapping into community -> set of member nodes.
communities = {}
for node, community in partition.items():
    communities.setdefault(community, set()).add(node)

In [None]:
# Flat list of all nodes that belong to a community with at least 10 members.
bigcomm_nodes = list(itertools.chain.from_iterable(
    members for members in communities.values() if len(members) >= 10
))

bigcomm_mdg = connected_mdg.subgraph(bigcomm_nodes)
bigcomm_g = connected_g.subgraph(bigcomm_nodes)

In [None]:
def tag_frequency(graph):
    """Return the relative frequency of every tag found on the edges of ``graph``.

    Args:
        graph: A networkx graph whose edges may carry a ``'tags'`` attribute
            holding an iterable of tags. Simple graphs and multigraphs are
            both accepted; in a multigraph every parallel edge is counted.
            Edges without a ``'tags'`` attribute contribute nothing.

    Returns:
        A list of ``(tag, share)`` tuples ordered from most to least common,
        where ``share`` is the tag's count divided by the total number of tag
        occurrences. The list is empty when no edge carries any tag.
    """
    all_tags = []
    # edges(data='tags') hands out the attribute of each individual edge, so it
    # also works for multigraphs, where graph[src][dest] is keyed by edge key
    # and a direct ['tags'] lookup would fail.
    for _, _, tags in graph.edges(data='tags', default=()):
        all_tags.extend(tags)
    total = len(all_tags)
    return [(tag, count / total) for tag, count in Counter(all_tags).most_common()]

In [None]:
# First ten entries of the tag frequency list for the simple undirected graph g.
# NOTE(review): g is built via nx.Graph(mdg), so parallel edges presumably
# contribute only one tag list each — confirm this is intended.
tag_frequency(g)[:10]

In [None]:
# (user, out-degree) pairs, highest out-degree first.
sorted_ans_degrees = sorted(mdg.out_degree(), key=lambda pair: pair[1], reverse=True)

In [None]:
# (user, in-degree) pairs, highest in-degree first.
sorted_qu_degrees = sorted(mdg.in_degree(), key=lambda pair: pair[1], reverse=True)

In [None]:
sorted_ans_degrees[:5]  # Users with the most answers

In [None]:
sorted_qu_degrees[:5]  # Users who received the most answers

In [None]:
# Median out-degree over all users.
med_ans_degree = median(degree for _user, degree in mdg.out_degree())
med_ans_degree

In [None]:
# Median in-degree over all users.
med_qu_degree = median(degree for _user, degree in mdg.in_degree())
med_qu_degree  # Most users register only to post a single question/answer

In [None]:
# Distribution of users by the number x of answers they gave; could be plotted
Counter(degree for _user, degree in mdg.out_degree())

In [None]:
# Users whose out-degree is zero.
ans_dict = {user: degree for user, degree in mdg.out_degree()}
items = list(ans_dict.items())
users_who_never_answer = set(user for user, degree in items if degree == 0)
len(users_who_never_answer)

In [None]:
# Users whose in-degree is zero.
q_dict = {user: degree for user, degree in mdg.in_degree()}
items = list(q_dict.items())
users_who_never_ask = set(user for user, degree in items if degree == 0)
len(users_who_never_ask)

In [None]:
# Casual users lack either answers or questions; core users have both.
casual_users = users_who_never_answer | users_who_never_ask
core_users = set(mdg.nodes) - casual_users
len(core_users)

In [None]:
core_user_graph = mdg.subgraph(core_users)
core_user_graph.number_of_edges()  # Answers among core users

In [None]:
casual_user_graph = mdg.subgraph(casual_users)
casual_user_graph.number_of_edges()  # Answers among casual users

In [None]:
users_who_answer = core_users | users_who_never_ask
answer_user_graph = mdg.subgraph(users_who_answer)
# Answers from casual users to core users
answer_user_graph.number_of_edges() - core_user_graph.number_of_edges()

In [None]:
users_who_ask = core_users | users_who_never_answer
ask_user_graph = mdg.subgraph(users_who_ask)
# Answers from core users to casual users
ask_user_graph.number_of_edges() - core_user_graph.number_of_edges()

![](bowtie.png)
Core and Casual user graph (simplified, to scale)

In [None]:
# Does not tell us much at the moment
# Out-degree divided by in-degree per user; a zero on either side is replaced by 1.
user_degrees = [(mdg.out_degree(n), mdg.in_degree(n)) for n in mdg.nodes]
ratios = [(given or 1) / (received or 1) for given, received in user_degrees]
sorted(ratios, reverse=True)

In [None]:
# Does not tell us much at the moment
sorted(ratios)

# Drawing

In [None]:
# Throw out partitions with fewer than 10 nodes; they only clutter the picture.
# Build a filtered copy instead of aliasing `partition`: with a plain assignment
# both names refer to the same dict, so deleting from one also empties the other.
small_partition_nodes = [node for node in partition if node not in bigcomm_g]
big_partition = {
    node: community
    for node, community in partition.items()
    if node in bigcomm_g
}

In [None]:
positions = position_communities.community_layout(bigcomm_g, big_partition, 3)

# Look the colour of each node up explicitly, in the same order in which the
# nodes are drawn. Relying on the value order of the dict only works if it
# happens to match the iteration order of bigcomm_g.
drawn_nodes = list(bigcomm_g)
node_colors = [big_partition[node] for node in drawn_nodes]

plt.figure(num=None, figsize=(60,60))
nx.draw_networkx(bigcomm_g, pos=positions, nodelist=drawn_nodes, node_size=50,
                 with_labels=False, node_color=node_colors)
plt.savefig("partition.png")