In [None]:
import position_communities  # eigenes Skript

import networkx as nx
from csv import reader
import matplotlib.pyplot as plt
from statistics import median
from collections import Counter
from community import best_partition
import radar
import itertools
import copy
import numpy as np
import math

# Initialization

In [None]:
# Load the node identifiers (first column of nodes.csv) and the raw edge rows.
# newline='' is what the csv module asks for, so that quoted fields containing
# line breaks are parsed correctly.  Blank rows are skipped: an empty row
# would raise IndexError on row[0] here, or fail to unpack into (src, dest)
# when the graph is built from the edge rows.
with open('nodes.csv', 'r', newline='') as f:
    nodes = [row[0] for row in reader(f) if row]

with open('edges.csv', 'r', newline='') as f:
    edges_with_tags = [row for row in reader(f) if row]

In [None]:
mdg = nx.MultiDiGraph()
mdg.add_nodes_from(nodes)
for source, target, *question_tags in edges_with_tags:
    # Each edge is annotated with the tags of the question it belongs to
    mdg.add_edge(source, target, tags=question_tags)

# Remove users that have not posted any question/answer
isolated_users = list(nx.isolates(mdg))
mdg.remove_nodes_from(isolated_users)

# For algorithms that need a simple undirected graph
g = nx.Graph(mdg)

In [None]:
components = list(nx.connected_components(g))
components.sort(key=len, reverse=True)  # largest component first
big_component = components[0]
print(len(components))
print(len(big_component))  # one giant component
len(components[1])  # all the others are tiny and can be ignored

In [None]:
# Restrict the multi-digraph to the giant component and derive its simple undirected form
connected_mdg = mdg.subgraph(big_component)
connected_g = nx.Graph(connected_mdg)
# nx.diameter(subg)  # long running time

# General Stats

In [None]:
# Size of the full multi-digraph: node count first, then edge count
print(mdg.number_of_nodes())
print(mdg.number_of_edges())

In [None]:
# (user, out-degree) pairs, largest out-degree first
sorted_ans_degrees = sorted(mdg.out_degree(), key=lambda pair: pair[1], reverse=True)

In [None]:
# (user, in-degree) pairs, largest in-degree first
sorted_qu_degrees = sorted(mdg.in_degree(), key=lambda pair: pair[1], reverse=True)

In [None]:
sorted_ans_degrees[:5]  # users with the most answers

In [None]:
sorted_qu_degrees[:5]  # users who received the most answers

In [None]:
# Median out-degree over all users
med_ans_degree = median(degree for _, degree in mdg.out_degree())
med_ans_degree

In [None]:
# Median in-degree over all users
med_qu_degree = median(degree for _, degree in mdg.in_degree())
med_qu_degree  # most users only register to post a single question/answer

In [None]:
Counter(degree for _, degree in mdg.out_degree())  # how many users gave x answers; could be plotted

In [None]:
ans_dict = dict(mdg.out_degree())
items = list(ans_dict.items())
# Users whose out-degree is zero
users_who_never_answer = set(user for user in ans_dict if ans_dict[user] == 0)
len(users_who_never_answer)

In [None]:
q_dict = dict(mdg.in_degree())
items = list(q_dict.items())
# Users whose in-degree is zero
users_who_never_ask = set(user for user in q_dict if q_dict[user] == 0)
len(users_who_never_ask)

In [None]:
# Casual users have an out-degree or an in-degree of zero; everyone else is a core user
casual_users = users_who_never_answer | users_who_never_ask
core_users = set(mdg.nodes) - casual_users
len(core_users)

In [None]:
core_user_graph = mdg.subgraph(core_users)
len(core_user_graph.edges)  # answers among core users

In [None]:
casual_user_graph = mdg.subgraph(casual_users)
len(casual_user_graph.edges)  # answers among casual users

In [None]:
# Core users together with the users whose in-degree is zero
users_who_answer = core_users | users_who_never_ask
answer_user_graph = mdg.subgraph(users_who_answer)
len(answer_user_graph.edges) - len(core_user_graph.edges)  # answers from casual users to core users

In [None]:
# Core users together with the users whose out-degree is zero
users_who_ask = core_users | users_who_never_answer
ask_user_graph = mdg.subgraph(users_who_ask)
len(ask_user_graph.edges) - len(core_user_graph.edges)  # answers from core users to casual users

![](bowtie.png)
Core and Casual user graph (simplified, to scale)

In [None]:
# not very informative at the moment
user_degrees = [(mdg.out_degree(n), mdg.in_degree(n)) for n in mdg.nodes]
# out-degree / in-degree per user; a zero on either side is replaced by 1
ratios = [(out_deg or 1) / (in_deg or 1) for out_deg, in_deg in user_degrees]
sorted(ratios, reverse=True)

In [None]:
# not very informative at the moment
sorted(ratios)

# Community Detection

In [None]:
# Partition with the highest modularity (Louvain).  The algorithm is
# randomised, so the seed is pinned: without it the community ids, and every
# table or plot derived from them, differ between runs of the notebook.
partition = best_partition(connected_g, random_state=42)

# Invert the node -> community mapping into community -> set of member nodes
communities = {}
for node, community in partition.items():
    communities.setdefault(community, set()).add(node)

# Only communities with at least 10 members are analysed further
big_communities = {k: v for k, v in communities.items() if len(v) >= 10}

In [None]:
# Flatten the member sets of all big communities into a single node list
bigcomm_nodes = list(itertools.chain.from_iterable(big_communities.values()))

bigcomm_mdg = connected_mdg.subgraph(bigcomm_nodes)
bigcomm_g = connected_g.subgraph(bigcomm_nodes)

In [None]:
def tag_frequency(graph):
    """Return the percentage share of every tag found on the edges of `graph`.

    Tags are gathered from the ``'tags'`` entry of ``graph[src][dest]`` for
    each ``(src, dest)`` pair yielded by ``graph.edges()``.  The result maps
    tag -> 100 * occurrences / total number of gathered tags, with the most
    frequent tag first.
    """
    collected = []
    for src, dest in graph.edges():
        collected.extend(graph[src][dest]['tags'])
    total = len(collected)
    ranked = Counter(collected).most_common()
    return {tag: 100 * occurrences / total for tag, occurrences in ranked}

In [None]:
def top_tags(graph):
    """List the (tag, percentage) pairs of `graph`, highest percentage first."""
    freq = tag_frequency(graph)
    return sorted(freq.items(), key=lambda item: item[1], reverse=True)

In [None]:
top_tags(g)[:10]  # first ten (tag, percentage) pairs of the whole graph, most frequent tag first

In [None]:
from collections import namedtuple

# Record describing one radar chart: its title, the axis labels and the value series to draw
RadarData = namedtuple('RadarData', 'title label values')

In [None]:
N = 10  # number of radar axes, i.e. tags shown per community
global_tf = tag_frequency(g)
common_tags = list(global_tf.keys())[:N]

dataset = []

for comm in big_communities:
    comm_g = nx.subgraph(bigcomm_g, communities[comm])

    tf = tag_frequency(comm_g)
    # Take N tags (not a literal 10) so that the number of values always
    # matches the number of axes the radar chart is created with.
    tags = list(tf.keys())[:N]

    this_vals = [tf[tag] for tag in tags]
    # .get() so that a tag missing from the global table counts as 0
    # instead of aborting the whole loop with a KeyError.
    global_vals = [global_tf.get(tag, 0) for tag in tags]

    # A community with fewer than N distinct tags would yield series shorter
    # than the N axes; pad with unlabeled zero entries to keep every record
    # exactly N long.
    missing = N - len(tags)
    if missing > 0:
        tags += [''] * missing
        this_vals += [0] * missing
        global_vals += [0] * missing

    # values[0] is the community's series, values[1] the global one
    values = (this_vals, global_vals)

    dataset.append(RadarData(f'Community {comm}', tags, values))
    #diff_dict = {key: tf[key] - global_tf.get(key, 0) for key in tf.keys()}
    #print(diff_dict)

In [None]:
theta = radar.radar_factory(N, frame='polygon')

# One radar chart per big community, stacked vertically.  squeeze=False keeps
# `axes` a 2-D array even when there is only a single community, so that
# axes.flatten() below always works.
fig, axes = plt.subplots(figsize=(5, 120), nrows=len(big_communities),
                         subplot_kw=dict(projection='radar'), squeeze=False)
fig.subplots_adjust(top=1)

for ax, data in zip(axes.flatten(), dataset):
    ax.set_thetagrids(np.degrees(theta), labels=data.label)
    ax.set_rgrids([5, 10, 15, 20])
    ax.set_title(data.title, weight='bold', size='medium', position=(0.5, 1.1),
                 horizontalalignment='center', verticalalignment='center')
    # red: first value series of the record, blue: second value series
    ax.plot(theta, data.values[0], color='r')
    ax.fill(theta, data.values[0], facecolor='r', alpha=0.25)
    ax.plot(theta, data.values[1], color='b')
    ax.fill(theta, data.values[1], facecolor='b', alpha=0.25)

# add legend relative to top-left plot
#ax = axes[0, 0]
#labels = ('Global', 'Community')
#legend = ax.legend(labels, loc=(0.9, .95), labelspacing=0.1, fontsize='small')

# Save through the figure object and before plt.show(): once the figure has
# been shown, a following plt.savefig() would write a new, empty figure.
fig.savefig('community_tags')
plt.show()

# Drawing

In [None]:
# Partitions with fewer than 10 nodes are to be thrown out; they only clutter the picture
small_partition_nodes = [node for node in partition if node not in bigcomm_g]

# Independent copy of the partition, limited to the nodes that are part of bigcomm_g
big_partition = {node: community for node, community in partition.items() if node in bigcomm_g}

In [None]:
import matplotlib.patches as mpatches
def colors(vals):
    """Assign one colour of the hsv colormap to every distinct value in `vals`.

    Returns a pair ``(cols, legend_objs)``: `cols` holds the colour of each
    entry of `vals` in iteration order, `legend_objs` holds one legend patch
    per distinct value, labelled ``'Community <value>'``.
    """
    distinct = list(Counter(vals))  # distinct values in first-seen order
    n = len(distinct)
    cmap = plt.cm.hsv
    colour_of = {part: cmap(position / n) for position, part in enumerate(distinct)}

    cols = [colour_of[val] for val in vals]
    legend_objs = [
        mpatches.Patch(color=colour_of[part], label=f'Community {part}')
        for part in distinct
    ]
    return cols, legend_objs

In [None]:
positions = position_communities.community_layout(bigcomm_g, big_partition, 3)


plt.figure(num=None, figsize=(40,40))

# Fix the node order once and derive the colours from that same order.
# node_color is matched to the drawn nodes by position, and the iteration
# order of big_partition is not guaranteed to equal the iteration order of
# the bigcomm_g subgraph view, which could colour nodes with the wrong
# community.
nodelist = list(bigcomm_g.nodes)
cols, legend_objs = colors([big_partition[node] for node in nodelist])
plt.legend(handles=legend_objs, fontsize=40)
nx.draw_networkx(bigcomm_g, pos=positions, nodelist=nodelist, node_size=20, width=0.4,
                 with_labels=False, node_color=cols)
plt.savefig("partition.png")