# Hybrid Recommends

recommend legal documents based on citation network analysis and document embeddings similarity

- Berechnung der hybriden Empfehlungen
- Anekdotische Evaluation
- Vorbereitung Gesamtevaluation: Teil hybride Empfehlungen

In [65]:
import pandas as pd
import numpy as np

import networkx as nx

import heapq
from operator import itemgetter

import hvplot.networkx as hvnx

from scipy.spatial import distance

import datetime

#### <span style='background:lightgreen'>Load df:</span>

In [14]:
df_embeddings = pd.read_json("data/dataframes/df_with_docvectors.json", orient='columns', encoding='utf-8')

In [15]:
df_embeddings.columns

Index(['az', 'datum', 'doknr', 'gertyp', 'spruchkoerper', 'doktyp', 'titel',
       'tenor', 'gruende', 'clean_gruende', 'clean_tenor', 'clean_titel',
       'combined_clean', 'doc_vector'],
      dtype='object')

In [16]:
# keep only necessary columns of df_embeddings
df_embeddings_short = df_embeddings[['az', 'datum', "doknr", "doktyp", "titel", "tenor", "gruende", "doc_vector"]]

### NETWORK

### graph

#### <span style='background:lightgreen'>Load graph / citation network dg from notebook CitationNetwork:</span>

In [17]:
dg = nx.read_gpickle("graphs/dg.gpickle")

In [18]:
# show number of edges
dg.number_of_edges()

13782

In [19]:
# show number of nodes
dg.number_of_nodes()

5826

In [20]:
# check if graph is acyclic
nx.is_directed_acyclic_graph(dg)

True

### network functions
from notebook CitationNetwork

In [21]:
# get n keys with maximum values from dictionary
def getMaxValues(dictionary: dict, n: int):
    top_n = heapq.nlargest(n, dictionary.items(), key=itemgetter(1))
    return top_n   

In [22]:
# create one level of sub citation net for document of interest from existing full network
def createLevel(doi, network):
        cites, edges_f = getEdgesForward(doi, network)
        cited_by, edges_b = getEdgesBackward(doi, network)
        return cites, edges_f, cited_by, edges_b

In [23]:
# document of interest cites these documents
# returns list of nodes (cites) and list of edges
def getEdgesForward(doc_of_interest: str, main_network):
    cites = list(main_network.successors(doc_of_interest))
    source_nodes = [doc_of_interest]*len(cites)
    edges = list(zip(source_nodes,cites))
    return cites, edges

In [24]:
# document of interest is cited by these documents
# returns list of nodes (cited_by) and list of edges
def getEdgesBackward(doc_of_interest: str, main_network):
    cited_by = list(main_network.predecessors(doc_of_interest))
    source_nodes = [doc_of_interest]*len(cited_by)
    edges = list(zip(cited_by, source_nodes))
    return cited_by, edges

In [25]:
"""
create multilevel citation net
(3 in each direction -> 6-level)
for document of interest doi 
based on given full network
"""
def create3levelNet(doi, orig_digraph):    
    collected_edges = []
    
    cites1, citedby1, cites2, cites2_, citedby2, citedby2_ = [],[],[],[],[],[]
    # first level
    cites1, edges_forward, citedby1, edges_backward = createLevel(doi, orig_digraph)
    collected_edges = collected_edges + edges_forward + edges_backward

    # second level
    if cites1:
        for c in cites1:
            cites2, edges_forward, citedby2, edges_backward = createLevel(c, orig_digraph)
            collected_edges = collected_edges + edges_forward + edges_backward
    if citedby1:
        for cb in citedby1:
            cites2_, edges_forward, citedby2_, edges_backward = createLevel(cb, orig_digraph)
            collected_edges = collected_edges + edges_forward + edges_backward

    # third level
    if cites2:
        for c in cites2:
            cites3, edges_forward, citedby3, edges_backward = createLevel(c, orig_digraph)
            collected_edges = collected_edges + edges_forward + edges_backward
    if citedby2:
        for bi in citedby2:
            cites3_a, edges_forward, citedby3_a, edges_backward = createLevel(bi, orig_digraph)
            collected_edges = collected_edges + edges_forward + edges_backward
    if cites2_:
        for c in cites2_:
            cites3_b, edges_forward, citedby3_b, edges_backward = createLevel(c, orig_digraph)
            collected_edges = collected_edges + edges_forward + edges_backward
    if citedby2_:
        for bi in citedby2_:
            cites3_c, edges_forward, citedby3_c, edges_backward = createLevel(bi, orig_digraph)
            collected_edges = collected_edges + edges_forward + edges_backward
    
    multCitNet = nx.DiGraph()
    multCitNet.add_edges_from(collected_edges)
    
    return multCitNet 

In [26]:
"""
calculate bibliographic coupling score
as proposed in Son and Kim 2018
for a document d in a given directed graph
"""
def calculateBiblioCouplingScore(d, di_graph):
    coupled = []
    successors = list(di_graph.successors(d))
    for successor in successors:
        predecessors = list(di_graph.predecessors(successor))
        predecessors.remove(d)
        coupled += predecessors
    return len(coupled)    

In [27]:
"""
calculate cocitation score
as proposed in Son and Kim 2018
for a document d in a graph
"""
def calculateCocitationScore(d, di_graph):
    cocited = []
    predecessors = list(di_graph.predecessors(d))
    for predecessor in predecessors:
        successors = list(di_graph.successors(predecessor))
        successors.remove(d)
        cocited += successors
    return len(cocited)    

In [28]:
"""
calculate (undirected) distance
between document of interest
and another document d
"""
def calculateDistance(doi, d, di_graph):
    # convert to undirected
    graph = di_graph.to_undirected()
    return nx.shortest_path_length(graph, doi, d)

In [29]:
"""
calculate C-score
as suggested in Son and Kim 2018
for a document d in a graph
relative to a given document of interest doi
"""
def calculateCScore(doi, d, di_graph):
    bcs = calculateBiblioCouplingScore(d,di_graph)
    ccs = calculateCocitationScore(d,di_graph)
    d = calculateDistance(doi, d, di_graph)
    return (bcs+ccs)/d

In [30]:
# get all C-scores for a given document of interest in a network
def getCScoresForNetwork(doi, di_graph) -> dict:
    all_nodes = list(di_graph.nodes)
    all_nodes.remove(doi)
    c_score_dict = dict()
    for node in all_nodes:
        c = calculateCScore(doi, node, di_graph)
        c_score_dict.update({node: c})
    return c_score_dict

In [31]:
"""
get n candidate documents (with highest C-scores)
for a given document of interest in a network
"""
def getNCandidateDocuments(n: int, doi, di_graph) -> list:
    c_scores = getCScoresForNetwork(doi, di_graph)
    return list(dict(getMaxValues(c_scores, n)).keys())

In [32]:
"""
calculate the indegree centrality
of candidate documents in a network
"""
def getIndegree(net: nx.DiGraph, candidates: list) -> dict:
    in_degree_dict = dict()
    for candidate in candidates:
        indegree = net.in_degree(candidate)
        in_degree_dict.update({candidate: indegree})
    return in_degree_dict 

In [33]:
"""
calculate the closeness centrality
of candidate documents in a network
(undirected)
"""
def getClosenessCentralityUndirected(net: nx.DiGraph, candidates: list) -> dict:
    closeness_dict = dict()
    net_ud = net.to_undirected()
    for candidate in candidates:
        closeness = nx.closeness_centrality(net_ud, candidate)
        closeness_dict.update({candidate: closeness})
    return closeness_dict

In [34]:
"""
calculate the betweenness centrality
of candidate documents in a network
(undirected)
"""
def getBetweennessCentralityUndirected(net: nx.DiGraph, candidates: list) -> dict:
    betweenness_dict = nx.betweenness_centrality(net.to_undirected())    
    return {k:betweenness_dict[k] for k in candidates if k in betweenness_dict}

In [35]:
"""
calculate the Katz centrality
(instead of eigenvector centrality as Kim/Son propose)
of candidate documents in a network
(directed)
"""
def getKatzCentralityDirected(net: nx.DiGraph, candidates: list) -> dict:
    katz_dict = nx.katz_centrality(net)    
    return {k: katz_dict[k] for k in candidates if k in katz_dict}

In [36]:
# convert centrality values to ranks
def revertToRanks(centrality_dict: dict) -> dict:
    sorted_candidates = sorted(centrality_dict.items(), key=itemgetter(1), reverse=True)
    rank_dict = dict()
   
    for i, candidate in enumerate(sorted_candidates):
        # if first item in sorted_candidates:
        if i == 0:
            rank_dict.update({candidate[0]: i})   
        elif i != len(sorted_candidates)-1 and i !=0 and sorted_candidates[i][1] == sorted_candidates[i-1][1]:
            rank_dict.update({candidate[0]: rank_dict[sorted_candidates[i-1][0]]})
        # if last item in list    
        elif i == len(sorted_candidates)-1:
            if sorted_candidates[i][1] < sorted_candidates[i-1][1]:
                rank_dict.update({candidate[0]: rank_dict[sorted_candidates[i-1][0]]+1})
            elif sorted_candidates[i][1] == sorted_candidates[i-1][1]:
                rank_dict.update({candidate[0]: rank_dict[sorted_candidates[i-1][0]] })
        else:
            rank_dict.update({candidate[0]: rank_dict[sorted_candidates[i-1][0]]+1})
            
    return rank_dict

In [37]:
# combine the 4 rankings based on the different centrality measures to one ranking
def combineRanks(ranks1: dict, ranks2: dict, ranks3: dict, ranks4: dict) -> dict:
    avg_rank_dict = dict()
    for key in ranks1:
        avg_rank = (ranks1[key] + ranks2[key] + ranks3[key] + ranks4[key]) / 4.0
        avg_rank_dict.update({key: avg_rank})
    return avg_rank_dict

In [38]:
# normalize ranks to number between 0 and 1
def normalizeRanks(xi: float, mini: float, maxi: float) -> float:
    return (xi-mini)/(maxi-mini+5)

In [39]:
"""
adapted from function recommendNDecisions in notebook CitationNetwork
calculates ranks for every document connected to doi in main_net
returns dict with document, combined rank, normalized rank
"""
def calcNetworkScores(doi: str, main_net: nx.DiGraph, df) -> dict:
    
    # generate directional 6-level (3 levels backward, 3 forward) citation network
    # from document of interest doi
    print("creating multilevel directed citation net..")
    mult_level_net = create3levelNet(doi, main_net)
    print("number of nodes: " + str(mult_level_net.number_of_nodes()))
    print("number of edges: " + str(mult_level_net.number_of_edges()))
    
    # select candidate documents / get all documents
    n_candidates = int(mult_level_net.number_of_nodes())
    print("selecting " + str(n_candidates) + " candidate documents..")
    # get all possible candidates
    candidates = getNCandidateDocuments(n_candidates, doi, mult_level_net)
    
    print(str(len(candidates)) + " found")
    
    # determine recommend documents
    print("determining recommend documents..")
    # indegree 
    ic = getIndegree(mult_level_net, candidates)
    ic_ranked = revertToRanks(ic)
    print("document with highest indegree: " + str(list(ic_ranked.keys())[0]))
    # closeness
    cc = getClosenessCentralityUndirected(mult_level_net, candidates)
    cc_ranked = revertToRanks(cc)
    print("document with highest closeness centrality: " + str(list(cc_ranked.keys())[0]))
    # betweenness
    bc = getBetweennessCentralityUndirected(mult_level_net, candidates)
    bc_ranked = revertToRanks(bc)
    print("document with highest betweenness centrality: " + str(list(bc_ranked.keys())[0]))
    # Katz
    kc = getKatzCentralityDirected(mult_level_net, candidates)
    kc_ranked = revertToRanks(kc)
    print("document with highest Katz centrality: " + str(list(kc_ranked.keys())[0]))
    
    combined_ranks = combineRanks(ic_ranked, cc_ranked, bc_ranked, kc_ranked)
    sorted_combined_ranks = sorted(combined_ranks.items(), key=itemgetter(1))
    
    # normalize ranks for sorted candidates
    dict_norm_ranks = dict()
    ranks = [x[1] for x in sorted_combined_ranks]
    max_rank = max(ranks)
    min_rank = min(ranks)
    for i,rank_doc in enumerate(sorted_combined_ranks):
        dict_norm_ranks.update({rank_doc[0]:{"combined_rank": rank_doc[1], "normalized_rank": 1-normalizeRanks(rank_doc[1],min_rank,max_rank)}})
    
    return dict_norm_ranks

### example testing for doi "KORE634712019"

In [None]:
#calcNetworkScores("KORE634712019", dg, df_embeddings_short)

"""
{'JURE130006300': {'combined_rank': 0.5, 'normalized_rank': 1.0},
 'JURE100065079': {'combined_rank': 0.5, 'normalized_rank': 1.0},
 'JURE150002589': {'combined_rank': 3.0,
  'normalized_rank': 0.9342105263157895},
 'JURE130010202': {'combined_rank': 5.75,
  'normalized_rank': 0.8618421052631579},
 'KORE313582016': {'combined_rank': 6.0,
  'normalized_rank': 0.8552631578947368},
 'JURE140006565': {'combined_rank': 9.0,
  'normalized_rank': 0.7763157894736842},
 'JURE110009413': {'combined_rank': 9.0,
  'normalized_rank': 0.7763157894736842},
 'JURE150008053': {'combined_rank': 9.25,
  'normalized_rank': 0.7697368421052632},
 'JURE150001651': {'combined_rank': 9.5,
  'normalized_rank': 0.7631578947368421},
 'KORE304862018': {'combined_rank': 10.0, 'normalized_rank': 0.75},
 'JURE150016039': {'combined_rank': 11.0,
  'normalized_rank': 0.7236842105263157},
 'JURE120021683': {'combined_rank': 12.25,
  'normalized_rank': 0.6907894736842105},
 'JURE160006718': {'combined_rank': 12.75,
  'normalized_rank': 0.6776315789473684},
  ....
"""

explore

In [42]:
KORE634712019_net = create3levelNet("KORE634712019", dg)

In [43]:
# plot 3level network around KORE634712019
pos = nx.layout.spring_layout(KORE634712019_net)

node_sizes = [3 + 2 * i for i in range(len(KORE634712019_net))]
M = KORE634712019_net.number_of_edges()
edge_colors = range(2, M + 2)
edge_alphas = [(5 + i) / (M + 4) for i in range(M)]

nodes = hvnx.draw_networkx_nodes(KORE634712019_net, pos, node_size=node_sizes, node_color='blue')
edges = hvnx.draw_networkx_edges(KORE634712019_net, pos, node_size=node_sizes, arrowstyle='->',
                               arrowsize=10, edge_color=edge_colors,
                               edge_cmap='Blues', edge_width=2, colorbar=True)

nodes * edges

In [45]:
# doi
df_embeddings_short.loc[df_embeddings_short['doknr'] == "KORE634712019"].titel.values[0]

'Jugendstrafverfahren: jugendspezifische Bestimmung des Schuldgehalts; Verhängung einer Jugendstrafe trotz nicht mehr bestehenden Erziehungsbedarfs'

In [46]:
# first
print(df_embeddings_short.loc[df_embeddings_short['doknr'] == "JURE100065079"].titel.values)
print(df_embeddings_short.loc[df_embeddings_short['doknr'] == "JURE130006300"].titel.values)

['Rücktritt vom versuchten Totschlag: Fehlgeschlagener Versuch; Unterscheidung zwischen unbeendetem und beendetem Versuch']
['Rücktritt vom Tötungsversuch: Abgrenzung zwischen beendetem, unbeendetem und fehlgeschlagenem Versuch bei einem durch Zäsuren gekennzeichneten mehraktigen Geschehen; Konkurrenz zwischen Freiheitsberaubung und Körperverletzung; einzelne Versuchshandlungen als natürliche Handlungseinheit']


In [47]:
# second
df_embeddings_short.loc[df_embeddings_short['doknr'] == 'KORE314022011'].titel.values[0]

'Strafzumessung: Kompensation bei Verfahrensverzögerung im Ausland'

In [48]:
# third
df_embeddings_short.loc[df_embeddings_short['doknr'] == 'JURE150002589'].titel.values[0]

'Notwendiger Inhalt eines Strafurteils wegen versuchten Mordes: Abgrenzung des fehlgeschlagenen vom unbeendeten und beendeten Versuch; Prüfung eines freiwilligen Rücktritts'

In [49]:
# fourth
print(df_embeddings_short.loc[df_embeddings_short['doknr'] == "JURE130010202"].titel.values)

['Jugendstrafe nach Vergewaltigung: Anordnungsgrund der "Schwere der Schuld"; kumulatives Erfordernis einer Erziehungsbedürftigkeit']


### EMBEDDINGS

clean df

In [50]:
df_embeddings_short.isnull().sum()

az            0
datum         0
doknr         0
doktyp        0
titel         0
tenor         0
gruende       0
doc_vector    1
dtype: int64

In [51]:
np.where(df_embeddings_short.doc_vector.isnull())

(array([3679]),)

In [52]:
df_embeddings_short = df_embeddings_short.drop(3679).reset_index(drop=True)

prepare document vectors for similarity analysis

In [53]:
def prepareCandidateVectors(vectors):
    vectors_list = []
    for v in vectors:
        vectors_list.append(v) 
    vectors = np.asarray(vectors_list)
    return vectors

In [54]:
vectors = prepareCandidateVectors(df_embeddings_short.doc_vector.values)

In [55]:
vectors.shape

(5825, 300)

In [56]:
def getDoknrByIndex(df, index: int) -> str:
    return df.loc[index].doknr    

In [57]:
def getIndexByDoknr(df, doknr: str) -> int:
    return df[df['doknr'] == doknr].index[0]   

In [58]:
# sort dict by hybrid scores (descending)
def sortByHybridVal(dictio: dict) -> dict:
    sorted_dict = {k: v for k, v in sorted(dictio.items(), key=lambda item: item[1]["hybrid"], reverse=True)}
    return sorted_dict

In [59]:
# get top n hybrid recommends
def getNHybridRecommends(n: int, dictio: dict):
    sorted_dict = sortByHybridVal(dictio)
    top_n = list(sorted_dict.items())[:n]
    return top_n

In [60]:
# calculate hybrid recommends based on network AND embeddings
def calcHybridRecommends(doi: str, embedding_df, main_net: nx.DiGraph):
    
    # embeddings
    vectors = prepareCandidateVectors(embedding_df.doc_vector.values)
    
    target_vector = embedding_df[embedding_df.doknr==doi].doc_vector.values[0]
    distances = distance.cdist([target_vector], vectors, "cosine")[0]
    idxs_sorted = np.argsort(distances)
    embedding_dict = dict()
    for idx in idxs_sorted:
        d = distances[idx]
        similarity = 1 - d
        doknr = getDoknrByIndex(embedding_df, idx)
        embedding_dict.update({doknr: {"cos_sim": similarity}})
        
    # network
    net_dict = calcNetworkScores(doi, main_net, embedding_df)
    
    # hybrid
    hybrid_dict = dict()
    for key in embedding_dict:
        text_score = embedding_dict[key]["cos_sim"]
        if key in net_dict:
            net_score = net_dict[key]["normalized_rank"]     
        elif key not in net_dict:
            net_score = 0.0
        
        hybrid_score = (text_score+net_score)/2
        hybrid_dict.update({key: {"hybrid": hybrid_score, "net": net_score, "text": text_score}})
            
    
    return hybrid_dict

### Testing & Evaluation

In [69]:
# convert datum to readable format
df_embeddings_short["datum"] = df_embeddings_short["datum"].apply(lambda x: datetime.datetime.utcfromtimestamp(x/1000))

In [72]:
# generate recommendations and show results
def recommendAndShow(doi_doknr: str, amount_recs: int, dataframe, network):
    ranking = calcHybridRecommends(doi_doknr, dataframe, network)
    recs = getNHybridRecommends(amount_recs, ranking)
    print()
    print("DOI: " + doi_doknr)
    print("Titel: " + dataframe.loc[dataframe['doknr'] == doi_doknr].titel.values[0])
    print("Datum: " + str(dataframe.loc[dataframe['doknr'] == doi_doknr].datum.values[0]))
    print("AZ: " + dataframe[dataframe['doknr'] == doi_doknr].az.values[0])
    for i, rec in enumerate(recs):
        print()
        print("Recommendation # " + str(i+1) + ": " + rec[0] + " (score: " + str(rec[1]) + ")")
        print("Titel: " + dataframe.loc[dataframe['doknr'] == rec[0]].titel.values[0])
        print("Datum: " + str(dataframe.loc[dataframe['doknr'] == rec[0]].datum.values[0]))
        print("AZ: " + dataframe[dataframe['doknr'] == rec[0]].az.values[0])

test 1: doi 'KORE634712019'

In [73]:
recommendAndShow('KORE634712019', 10, df_embeddings_short, dg)

creating multilevel directed citation net..
number of nodes: 129
number of edges: 188
selecting 129 candidate documents..
128 found
determining recommend documents..
document with highest indegree: JURE130006300
document with highest closeness centrality: JURE100065079
document with highest betweenness centrality: JURE100065079
document with highest Katz centrality: JURE130006300

DOI: KORE634712019
Titel: Jugendstrafverfahren: jugendspezifische Bestimmung des Schuldgehalts; Verhängung einer Jugendstrafe trotz nicht mehr bestehenden Erziehungsbedarfs
Datum: 2019-11-13T00:00:00.000000000
AZ: 2 StR 217/19

Recommendation # 1: JURE140006565 (score: {'hybrid': 0.8394914926596815, 'net': 0.7763157894736842, 'text': 0.9026671958456788})
Titel: Jugendstrafverfahren wegen versuchten Totschlages und gefährlicher Körperverletzung: Berücksichtigung des Erziehungsgedankens bei der Strafzumessung
Datum: 2014-02-19T00:00:00.000000000
AZ: 2 StR 413/13

Recommendation # 2: KORE313582016 (score: {'hybr

test 2: doi 'KORE634732019'

In [74]:
recommendAndShow('KORE634732019', 10, df_embeddings_short, dg)

creating multilevel directed citation net..
number of nodes: 98
number of edges: 129
selecting 98 candidate documents..
97 found
determining recommend documents..
document with highest indegree: JURE120006049
document with highest closeness centrality: KORE624222018
document with highest betweenness centrality: KORE604202020
document with highest Katz centrality: JURE120006049

DOI: KORE634732019
Titel: Berücksichtigung von Einziehung bei Strafzumessung
Datum: 2019-11-05T00:00:00.000000000
AZ: 2 StR 447/19

Recommendation # 1: JURE120006049 (score: {'hybrid': 0.984528034119777, 'net': 1.0, 'text': 0.9690560682395539})
Titel: Strafzumessung: Berücksichtigung der Einziehung eines Pkw
Datum: 2012-02-16T00:00:00.000000000
AZ: 3 StR 470/11

Recommendation # 2: KORE624222018 (score: {'hybrid': 0.9593944174330702, 'net': 0.9611650485436893, 'text': 0.9576237863224509})
Titel: Feststellung des Wertes des einzuziehenden Gegenstandes notwendig
Datum: 2018-05-03T00:00:00.000000000
AZ: 3 StR 8/18


test 3

In [75]:
# test3 = calcHybridRecommends('JURE150008058', df_embeddings_short, dg)
# Dokument hat keine Links (innerhalb des Korpus).. hier müsste auf 100% textbasierte recommends zurückgegriffen werden

test 4

In [77]:
recommendAndShow('JURE130011761', 5, df_embeddings_short, dg)

creating multilevel directed citation net..
number of nodes: 98
number of edges: 135
selecting 98 candidate documents..
97 found
determining recommend documents..
document with highest indegree: JURE130016580
document with highest closeness centrality: JURE120001005
document with highest betweenness centrality: JURE120001005
document with highest Katz centrality: JURE130016580

DOI: JURE130011761
Titel: Strafbarkeit eines Baukolonnenführers wegen Steuerhinterziehung in Tateinheit mit Vorenthaltung und Veruntreuung von Arbeitsentgelt bei Beschäftigung von Schwarzarbeitern sowie Strafbarkeit eines Mitwirkenden wegen Beihilfe
Datum: 2013-06-05T00:00:00.000000000
AZ: 1 StR 626/12

Recommendation # 1: JURE120001005 (score: {'hybrid': 0.9205328683231088, 'net': 0.9824561403508771, 'text': 0.8586095962953404})
Titel: Steuerhinterziehung: Berichtigung des auf Grund unberechtigten Steuerausweises geschuldeten Umsatzsteuerbetrages; Strafzumessung bei Serientaten
Datum: 2011-11-29T00:00:00.000000

### experimental evaluation of random DOIs

#### KORE619292019

In [78]:
recommendAndShow('KORE619292019', 5, df_embeddings_short, dg)

creating multilevel directed citation net..
number of nodes: 57
number of edges: 71
selecting 57 candidate documents..
56 found
determining recommend documents..
document with highest indegree: JURE100071305
document with highest closeness centrality: KORE621042018
document with highest betweenness centrality: KORE621042018
document with highest Katz centrality: JURE100071305

DOI: KORE619292019
Titel: Subventionsbetrug: Vorliegen subventionserheblicher Tatsachen
Datum: 2018-08-22T00:00:00.000000000
AZ: 3 StR 449/17

Recommendation # 1: KORE305842014 (score: {'hybrid': 0.8336094412830464, 'net': 0.8225806451612903, 'text': 0.8446382374048026})
Titel: Subventionsbetrug: Erschleichung von Fördermitteln für den Wohnungsbau durch eine Privatperson; Begriff der Subvention; Ermittlung subventionserheblicher Regelungen
Datum: 2014-05-28T00:00:00.000000000
AZ: 3 StR 206/13

Recommendation # 2: KORE621042018 (score: {'hybrid': 0.828396869308061, 'net': 0.7419354838709677, 'text': 0.914858254745

####  KORE603522019

In [79]:
recommendAndShow('KORE603522019', 5, df_embeddings_short, dg)

creating multilevel directed citation net..
number of nodes: 88
number of edges: 94
selecting 88 candidate documents..
87 found
determining recommend documents..
document with highest indegree: KORE624622018
document with highest closeness centrality: KORE308492018
document with highest betweenness centrality: KORE308492018
document with highest Katz centrality: KORE624622018

DOI: KORE603522019
Titel: Einziehungsanordnung bei Erklärung des Verzichts auf Rückgabe
Datum: 2018-12-10T00:00:00.000000000
AZ: 5 StR 539/18

Recommendation # 1: KORE308492018 (score: {'hybrid': 0.8125001430870032, 'net': 1.0, 'text': 0.6250002861740065})
Titel: Unerlaubtes Handeltreiben mit Betäubungsmitteln: Erforderlichkeit einer förmlichen Einziehung der beim Angeklagten sichergestellten Betäubungsmittelerlöse
Datum: 2018-04-10T00:00:00.000000000
AZ: 5 StR 611/17

Recommendation # 2: JURE160000359 (score: {'hybrid': 0.7553733145286039, 'net': 0.8840579710144928, 'text': 0.6266886580427149})
Titel: Betäubungs

#### JURE160012885

In [80]:
recommendAndShow('JURE160012885', 3, df_embeddings_short, dg)

creating multilevel directed citation net..
number of nodes: 78
number of edges: 115
selecting 78 candidate documents..
77 found
determining recommend documents..
document with highest indegree: JURE130011773
document with highest closeness centrality: KORE611002017
document with highest betweenness centrality: KORE611002017
document with highest Katz centrality: JURE120017848

DOI: JURE160012885
Titel: Mordversuch: Definition von Heimtücke sowie Arg- und Wehrlosigkeit; Heimtücke des Wohnungsinhabers bei einer polizeilich veranlassten Wohnungsöffnung; Wegfall der Wehrlosigkeit durch Flucht des Opfers
Datum: 2016-06-28T00:00:00.000000000
AZ: 3 StR 120/16

Recommendation # 1: KORE611002017 (score: {'hybrid': 0.8691218861625982, 'net': 0.9418604651162791, 'text': 0.7963833072089174})
Titel: Mordmerkmal: Heimtücke
Datum: 2017-01-24T00:00:00.000000000
AZ: 2 StR 459/16

Recommendation # 2: JURE130011773 (score: {'hybrid': 0.8302596930454176, 'net': 1.0, 'text': 0.660519386090835})
Titel: Str

#### KORE633042019

In [81]:
recommendAndShow('KORE633042019', 3, df_embeddings_short, dg)

creating multilevel directed citation net..
number of nodes: 155
number of edges: 186
selecting 155 candidate documents..
154 found
determining recommend documents..
document with highest indegree: JURE140019879
document with highest closeness centrality: KORE618212017
document with highest betweenness centrality: KORE618212017
document with highest Katz centrality: JURE140019879

DOI: KORE633042019
Titel: Betrug: Verklammerung zur Tateinheit durch unerlaubtes Betreiben von Bankgeschäften
Datum: 2019-10-15T00:00:00.000000000
AZ: 3 StR 379/19

Recommendation # 1: KORE618212017 (score: {'hybrid': 0.9548317119955492, 'net': 1.0, 'text': 0.9096634239910985})
Titel: Revision in Strafsachen: Beschwer des wegen Beihilfe statt Mittäterschaft verurteilten Angeklagten; natürliche Handlungseinheit zwischen der Beteiligung an der Vortat und der anschließenden Hehlerei; Konkurrenzverhältnis zwischen mehreren betrügerischen Einkäufen mit einer EC-Karte; Verwerfung der Berufung durch Beschluss als of

#### KORE618742019

In [82]:
recommendAndShow('KORE618742019', 3, df_embeddings_short, dg)

creating multilevel directed citation net..
number of nodes: 67
number of edges: 89
selecting 67 candidate documents..
66 found
determining recommend documents..
document with highest indegree: JURE140009960
document with highest closeness centrality: JURE140009960
document with highest betweenness centrality: JURE140009960
document with highest Katz centrality: JURE140009960

DOI: KORE618742019
Titel: Strafurteil: Absoluter Revisionsgrund einer Überschreitung der Urteilsabsetzungsfrist; Berücksichtigung früherer Auslandsverurteilungen im Rahmen der Strafzumessung und der Gesamtstrafenbildung
Datum: 2018-12-18T00:00:00.000000000
AZ: 1 StR 508/18

Recommendation # 1: JURE160012017 (score: {'hybrid': 0.848149046763369, 'net': 0.948051948051948, 'text': 0.74824614547479})
Titel: Jugendstrafverfahren wegen Betäubungsmitteldelikten: Maßgebliches Stufenverhältnis hinsichtlich Art und Gefährlichkeit des Rauschgifts im Rahmen der Strafzumessung; Bewertung von Methamphetamin; Anforderungen an d

#### JURE140020317

In [83]:
recommendAndShow('JURE140020317', 3, df_embeddings_short, dg)

creating multilevel directed citation net..
number of nodes: 658
number of edges: 1105
selecting 658 candidate documents..
657 found
determining recommend documents..
document with highest indegree: KORE300532012
document with highest closeness centrality: KORE300532012
document with highest betweenness centrality: KORE300532012
document with highest Katz centrality: KORE300532012

DOI: JURE140020317
Titel: Strafverfahren wegen Massenbetruges: Irrtumsbedingte Vermögensverfügung bei Zahlung auf unberechtigte Rechnung; notwendige Feststellungen zum Irrtum der Verfügenden
Datum: 2014-09-04T00:00:00.000000000
AZ: 1 StR 314/14

Recommendation # 1: KORE303572014 (score: {'hybrid': 0.8572446561248432, 'net': 0.8466353677621283, 'text': 0.8678539444875581})
Titel: Strafverfahren wegen gewerbsmäßigen Bandenbetruges: Anforderungen an die tatrichterliche Feststellung und Darlegung des täuschungsbedingten Irrtums kontaktierter Personen in einer Vielzahl von Einzelfällen des Missbrauchs des Einzugs

#### KORE603922017

In [84]:
recommendAndShow('KORE603922017', 3, df_embeddings_short, dg)

creating multilevel directed citation net..
number of nodes: 156
number of edges: 192
selecting 156 candidate documents..
155 found
determining recommend documents..
document with highest indegree: JURE160013475
document with highest closeness centrality: JURE160013475
document with highest betweenness centrality: JURE160013475
document with highest Katz centrality: JURE160013475

DOI: KORE603922017
Titel: Sachverhaltsaufklärung durch das Gericht: Umfang der Amtsaufklärungspflicht bei Erfolgsdelikten
Datum: 2017-08-17T00:00:00.000000000
AZ: 4 StR 127/17

Recommendation # 1: JURE120010394 (score: {'hybrid': 0.8171625390830712, 'net': 0.9635036496350365, 'text': 0.670821428531106})
Titel: Sexuelle Nötigung: Voraussetzungen einer schutzlosen Lage; konkludente Drohung auf Grund vorheriger Gewalterfahrung; Misshandlung eines Schutzbefohlenen durch fortgesetzte körperliche Züchtigungen
Datum: 2012-03-20T00:00:00.000000000
AZ: 4 StR 561/11

Recommendation # 2: JURE160013475 (score: {'hybrid':

#### KORE612272017

In [86]:
recommendAndShow('KORE612272017', 3, df_embeddings_short, dg)

creating multilevel directed citation net..
number of nodes: 96
number of edges: 102
selecting 96 candidate documents..
95 found
determining recommend documents..
document with highest indegree: KORE316702013
document with highest closeness centrality: KORE625282019
document with highest betweenness centrality: KORE316702013
document with highest Katz centrality: KORE316702013

DOI: KORE612272017
Titel: Widerrechtliches Parken: Nötigung und Erpressung durch Anbringen einer Parkkralle an Pkw oder Abschleppen an nicht genannten Ort
Datum: 2016-12-21T00:00:00.000000000
AZ: 1 StR 253/16

Recommendation # 1: KORE316702013 (score: {'hybrid': 0.9172384175350246, 'net': 1.0, 'text': 0.8344768350700491})
Titel: Nötigung durch anwaltliches Mahnschreiben: Aus der Tat erlangter Vermögenswert; Urteilstenor bei Absehen von Verfall wegen entgegenstehender Ansprüche Verletzter
Datum: 2013-09-05T00:00:00.000000000
AZ: 1 StR 162/13

Recommendation # 2: KORE615752017 (score: {'hybrid': 0.8307020603658544

#### KORE621852017

In [49]:
recommendAndShow('KORE621852017', 5, df_embeddings_short, dg)

creating multilevel directed citation net..
number of nodes: 118
number of edges: 169
selecting 118 candidate documents..
117 found
determining recommend documents..
document with highest indegree: KORE300052016
document with highest closeness centrality: KORE624792017
document with highest betweenness centrality: JURE130001697
document with highest Katz centrality: KORE300052016

DOI: KORE621852017
Titel: Besondere Haftprüfung bei Untersuchungshaft über 6 Monate: Anforderungen an die Beschreibung des Tatvorwurfs der Unterstützung einer terroristischen Vereinigung im Haftbefehl
Datum: 1502928000000
AZ: AK 34/17

Recommendation # 1: KORE624792017 (score: {'hybrid': 0.9304390530826006, 'net': 0.9609375, 'text': 0.8999406061652011})
Titel: Vorbereitung einer schweren staatsgefährdenden Gewalttat: Bestimmung des Verfahrensgegenstandes durch den Anklagesatz; erneuter Erlass eines Haftbefehls; Vorliegen einer Vorbereitungshandlung; Voraussetzungen der Unterstützung einer terroristischen Vere

## final evaluation (compare quality of all three approaches / recommender functions)

step 3: generate hybrid recommends

In [None]:
# Gesamtevaluation mit interessanten Titeln

1. JURE130015173

In [87]:
recommendAndShow('JURE130015173', 3, df_embeddings_short, dg)

creating multilevel directed citation net..
number of nodes: 345
number of edges: 582
selecting 345 candidate documents..
344 found
determining recommend documents..
document with highest indegree: KORE300532012
document with highest closeness centrality: KORE300532012
document with highest betweenness centrality: KORE300532012
document with highest Katz centrality: KORE300532012

DOI: JURE130015173
Titel: Abgrenzung zwischen Tötungsversuch und schwerer Körperverletzung: Anforderungen an Urteilsfeststellungen zum bedingten Tötungsvorsatz bei Handeln in affektiver Erregung; sichtbare Narbe im Gesicht als dauernde erhebliche Entstellung
Datum: 2013-07-17T00:00:00.000000000
AZ: 2 StR 139/13

Recommendation # 1: KORE300532012 (score: {'hybrid': 0.9510337839075825, 'net': 1.0, 'text': 0.9020675678151651})
Titel: Bedingter Tötungsvorsatz im Lichte der Hemmschwellentheorie
Datum: 2012-03-22T00:00:00.000000000
AZ: 4 StR 558/11

Recommendation # 2: JURE120006798 (score: {'hybrid': 0.94551309871

2. KORE308942018

In [112]:
recommendAndShow('KORE308942018', 3, df_embeddings_short, dg)

creating multilevel directed citation net..
number of nodes: 307
number of edges: 445
selecting 307 candidate documents..
306 found
determining recommend documents..
document with highest indegree: KORE300052016
document with highest closeness centrality: KORE300452019
document with highest betweenness centrality: KORE300452019
document with highest Katz centrality: KORE300052016

DOI: KORE308942018
Titel: Bildung bewaffneter Gruppen: Begriff der Gruppe; Begriff des Verfügens über Waffen oder andere gefährliche Werkzeuge
Datum: 1528934400000
AZ: 3 StR 585/17

Recommendation # 1: KORE300052016 (score: {'hybrid': 0.9293900765425183, 'net': 1.0, 'text': 0.8587801530850365})
Titel: Mitgliedschaft in einer kriminellen oder terroristischen Vereinigung: Bestimmung der Konkurrenzverhältnisse bei strafbaren Handlungen im Rahmen der Mitgliedschaft
Datum: 1436400000000
AZ: 3 StR 537/14

Recommendation # 2: JURE160012884 (score: {'hybrid': 0.8535963509519966, 'net': 0.9475409836065574, 'text': 0.7

3. JURE160020935

In [88]:
recommendAndShow('JURE160020935', 3, df_embeddings_short, dg)

creating multilevel directed citation net..
number of nodes: 597
number of edges: 1036
selecting 597 candidate documents..
596 found
determining recommend documents..
document with highest indegree: KORE300532012
document with highest closeness centrality: KORE300532012
document with highest betweenness centrality: KORE300532012
document with highest Katz centrality: KORE300532012

DOI: JURE160020935
Titel: Notwehr: Dauer eines gegenwärtigen Angriffs; Notwehrlage bei subjektiver Befürchtung eines Angriffs
Datum: 2016-11-24T00:00:00.000000000
AZ: 4 StR 235/16

Recommendation # 1: JURE120006798 (score: {'hybrid': 0.9185075667081135, 'net': 0.9600665557404326, 'text': 0.8769485776757943})
Titel: Tötungsdelikt: Voraussetzungen eines bedingten Tötungsvorsatzes; Bewertung nachträglichen Bedauerns und von Rettungsversuchen des Täters
Datum: 2012-02-23T00:00:00.000000000
AZ: 4 StR 608/11

Recommendation # 2: JURE140000756 (score: {'hybrid': 0.9119592221822066, 'net': 0.9118136439267887, 'text'

4. JURE150008059

In [114]:
recommendAndShow('JURE150008059', 3, df_embeddings_short, dg)

creating multilevel directed citation net..
number of nodes: 175
number of edges: 190
selecting 175 candidate documents..
174 found
determining recommend documents..
document with highest indegree: JURE150006558
document with highest closeness centrality: KORE617162020
document with highest betweenness centrality: KORE617162020
document with highest Katz centrality: JURE150006558

DOI: JURE150008059
Titel: Werben um Mitglieder für eine terroristische Vereinigung im Internet: Video mit dem Aufruf zur Unterstützung von Al Qaida mit Geldmitteln und mit dem Einsatz des eigenen Lebens
Datum: 1427932800000
AZ: 3 StR 197/14

Recommendation # 1: JURE130001697 (score: {'hybrid': 0.8709311297156995, 'net': 0.8691588785046729, 'text': 0.8727033809267262})
Titel: Unterstützung einer ausländischen terroristischen Vereinigung: Verbreitung eines Enthauptungsvideos der Al Qaida im Internet
Datum: 1348099200000
AZ: 3 StR 314/12

Recommendation # 2: JURE120016232 (score: {'hybrid': 0.7532090253324665, '

5. JURE120006766

In [115]:
recommendAndShow('JURE120006766', 3, df_embeddings_short, dg)

creating multilevel directed citation net..
number of nodes: 9
number of edges: 8
selecting 9 candidate documents..
8 found
determining recommend documents..
document with highest indegree: JURE110005959
document with highest closeness centrality: JURE110005959
document with highest betweenness centrality: JURE110005959
document with highest Katz centrality: JURE110005959

DOI: JURE120006766
Titel: Unerlaubtes Handeltreiben mit Betäubungsmitteln in nicht geringer Menge: Berücksichtigung der Beteiligung verdeckter Ermittler bei der Strafzumessung
Datum: 1328745600000
AZ: 2 StR 455/11

Recommendation # 1: JURE110011175 (score: {'hybrid': 0.8197617402086659, 'net': 0.8125, 'text': 0.8270234804173318})
Titel: Unerlaubtes Handeltreiben mit Betäubungsmitteln: Vorliegen eines minder schweren Falls
Datum: 1304380800000
AZ: 5 StR 568/10

Recommendation # 2: JURE110005959 (score: {'hybrid': 0.8178267554014171, 'net': 1.0, 'text': 0.6356535108028344})
Titel: Unerlaubter Betäubungsmittelhandel: An

6. JURE120010397

In [116]:
recommendAndShow('JURE120010397', 3, df_embeddings_short, dg)

creating multilevel directed citation net..
number of nodes: 44
number of edges: 47
selecting 44 candidate documents..
43 found
determining recommend documents..
document with highest indegree: JURE100073554
document with highest closeness centrality: JURE100073554
document with highest betweenness centrality: JURE100073554
document with highest Katz centrality: JURE100073554

DOI: JURE120010397
Titel: Gefährliche Körperverletzung: Bewertung entlastender Angaben des Angeklagten; Zurechnung eines Mittäterexzesses; Quarzhandschuh und bei Tritten gegen den Kopf getragenes Schuhwerk als gefährliche Werkzeuge
Datum: 1335398400000
AZ: 4 StR 51/12

Recommendation # 1: JURE100073554 (score: {'hybrid': 0.9055497502209681, 'net': 1.0, 'text': 0.8110995004419361})
Titel: Gefährliche Körperverletzung und schwerer Raub: Straßenschuh als gefährliches Werkzeug; Begriff des hinterlistigen Überfalls; Voraussetzungen einer schweren körperlichen Misshandlung
Datum: 1284508800000
AZ: 2 StR 395/10

Recomme

7. JURE160021072

In [117]:
recommendAndShow('JURE160021072', 3, df_embeddings_short, dg)

creating multilevel directed citation net..
number of nodes: 70
number of edges: 75
selecting 70 candidate documents..
69 found
determining recommend documents..
document with highest indegree: JURE110022632
document with highest closeness centrality: JURE100064635
document with highest betweenness centrality: JURE100064635
document with highest Katz centrality: JURE110022632

DOI: JURE160021072
Titel: Totschlag durch Unterlassen: Garantenstellung aus familiärer Beistandspflicht
Datum: 1476316800000
AZ: 3 StR 248/16

Recommendation # 1: JURE100064635 (score: {'hybrid': 0.913653139682978, 'net': 1.0, 'text': 0.827306279365956})
Titel: Strafzumessung: Anforderungen an die Strafzumessung beim minder schweren Fall des versuchten Totschlags ohne Veranlassung durch das Opfer
Datum: 1272326400000
AZ: 3 StR 106/10

Recommendation # 2: JURE110022632 (score: {'hybrid': 0.8989860707748633, 'net': 1.0, 'text': 0.7979721415497265})
Titel: Strafzumessung bei schwerem Raub: Prüfungsreihenfolge bei mi

8. JURE140006183

In [118]:
recommendAndShow('JURE140006183', 3, df_embeddings_short, dg)

creating multilevel directed citation net..
number of nodes: 66
number of edges: 115
selecting 66 candidate documents..
65 found
determining recommend documents..
document with highest indegree: KORE311242011
document with highest closeness centrality: KORE300112013
document with highest betweenness centrality: KORE300112013
document with highest Katz centrality: KORE311242011

DOI: JURE140006183
Titel: Betrug: Voraussetzungen eines Wettbetruges im Falle von Sportwetten
Datum: 1394496000000
AZ: 4 StR 479/13

Recommendation # 1: KORE300112013 (score: {'hybrid': 0.8395149138398492, 'net': 0.9868421052631579, 'text': 0.6921877224165405})
Titel: Sportwettenbetrug: Feststellung des Schadens
Datum: 1355961600000
AZ: 4 StR 55/12

Recommendation # 2: JURE160005316 (score: {'hybrid': 0.8178474815850911, 'net': 0.75, 'text': 0.8856949631701824})
Titel: Sportwettenbetrug beim Wetten im Internet: Täuschungsäquivalenz des unbefugten Verwendens von Daten; Schadenseintritt
Datum: 1456963200000
AZ: 4 

9. KORE307212020

In [119]:
recommendAndShow('KORE307212020', 3, df_embeddings_short, dg)

creating multilevel directed citation net..
number of nodes: 104
number of edges: 128
selecting 104 candidate documents..
103 found
determining recommend documents..
document with highest indegree: JURE160012884
document with highest closeness centrality: KORE305632018
document with highest betweenness centrality: KORE305632018
document with highest Katz centrality: JURE160012884

DOI: KORE307212020
Titel: Schwere Brandstiftung: Teilweise Zerstörung eines als Flüchtlingsunterkunft genutzten Gebäudes durch Zimmerbrand
Datum: 1573689600000
AZ: 3 StR 408/19

Recommendation # 1: JURE140006413 (score: {'hybrid': 0.7993621987988477, 'net': 0.8705882352941177, 'text': 0.7281361623035779})
Titel: Strafverfahren wegen Brandstiftung: Schwere Brandstiftung bei Verlassen des menschenleeren Tatobjekts nach der Brandlegung
Datum: 1384387200000
AZ: 3 StR 336/13

Recommendation # 2: KORE305632018 (score: {'hybrid': 0.7843367537790816, 'net': 0.9058823529411765, 'text': 0.6627911546169868})
Titel: Schw

10. KORE603532020

In [120]:
recommendAndShow('KORE603532020', 3, df_embeddings_short, dg)

creating multilevel directed citation net..
number of nodes: 103
number of edges: 126
selecting 103 candidate documents..
102 found
determining recommend documents..
document with highest indegree: JURE100068632
document with highest closeness centrality: KORE620972019
document with highest betweenness centrality: KORE620972019
document with highest Katz centrality: JURE100068632

DOI: KORE603532020
Titel: Untreue und Bestechlichkeit im geschäftlichen Verkehr: Amtsträgereigenschaft eines Sparkassenangestellten
Datum: 1576022400000
AZ: 5 StR 486/19

Recommendation # 1: JURE100068632 (score: {'hybrid': 0.8902268629061643, 'net': 1.0, 'text': 0.7804537258123284})
Titel: Verfallsanordnung bei versuchtem Betrug
Datum: 1277769600000
AZ: 1 StR 245/09

Recommendation # 2: KORE627642018 (score: {'hybrid': 0.8696386429166797, 'net': 0.9886363636363636, 'text': 0.7506409221969957})
Titel: Vorteilsannahme bei der Werbevermarktung im öffentlichen Personennahverkehr
Datum: 1532995200000
AZ: 3 StR 62