# Stage 4: Recommendations using Shortest Paths to Compute Relevance Between Requests and Articles
Attempts to determine how relevant each Content node is to each Request node, and builds Recommendation nodes to store that score when the relevance is rated highly enough.

The result of this step includes:
- Recommendation nodes, connected to Content nodes with a RECOMMENDS relationship and to Request nodes with a RELATES_TO relationship

In [None]:
import logging

## Parameters
OpenTLDR workflows use the notebook block tagged as "parameters" to inject variables (for example to change the recommendation thresholds).

> **Do Not Change Variable Names in the Parameters Block.** You are welcome to change the values of these parameter variables, but please do not change their names. They are used elsewhere in the notebook and in other workflow processes.

In [None]:
#Parameters
# A Content node is recommended for a Request only when its score is strictly
# greater than this value.
recommendation_threshold = 0.75

# When True, kg.delete_all_recommendations() is called before new
# recommendations are generated.
delete_existing_recommendations = True

# Logging level ranges are (from least to most verbose): ERROR, WARN, INFO, DEBUG
logging_level = logging.INFO

# List of Request node uids to process; None means every node tagged 'Request'
list_of_uids = None

# When True, also print the number of Requests found and every Content that
# was scored but NOT recommended
verbose = True


## Setup

In [None]:
# Apply the verbosity chosen in the parameters block to the "OpenTLDR" logger.
logging.getLogger("OpenTLDR").setLevel(logging_level)

import opentldr.Domain as domain
from opentldr.Domain import Request, Content, Recommendation

from opentldr import KnowledgeGraph
# Knowledge-graph handle shared by all of the following cells; it is closed
# by the final cell of the notebook.
kg=KnowledgeGraph()



In [None]:
# Fall back to every node tagged 'Request' when no explicit uid list was injected.
list_of_uids = kg.get_all_node_uids_by_tag('Request') if list_of_uids is None else list_of_uids

if verbose:
    print(f"Found {len(list_of_uids)} Request nodes to process.")

# Recommend Contents based on their relationships thru the KG to the Request
Relevance is scored to include:
- The distance of the node, thru the KG from the request
- With an integrated penalty for uncertainty in relationships
- The average relevance of a node's neighbors

### Average the distance relevance scores for each Article based on its neighbors 

In [None]:
# Cypher template for the shortest undirected path (at most 10 hops) between
# the node whose uid is {start_id} and the node whose uid is {end_id}.
# Paths are rejected if they pass through any Recommendation, Tldr, Summary,
# Feedback, Source, User, EvalKey or Similarity node, through any Request
# other than the start node, or through any Content other than the end node.
# NOTE(review): the uids are interpolated into the query text with
# str.format(); this is only safe while uids are generated internally.
# Switch to query parameters if uids can ever come from untrusted input.
shortest_path_cypher = """
            MATCH path=shortestPath((s)-[*..10]-(e))
            WHERE s.uid='{start_id}'
            AND e.uid='{end_id}'
            AND NONE(n IN nodes(path) WHERE 'Recommendation' IN LABELS(n))
            AND NONE(n IN nodes(path) WHERE 'Tldr' IN LABELS(n))
            AND NONE(n IN nodes(path) WHERE 'Summary' IN LABELS(n))
            AND NONE(n IN nodes(path) WHERE 'Feedback' IN LABELS(n))
            AND NONE(n IN nodes(path) WHERE 'Source' IN LABELS(n))
            AND NONE(n IN nodes(path) WHERE 'Request' IN LABELS(n) AND n.uid<>"{start_id}")
            AND NONE(n IN nodes(path) WHERE 'Content' IN LABELS(n) AND n.uid<>"{end_id}")
            AND NONE(n IN nodes(path) WHERE 'User' IN LABELS(n))
            AND NONE(n IN nodes(path) WHERE 'EvalKey' IN LABELS(n))
            AND NONE(n IN nodes(path) WHERE 'Similarity' IN LABELS(n))
            RETURN path
            """


def _path_relevance(kg, start_uid, end_uid, cypher, free_nodes, decay_rate):
    """Return the distance-based relevance of the shortest path, or None if there is none.

    The score is ``1.0 - (node_count - free_nodes) * decay_rate``: a path with
    exactly ``free_nodes`` nodes scores 1.0, and every additional node on the
    path costs ``decay_rate``. The result is deliberately not clamped here.
    """
    try:
        rows = kg.neomodel_query(cypher.format(start_id=start_uid, end_id=end_uid))
        # The path is read from rows[0][0][0]; presumably that is the first
        # column of the first result row -- confirm against neomodel_query.
        node_count = len(rows[0][0][0].nodes)
    except (IndexError, TypeError):
        # Nothing to index into: treated as "no path between the two nodes".
        return None
    except Exception:
        # Keep the original best-effort behaviour (a failed lookup contributes
        # nothing), but leave a trace instead of silently hiding the error.
        logging.getLogger("OpenTLDR").warning(
            "Shortest-path query from %s to %s failed; treating as no path",
            start_uid, end_uid, exc_info=True)
        return None
    return 1.0 - ((node_count - free_nodes) * decay_rate)


def get_recommendation_score(kg:"KnowledgeGraph", request:"Request", content_uid:str, shortest_path_cypher:str=shortest_path_cypher) -> float:
    """Score how relevant one Content node is to a Request, in the range 0.0-1.0.

    The score is the average of:
      * the path relevance from the Request itself to the Content
        (a 5-node path scores 1.0), and
      * the path relevance from each entity returned by
        ``kg.get_entities_by_request(request)`` to the Content
        (a 4-node path scores 1.0); an entity with no path contributes 0
        but still counts towards the average.

    Args:
        kg: knowledge graph used to run the Cypher queries.
        request: the Request being matched; its ``uid`` is the path start.
        content_uid: uid of the Content node being scored.
        shortest_path_cypher: query template with ``{start_id}`` and
            ``{end_id}`` placeholders that returns the path.

    Returns:
        0.0 when the Request has no path to the Content (or that path alone
        scores <= 0); otherwise the average rounded to 3 decimals and clamped
        to the interval [0.0, 1.0].
    """
    decay_rate = 0.2

    direct = _path_relevance(kg, request.uid, content_uid, shortest_path_cypher, 5, decay_rate)
    if direct is None or direct <= 0.0:
        return 0.0

    acc = direct
    count = 1
    for e in kg.get_entities_by_request(request):
        count += 1
        entity_score = _path_relevance(kg, e.uid, content_uid, shortest_path_cypher, 4, decay_rate)
        if entity_score is not None:
            acc += entity_score

    out = round(acc / count, 3)

    # Short entity paths can push the average above 1.0 and long ones can pull
    # it below 0.0; keep the published score inside [0.0, 1.0].
    return min(1.0, max(0.0, out))

# Process Each Query in the System

In [None]:
# Start from a clean slate when requested in the parameters block, so the
# recommendations created below are not added on top of earlier ones.
if delete_existing_recommendations:
    kg.delete_all_recommendations()

In [None]:
def average_content(distance_scores:dict):
    """Blend each content item's distance score with the scores of its entities.

    For every item returned by ``kg.get_all_content()`` the item's own score
    is looked up in ``distance_scores`` by ``str(uid)`` (0.0 when missing).
    If that score is positive, the scores of the entities returned by
    ``kg.get_entities_by_content`` are added in (again 0.0 when missing) and
    the total is divided by the number of values summed. Items whose own
    score is not positive keep that score unchanged.

    Uses the module-level ``kg``.

    Returns:
        dict mapping ``str(uid)`` of each content item to its averaged score,
        rounded to 3 decimals.
    """
    averaged = {}
    for item in kg.get_all_content():
        total = distance_scores.get(str(item.uid), 0.0)
        samples = 1
        if total > 0.0:
            # Entities only contribute when the item itself scored.
            for entity in kg.get_entities_by_content(item):
                total += distance_scores.get(str(entity.uid), 0.0)
                samples += 1
        averaged[str(item.uid)] = round(total / samples, 3)
    return averaged

In [None]:
from opentldr.Domain import Recommendation, Request, Content

# Score every Content node against each selected Request, then store a
# Recommendation for each score above the threshold.
content_uids = kg.get_all_node_uids_by_tag('Content')

for uid in list_of_uids:
    request = kg.get_request_by_uid(uid)
    print("\nRequest: ("+request.title+"): "+request.text)

    # content uid -> relevance score for this request
    content_scores = {
        content_uid: get_recommendation_score(kg, request, content_uid)
        for content_uid in content_uids
    }

    # Highest score first, so the printed report reads best-to-worst.
    sorted_recommendations = sorted(content_scores.items(), key=lambda x: x[1], reverse=True)
    for scored_uid, recommendation_score in sorted_recommendations:
        content = kg.get_content_by_uid(scored_uid)
        score_suffix = " ("+str(recommendation_score)+") "

        if recommendation_score > recommendation_threshold:
            print("\tRecommending:\t"+ content.title + score_suffix)
            kg.add_recommendation(request=request, content=content, score=recommendation_score)
        elif verbose:
            print("\tNOT Recommending:\t"+ content.title + score_suffix)


In [None]:
# Release the knowledge-graph handle created in the Setup cell.
kg.close()