# PageRank

In [137]:
import numpy as np

## Traditional Method

In [138]:
def create_transition_matrix(vertices_dict, vertices_order, string = True):
    """Build the link matrix of a graph, one row per destination vertex.

    Entry ``[i][j]`` describes the link from ``vertices_order[j]`` (the
    source) to ``vertices_order[i]`` (the destination): it is
    ``1 / out_degree(source)`` when the destination appears in the source's
    outlink list, and zero otherwise.

    Args:
        vertices_dict: maps every vertex to the list of vertices it links to.
        vertices_order: vertices in the order used for both rows and columns.
        string: when true, entries are the strings ``"1 / n"`` and ``"0"``;
            otherwise they are numbers (a float quotient or the int ``0``).

    Returns:
        The matrix as a list of rows (list of lists).
    """
    def entry(source, target):
        # Weight of the source -> target link in the requested representation.
        outlinks = vertices_dict[source]
        if target not in outlinks:
            return "0" if string else 0
        degree = len(outlinks)
        return "1 / " + str(degree) if string else 1 / degree

    return [
        [entry(source, target) for source in vertices_order]
        for target in vertices_order
    ]


def power_iteration(matrix, node_number, threshold, max_iterations=None):
    """Repeatedly multiply a uniform rank vector by ``matrix`` until it settles.

    The vector starts as ``1 / node_number`` in every component and is
    multiplied once before the loop. Each loop pass then measures the L1
    distance between the two most recent vectors and stops when that
    distance drops below ``threshold`` or is exactly zero (the latter makes
    ``threshold=0`` usable).

    Args:
        matrix: square transition matrix (nested lists or array) with
            numeric entries.
        node_number: number of nodes, i.e. the length of the rank vector.
        threshold: iteration continues while the L1 change is >= this value.
        max_iterations: optional upper bound on the returned ``count``.
            ``None`` (the default) means unbounded, which never terminates
            for a chain whose rank vector keeps oscillating.

    Returns:
        ``(rank, count)`` where ``rank`` is a ``(node_number, 1)`` array and
        ``count`` is the number of loop passes (the initial multiplication
        before the loop is not counted).
    """
    matrix = np.array(matrix)
    count = 0
    old_r = np.ones([node_number, 1]) / node_number
    new_r = matrix.dot(old_r)
    while True:
        # Compute the change once per pass and reuse it for every check.
        delta = np.sum(np.abs(new_r - old_r))
        # Written as "not >=" so a NaN delta ends the loop, as before.
        if not (delta >= threshold):
            break
        if max_iterations is not None and count >= max_iterations:
            break
        count += 1
        if delta == 0:
            # Exact fixed point: further multiplications cannot change it.
            break
        old_r = new_r
        new_r = matrix.dot(old_r)
    return new_r, count


def power_iteration_by_number(matrix, node_number, iteration_number):
    """Multiply a uniform rank vector by ``matrix`` a fixed number of times.

    Args:
        matrix: square transition matrix (nested lists or array) with
            numeric entries.
        node_number: number of nodes, i.e. the length of the rank vector.
        iteration_number: how many multiplications to perform. Zero or a
            negative value performs none and returns the uniform vector.

    Returns:
        ``(rank, count)`` where ``rank`` is a ``(node_number, 1)`` array.
        ``count`` starts at 1 for the first multiplication and grows by one
        per loop pass. If the vector stops changing before
        ``iteration_number`` is reached the loop exits early, and ``count``
        includes the pass that detected it.
    """
    matrix = np.array(matrix)
    old_r = np.ones([node_number, 1]) / node_number
    if iteration_number <= 0:
        # Nothing requested: report the untouched starting vector.
        return old_r, 0
    new_r = matrix.dot(old_r)
    count = 1
    while count < iteration_number:
        count += 1
        if np.sum(np.abs(new_r - old_r)) == 0:
            # Exact fixed point: further multiplications cannot change it.
            break
        old_r = new_r
        new_r = matrix.dot(old_r)
    return new_r, count

In [139]:
# Sample graph: each vertex maps to the vertices it links to.
# "C" has no outlinks, so its column in the matrix below is all zeros.
nodes = {
    "A": ["A", "B", "C", "D"],
    "B": ["D"],
    "C": [],
    "D": ["B", "C"],
}
# Alternative graph in which every vertex has at least one outlink:
# nodes = {
#     "A": ["B", "C", "D"],
#     "B": ["A", "D"],
#     "C": ["A"],
#     "D": ["B", "C"],
# }

nodes_order = list("ABCD")

# Numeric (non-string) form so the matrix can be fed to the power iteration.
transition_matrix = create_transition_matrix(nodes, nodes_order, string=False)
for row in transition_matrix:
    print(row)

[0.25, 0, 0, 0]
[0.25, 0, 0, 0.5]
[0.25, 0, 0, 0.5]
[0.25, 1.0, 0, 0]


In [140]:
# A threshold of 0 keeps iterating until two successive rank vectors are identical.
result = power_iteration(transition_matrix, len(nodes_order), threshold=0)
print(result)

(array([[0.],
       [0.],
       [0.],
       [0.]]), 2149)


In [141]:
# Rank vector after a single multiplication by the transition matrix.
after_one = power_iteration_by_number(
    transition_matrix, len(nodes_order), iteration_number=1
)
print(after_one)

(array([[0.0625],
       [0.1875],
       [0.1875],
       [0.3125]]), 1)


## Map Reduce Method

In [142]:
def map_part(url_pagerank_outlinks_dict, node_order):
    """Map step: split every node's rank evenly across its outlinks.

    Args:
        url_pagerank_outlinks_dict: maps each node to a single-entry dict
            ``{(url, pagerank): outlinks}``; only the first entry is read.
        node_order: nodes to process, in order.

    Returns:
        A dict mapping each node to the pairs it emits: one
        ``outlink -> contribution`` entry per distinct outlink, plus
        ``url -> outlinks`` so the graph structure is carried forward.
        An outlink listed several times receives one share per occurrence,
        so the contributions always add up to the node's rank.

    Note:
        The structure entry is stored under ``url`` itself, so an outlink
        spelled exactly like ``url`` would be overwritten by it. The sample
        data in this notebook uses lower-case outlinks, which avoids this.
    """
    result_dict = {}
    for node in node_order:
        node_dict = url_pagerank_outlinks_dict[node]
        url_pagerank, outlinks = list(node_dict.items())[0]
        url, pagerank = url_pagerank[0], url_pagerank[1]

        result_node_dict = {}
        if outlinks:
            share = pagerank / len(outlinks)
            for outlink in outlinks:
                # Accumulate so repeated outlinks are not silently dropped.
                result_node_dict[outlink] = result_node_dict.get(outlink, 0) + share
        result_node_dict[url] = outlinks
        result_dict[node] = result_node_dict
    return result_dict


def produce_map_output(map_result):
    """Shuffle step: group everything the mappers emitted by target node.

    Keys are upper-cased before grouping, so the lower-case outlink key
    ``"b"`` and the node's own key ``"B"`` land in the same bucket.

    Args:
        map_result: dict of ``node -> {emitted_key: emitted_value}``.

    Returns:
        A dict mapping each upper-cased key to the list of values emitted
        for it, in the order they were encountered.
    """
    grouped = {}
    for emitted in map_result.values():
        for raw_key, payload in emitted.items():
            grouped.setdefault(raw_key.upper(), []).append(payload)
    return grouped


def reduce_part(map_result, node_order):
    """Reduce step: add up each node's incoming contributions.

    For every node, the grouped values are split by type: a ``list`` is
    taken as the node's outlinks (the last one seen wins), and anything
    else is added to the node's new rank.

    Args:
        map_result: dict of ``node -> list of grouped values``.
        node_order: nodes to process, in order.

    Returns:
        A dict of ``node -> {(node, new_rank): outlinks}``. ``outlinks`` is
        ``None`` when no list was present among the node's values.
    """
    reduced = {}
    for node in node_order:
        rank_total = 0
        outlinks = None
        for item in map_result[node]:
            if type(item) is list:
                outlinks = item
            else:
                rank_total += item
        reduced[node] = {(node, rank_total): outlinks}
    return reduced

In [143]:
# Input to the first map step: each node maps to a single-entry dict
# {(url, pagerank): outlinks}. Every node starts with rank 1/4.
# Outlinks are written in lower case; produce_map_output upper-cases keys
# when grouping, so "b" and "B" end up in the same bucket.
url_pagerank_outlinks_dict = {
    "A": {("A", 1/4): ["a", "b", "c", "d"]},
    "B": {("B", 1/4): ["d"]},
    "C": {("C", 1/4): []},
    "D": {("D", 1/4): ["b", "c"]},
}
# Alternative input (upper-case outlinks, with "B" listed twice for "A"):
# url_pagerank_outlinks_dict = {
#     "A": {("A", 1/4): ["A", "B", "B", "D"]},
#     "B": {("B", 1/4): ["D"]},
#     "C": {("C", 1/4): []},
#     "D": {("D", 1/4): ["B", "C"]},
# }
node_order = ["A", "B", "C", "D"]
print("First Map Input:")
for node in node_order:
    print(node, url_pagerank_outlinks_dict[node])
print()
# Map: each node emits a rank share per outlink plus its own outlink list.
map_result = map_part(url_pagerank_outlinks_dict, node_order)
print("First Map Output:")
for node in node_order:
    print(node, map_result[node])
print()
# Shuffle: map_result is rebound to the emitted values grouped by target node.
map_result = produce_map_output(map_result)
print("First Reduce Input:")
for node in node_order:
    print(node, map_result[node])
print()
# Reduce: add up the shares per node, yielding the same structure as the input.
reduce_result = reduce_part(map_result, node_order)
print("First Reduce Output:")
for node in node_order:
    print(node, reduce_result[node])


First Map Input:
A {('A', 0.25): ['a', 'b', 'c', 'd']}
B {('B', 0.25): ['d']}
C {('C', 0.25): []}
D {('D', 0.25): ['b', 'c']}

First Map Output:
A {'a': 0.0625, 'b': 0.0625, 'c': 0.0625, 'd': 0.0625, 'A': ['a', 'b', 'c', 'd']}
B {'d': 0.25, 'B': ['d']}
C {'C': []}
D {'b': 0.125, 'c': 0.125, 'D': ['b', 'c']}

First Reduce Input:
A [0.0625, ['a', 'b', 'c', 'd']]
B [0.0625, ['d'], 0.125]
C [0.0625, [], 0.125]
D [0.0625, 0.25, ['b', 'c']]

First Reduce Output:
A {('A', 0.0625): ['a', 'b', 'c', 'd']}
B {('B', 0.1875): ['d']}
C {('C', 0.1875): []}
D {('D', 0.3125): ['b', 'c']}
