# PageRank

In [87]:
import numpy as np

## Traditional Method (Power Iteration)

In [88]:
def create_transition_matrix(vertices_dict, vertices_order, string=True):
    """Build the transition matrix of a directed graph as a list of rows.

    Entry ``[i][j]`` is non-zero exactly when vertex ``j`` (column) lists
    vertex ``i`` (row) among its outlinks; its value is one over the number
    of entries in ``j``'s outlink list.

    Args:
        vertices_dict: Maps every vertex in ``vertices_order`` to the list
            of vertices it links to.
        vertices_order: Vertices in the order used for rows and columns.
        string: When true, entries are the strings ``"1 / <n>"`` and
            ``"0"``; otherwise they are the float quotient and the int 0.

    Returns:
        A list of rows, one per vertex in ``vertices_order``.
    """

    def cell(target, source):
        # Weight of the edge source -> target in the requested format.
        successors = vertices_dict[source]
        if target not in successors:
            return "0" if string else 0
        if string:
            return "1 / " + str(len(successors))
        return 1 / len(successors)

    return [
        [cell(target, source) for source in vertices_order]
        for target in vertices_order
    ]


def power_iteration(matrix, node_number, threshold, max_iterations=None):
    """Repeatedly multiply a uniform rank vector by ``matrix`` until it settles.

    The rank vector starts as ``1 / node_number`` in every entry and is
    multiplied once before the loop. Iteration then continues while the L1
    distance between successive vectors is at least ``threshold``.

    Args:
        matrix: Square transition matrix (nested lists or array) with
            ``node_number`` rows and columns.
        node_number: Number of nodes, i.e. the length of the rank vector.
        threshold: Stop once the L1 distance between two successive rank
            vectors drops below this value. With ``threshold <= 0`` the
            loop only stops when the vectors are exactly equal.
        max_iterations: Optional upper bound on the number of additional
            multiplications performed inside the loop. ``None`` (default)
            keeps the original unbounded behaviour, which never returns
            for chains whose rank vector oscillates instead of converging.

    Returns:
        Tuple ``(rank_vector, count)`` where ``rank_vector`` is a
        ``(node_number, 1)`` array and ``count`` is the number of loop
        passes entered (a pass that stops on an exact zero distance is
        counted as well).
    """
    matrix = np.array(matrix)
    count = 0
    old_r = np.ones([node_number, 1]) / node_number
    new_r = matrix.dot(old_r)
    while True:
        # Compute the distance once per pass instead of twice.
        delta = np.sum(np.abs(new_r - old_r))
        # Written as "not >=" so a NaN distance ends the loop, as before.
        if not delta >= threshold:
            break
        if max_iterations is not None and count >= max_iterations:
            break
        count += 1
        # Exact fixed point: needed to terminate when threshold <= 0.
        if delta == 0:
            break
        old_r = new_r
        new_r = matrix.dot(old_r)
    return new_r, count


def power_iteration_by_number(matrix, node_number, iteration_number):
    """Run power iteration for a fixed number of multiplication steps.

    The rank vector starts uniform (``1 / node_number`` per entry) and is
    multiplied by ``matrix`` once unconditionally, so at least one step is
    always performed. Further steps run until the step counter reaches
    ``iteration_number`` or two successive vectors are exactly equal.

    Args:
        matrix: Square transition matrix (nested lists or array).
        node_number: Number of nodes, i.e. the length of the rank vector.
        iteration_number: Target value for the step counter.

    Returns:
        Tuple ``(rank_vector, count)``. ``count`` starts at 1 and is
        incremented before the equality check, so an early stop still
        counts the pass that detected it.
    """
    transition = np.array(matrix)
    previous = np.ones([node_number, 1]) / node_number
    current = np.dot(transition, previous)
    steps = 1
    while steps < iteration_number:
        steps += 1
        change = np.abs(current - previous).sum()
        if change == 0:
            # Exact fixed point reached; more multiplications change nothing.
            break
        previous, current = current, np.dot(transition, current)
    return current, steps

In [89]:
# Demo graph as adjacency lists (vertex -> outlinks). "B" only lists the
# placeholder "", which is not a vertex, so no row ever matches it and
# column B of the matrix is all zeros.
nodes = {
    "A": ["B", "C", "D"],
    "B": [""],
    "C": ["C", "D"],
    "D": ["A", "B"],
}
# Alternative graph kept for experimentation:
# nodes = {"A": ["B", "C", "D"], "B": ["A", "D"], "C": ["A"], "D": ["B", "C"]}

nodes_order = list("ABCD")

# Numeric (non-string) matrix, printed one row per line.
transition_matrix = create_transition_matrix(nodes, nodes_order, string=False)
for row in transition_matrix:
    print(row)

[0, 0, 0, 0.5]
[0.3333333333333333, 0, 0, 0.5]
[0.3333333333333333, 0, 0.5, 0]
[0.3333333333333333, 0, 0.5, 0]


In [90]:
# Iterate with threshold 0: the loop in power_iteration then only stops
# once two successive rank vectors are exactly equal. Prints the
# (rank_vector, count) tuple.
# NOTE(review): column B of transition_matrix is all zeros, so the total
# rank shrinks on every step -- presumably why the recorded ranks end up
# near 1e-323 instead of a proper distribution; confirm whether dead-end
# handling is wanted here.
result = power_iteration(transition_matrix, 4, 0)
print(result)

(array([[9.9e-324],
       [1.5e-323],
       [1.5e-323],
       [1.5e-323]]), 2346)


In [91]:
# With iteration_number=1 only the initial multiplication is performed,
# so this prints the rank vector after a single step together with count 1.
after_one = power_iteration_by_number(transition_matrix, 4, 1)
print(after_one)

(array([[0.125     ],
       [0.20833333],
       [0.20833333],
       [0.20833333]]), 1)


## MapReduce Method

In [92]:
def map_part(url_pagerank_outlinks_dict, node_order):
    """Map phase: split each node's rank evenly across its outlinks.

    Args:
        url_pagerank_outlinks_dict: Maps each node to a dict whose first
            item is ``(name, rank) -> outlinks``; only that first item is
            read.
        node_order: Nodes to process, in order.

    Returns:
        Dict mapping each node to its emitted records: one
        ``outlink -> rank / len(outlinks)`` entry per outlink, plus a
        ``name -> outlinks`` entry carrying the adjacency list forward.

    Note:
        The adjacency entry is written last under the node's own name, so
        when a node links to itself its self-contribution is replaced by
        the adjacency list.
    """
    mapped = {}
    for node in node_order:
        header, targets = list(url_pagerank_outlinks_dict[node].items())[0]
        emitted = {target: header[1] / len(targets) for target in targets}
        emitted[header[0]] = targets
        mapped[node] = emitted
    return mapped


def produce_map_output(map_result):
    """Shuffle phase: group every emitted value under its destination key.

    Args:
        map_result: Dict of per-node dicts, as returned by ``map_part``.

    Returns:
        Dict mapping each inner key to the list of all values emitted for
        it, in the order the outer and inner dicts are iterated.
    """
    grouped = {}
    for emitted in map_result.values():
        for destination, payload in emitted.items():
            grouped.setdefault(destination, []).append(payload)
    return grouped


def reduce_part(map_result, node_order):
    """Reduce phase: sum the contributions received by each node.

    Args:
        map_result: Dict mapping each node to a list mixing numeric rank
            contributions with the node's outlink list, as produced by
            ``produce_map_output``.
        node_order: Nodes to process, in order.

    Returns:
        Dict mapping each node to ``{(node, new_rank): outlinks}``.
        ``new_rank`` is the running sum of the non-list values starting
        from 0; ``outlinks`` is the last list found, or ``None`` if the
        node received none.
    """
    reduced = {}
    for node in node_order:
        rank_total = 0
        outlinks = None
        # Accumulate left to right so the float result matches the input order.
        for item in map_result[node]:
            if type(item) is list:
                outlinks = item
            else:
                rank_total += item
        reduced[node] = {(node, rank_total): outlinks}
    return reduced

In [93]:
# Initial MapReduce input: each node maps to {(name, rank): outlinks},
# with every rank starting at 1/4.
url_pagerank_outlinks_dict = {
    "A": {("A", 1/4): ["B", "C", "D"]},
    "B": {("B", 1/4): ["A"]},
    "C": {("C", 1/4): ["B", "D"]},
    "D": {("D", 1/4): ["A", "B"]},
}
node_order = ["A", "B", "C", "D"]
print("First Map Input:")
for node in node_order:
    print(node, url_pagerank_outlinks_dict[node])
print()
# Map: each node emits rank / len(outlinks) per outlink plus its own
# adjacency list.
map_result = map_part(url_pagerank_outlinks_dict, node_order)
print("First Map Output:")
for node in node_order:
    print(node, map_result[node])
print()
# Shuffle: regroup the emitted values by destination node. This rebinds
# map_result, so from here on it holds the reduce input.
map_result = produce_map_output(map_result)
print("First Reduce Input:")
for node in node_order:
    print(node, map_result[node])
print()
# Reduce: sum the contributions per node, yielding the same
# {(name, rank): outlinks} shape as the map input.
reduce_result = reduce_part(map_result, node_order)
print("First Reduce Output:")
for node in node_order:
    print(node, reduce_result[node])


First Map Input:
A {('A', 0.25): ['B', 'C', 'D']}
B {('B', 0.25): ['A']}
C {('C', 0.25): ['B', 'D']}
D {('D', 0.25): ['A', 'B']}

First Map Output:
A {'B': 0.08333333333333333, 'C': 0.08333333333333333, 'D': 0.08333333333333333, 'A': ['B', 'C', 'D']}
B {'A': 0.25, 'B': ['A']}
C {'B': 0.125, 'D': 0.125, 'C': ['B', 'D']}
D {'A': 0.125, 'B': 0.125, 'D': ['A', 'B']}

First Reduce Input:
A [['B', 'C', 'D'], 0.25, 0.125]
B [0.08333333333333333, ['A'], 0.125, 0.125]
C [0.08333333333333333, ['B', 'D']]
D [0.08333333333333333, 0.125, ['A', 'B']]

First Reduce Output:
A {('A', 0.375): ['B', 'C', 'D']}
B {('B', 0.3333333333333333): ['A']}
C {('C', 0.08333333333333333): ['B', 'D']}
D {('D', 0.20833333333333331): ['A', 'B']}
