/
analyzer.py
121 lines (100 loc) · 3.85 KB
/
analyzer.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
"""
Module is used for analyzing link relationships
"""
from requests.exceptions import HTTPError
from ete3 import Tree, TreeStyle, TextFace, add_face_to_node
from .link import LinkNode
from .utils import multi_thread
class LinkTree:
    """
    This is a class that represents a tree of links within TorBot.

    The tree is built once, at construction time, by `build_tree` and is kept
    as an ete3 tree. It can be measured with `len()`, queried with `in`,
    written to a file with `save` and displayed with `show`.

    Args:
        root_node: Link node used as the root of the tree; `build_tree` reads
            its `name` and `links` attributes.
        stop_depth (int): Depth of which to stop searching for links
    """

    def __init__(self, root_node, *, stop_depth=1):
        self._tree = build_tree(root_node, stop=stop_depth)

    def __len__(self):
        """
        Returns the length reported by the underlying tree.

        NOTE(review): ete3 appears to count leaves rather than every node
        here -- confirm against the ete3 documentation.
        """
        return len(self._tree)

    def __contains__(self, link):
        """
        Tells whether a node named `link` exists in the tree.

        Args:
            link (str): Node name to look for

        Returns:
            bool: True if the tree's `search_nodes` finds at least one match
        """
        # search_nodes hands back the matches; expose a real bool so that a
        # direct call behaves the same as the `in` operator.
        return bool(self._tree.search_nodes(name=link))

    @staticmethod
    def _name_label_style():
        """
        Builds the TreeStyle shared by `save` and `show`.

        Returns:
            style (ete3.TreeStyle): style whose layout labels nodes by name
        """
        def label_with_name(node):
            face = TextFace(node.name, tight_text=True)
            add_face_to_node(face, node, column=0, position='branch-bottom')

        style = TreeStyle()
        # The built-in leaf labels are switched off; the layout function
        # attaches a name label to each node it is called with instead.
        style.show_leaf_name = False
        style.layout_fn = label_with_name
        return style

    def save(self, file_name):
        """
        Saves LinkTree to file with given file_name
        Current file types supported are .png, .pdf, .svg

        Args:
            file_name (str): Name of file being saved to
        """
        self._tree.render(file_name, tree_style=self._name_label_style())

    def show(self):
        """
        Allows user to quickly view LinkTree
        """
        self._tree.show(tree_style=self._name_label_style())
def initialize_tree(root_node):
    """
    Creates the root of a tree from a link node.

    Args:
        root_node: link node to be used as root; its `name` and `links`
            attributes are read

    Returns:
        root (ete3.Tree): new tree node named after root_node
        to_visit: the `links` of root_node, i.e. the children of the root
    """
    return Tree(name=root_node.name), root_node.links
def build_tree(link=None, *, stop=1, rec=0, to_visit=None, tree=None):
    """
    Builds tree using Breadth First Search. You can specify stop depth.
    Rec, to_visit & tree arguments are used for recursion.

    *NOTE: This function uses a GET request for each url found, this can
    be very expensive so avoid if possible; try to acquire the urls to
    be traversed and use bfs function.

    Links that cannot be turned into a LinkNode (invalid value, connection
    or HTTP error) are skipped; they do not abort the build.

    Args:
        link: root link node, only read on the initial call (rec == 0)
        stop (int): stops traversing at this depth if specified
        rec (int): current depth, used for recursion
        to_visit (list): links to visit at this depth, used for recursion
        tree (ete3.Tree): a tree node used for recursion

    Returns:
        tree (ete3.Tree): built tree
    """
    if rec == 0:
        tree, to_visit = initialize_tree(link)

    if rec == stop:
        # Stop depth already reached on entry: the initial call hands back a
        # bare node named after the root, a deeper call hands back the tree
        # it was given.
        return Tree(name=tree.name) if rec == 0 else tree

    # The builtin ConnectionError and the one raised by requests are
    # unrelated classes, so the requests one has to be caught explicitly.
    # Imported locally to keep this block self-contained.
    from requests.exceptions import ConnectionError as RequestsConnectionError

    sub_tree = Tree(name=tree.name)
    children_to_visit = []
    for url in to_visit or ():
        try:
            node = LinkNode(url)
        except (ValueError, ConnectionError, RequestsConnectionError, HTTPError):
            # One bad or unreachable link must not discard the whole tree.
            continue

        link_node = sub_tree.add_child(name=node.name)
        for child in node.links:
            link_node.add_child(name=child)
            children_to_visit.append(child)

    rec += 1

    # If we've reached stop depth then return tree
    if stop == rec:
        return sub_tree

    # NOTE(review): for stop > 1 the value finally returned comes from the
    # deepest recursive call, i.e. the sub_tree built for the last level
    # rather than the root created at rec == 0 -- confirm this is intended.
    new_tree = tree.add_child(sub_tree)
    return build_tree(to_visit=children_to_visit, stop=stop, rec=rec, tree=new_tree)