-
Notifications
You must be signed in to change notification settings - Fork 0
/
helpers.py
210 lines (178 loc) · 5.85 KB
/
helpers.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
"""
This file contains all the helper functions used in the main script
Functions such as loading the directories and subdomains from the files, writing the results to the output_files
directory, and the functions that are used to crawl the directories and subdomains
"""
import os
import requests
from test_script import count_dir, count_domain, logger
from filter import *
def validateDomain(domain):
"""
Checks if the domain given is valid. It is valid if it returns a 200 status code
Args:
domain (str): The domain to check
Returns:
True if the domain is valid, False otherwise
"""
# Remove any http/https and www. from the domain, since they are not actually part of the domain
pattern = re.compile(r"[^a-zA-Z0-9.-]")
if pattern.match(domain):
return False
request = requests.get("https://" + domain)
if request.status_code in [200, 201, 202, 203, 204, 205, 206]:
return True
else:
return False
def checkFileExists(file):
if os.path.isfile(file):
return True
else:
return False
def loadFiles():
"""
Loads the directories and subdomains from the files
"""
with open("input_files/dirs_dictionary.bat", "r") as f1:
dirs = f1.read().lower().splitlines()
with open("input_files/subdomains_dictionary.bat", "r") as f2:
subdomains = f2.read().lower().splitlines()
dirs = list(set(dirs))
subdomains = list(set(subdomains))
return dirs, subdomains
def writeFiles(valid_dirs, valid_subdomains, files):
"""
Writes the valid directories, subdomains, and files to the output_files directory
If the directory output_files does not exist, it will be created
Args:
valid_dirs (list): List of valid directories
valid_subdomains (list): List of valid subdomains
files (list): List of all files found
Returns:
None
"""
directory = args.output_dir
if not os.path.exists(directory):
os.makedirs(directory)
logger.debug("Created directory: " + directory)
with open(f"{directory}/valid_dirs.bat", "w") as f1:
for folder in valid_dirs:
f1.write(folder + "\n")
logger.debug("Wrote valid directories to file: " + f1.name)
with open(f"{directory}/valid_subdomains.bat", "w") as f2:
for domain in valid_subdomains:
f2.write(domain + "\n")
logger.debug("Wrote valid subdomains to file: " + f2.name)
with open(f"{directory}/files.bat", "w") as f3:
for file in files:
f3.write(file + "\n")
logger.debug("Wrote all all files found to file: " + f3.name)
def crawlDirs(dirs, pbar, lock):
"""
Queries all the possible directories listed in the file given and returns
a list of directories that return a 202 status code
Args:
dirs (list): List of directories to query
lock (threading.Lock): Lock to prevent multiple threads from writing to the same file
pbar (tqdm.tqdm): Progress bar to show the progress of the script
Returns:
valid_dirs (list): List of valid directories
"""
valid_dirs = []
post_urls = []
for directory in dirs:
with lock:
count_dir.value += 1
pbar.update(1)
if count_dir.value % 10 == 0:
logger.debug(f"Checked {count_dir.value} directories")
if count_dir.value % 500 == 0:
logger.info(f"Checked {count_dir.value} directories")
url = f"https://{args.domain}/{directory}"
try:
request = requests.get(url)
except requests.exceptions.ConnectionError:
continue
if request.status_code in [200, 201, 202, 203, 204, 205, 206]:
valid_dirs.append(url)
logger.info(f"Found valid directory: {url}")
if not "Allow" in request.headers:
continue
if "POST" in request.headers.get("Allow"):
logger.debug(f"Found POST request at {url}")
result = handlePost(url)
if result == 0:
post_urls.append(result)
else:
continue
return valid_dirs, post_urls
def crawlDomain(subdomains, pbar, lock):
"""
Queries all the possible subdomains listed in the file given and returns
a list of subdomains that return a 202 status code
Args:
lock (threading.Lock): Lock to prevent multiple threads from writing to the same file
pbar (tqdm.tqdm): Progress bar to show the progress of the script
subdomains (list): List of subdomains to query
Returns:
valid_subdomains (list): List of valid subdomains
"""
valid_subdomains = []
post_urls = []
for subdomain in subdomains:
with lock:
count_domain.value += 1
pbar.update(1)
if count_domain.value % 100 == 0:
logger.debug(f"Checked {count_domain.value} subdomains")
if count_domain.value % 10000 == 0:
logger.info(f"Checked {count_domain.value} subdomains")
url = f"https://{subdomain}.{args.domain}"
if not checkUrl(url):
continue
try:
request = requests.get(url)
except requests.exceptions.ConnectionError:
continue
if request.status_code in [200, 201, 202, 203, 204, 205, 206]:
valid_subdomains.append(url)
logger.info(f"Found valid subdomain: {url}")
if not "Allow" in request.headers:
continue
if "POST" in request.headers.get("Allow"):
logger.debug(f"Found POST request at {url}")
result = handlePost(url)
if result == 0:
post_urls.append(result)
else:
continue
return valid_subdomains, post_urls
def getFiles(url):
"""
Scrapes the html for files that are being linked to in the html and stores them in a list
Args:
url (str): URL to scrape for files
Returns:
files (list): List of files found
"""
html = requests.get(url).text
links = getLinks(html)
files = getValidFiles(links)
logger.debug(f"Found {len(files)} files for {url}")
return files
def handlePost(url):
"""
Handles POST requests by checking if the POST request requires authentication
Args:
url (str): URL to check for POST request
Returns:
0 if POST request requires authentication
1 if POST request does not require authentication
"""
request = requests.post(url)
if request.status_code in [401, 403]:
logger.debug(f"{url} requires authentication, proceeding with brute force")
return 0
else:
logger.debug(f"{url} supports POST but does not require authentication")
return 1