"""Check a list of domains against the Cisco Umbrella Top 1M ranking.

The script reads the Umbrella CSV (one ``rank,domain`` row per line) and a
plain-text file holding one domain per line, then prints two reports: the
domains that do not appear in the ranking, followed by the domains that do,
ordered by rank.

Only single-argument ``print(...)`` calls are used, so the file parses the
same way under Python 2 (the dialect it was originally written in) and
Python 3.
"""
import csv
import sys

__author__ = "James Condon"

# Cisco Umbrella Top 1M CSV; column 0 is the rank, column 1 is the domain.
# NOTE(review): the original comment naming the download location was
# truncated ("csv from") - restore the URL if it is known.
umbrella_fname = "top-1m.csv"
# Text file containing the domains to check against (one domain per line).
domain_fname = "domain_examples.txt"


def _open_csv(path):
    """Open *path* in the mode the running interpreter's csv module expects.

    Python 3's csv module wants a text file opened with ``newline=""``;
    Python 2's wants a binary file, which is what the original script used.
    """
    if sys.version_info[0] >= 3:
        return open(path, "r", newline="", encoding="utf-8")
    return open(path, "rb")


def load_rankings(rows):
    """Build a ``{domain: rank}`` lookup from parsed CSV rows.

    Args:
        rows: iterable of sequences whose first item is the rank and whose
            second item is the domain (e.g. a ``csv.reader``).

    Returns:
        dict mapping each domain to its rank, kept as the original string so
        it is printed exactly as it appears in the file. Rows with fewer
        than two columns (blank lines, truncated rows) are skipped. If a
        domain is listed more than once, the first rank seen wins.
    """
    rank_by_domain = {}
    for row in rows:
        if len(row) < 2:
            continue
        rank_by_domain.setdefault(row[1], row[0])
    return rank_by_domain


def load_domains(lines):
    """Return the stripped, non-empty entries of *lines* in input order."""
    stripped = (line.strip() for line in lines)
    return [domain for domain in stripped if domain]


def check_domains(domains, rank_by_domain):
    """Split *domains* into those present in the ranking and those absent.

    Args:
        domains: iterable of domain strings to look up (exact match).
        rank_by_domain: mapping produced by :func:`load_rankings`.

    Returns:
        ``(found, not_found)`` where ``found`` is a list of
        ``(rank, domain)`` tuples sorted by numeric rank and ``not_found``
        is a list of domains in input order.

    Raises:
        ValueError: if a matched rank is not an integer string.
    """
    found = []
    not_found = []
    for domain in domains:
        rank = rank_by_domain.get(domain)
        if rank is None:
            not_found.append(domain)
        else:
            found.append((rank, domain))
    # Ranks are stored as strings; compare numerically so "9" precedes "10".
    found.sort(key=lambda entry: int(entry[0]))
    return found, not_found


def main():
    """Load both input files and print the not-found / found reports."""
    print("Loading Cisco Umbrella Top 1M List...")
    with _open_csv(umbrella_fname) as f:
        # Consume the reader while the file is still open.
        rank_by_domain = load_rankings(csv.reader(f))

    print("Loading Domain List...")
    with open(domain_fname) as f:
        domains = load_domains(f)

    print("Checking Domains...")
    found_domains, not_found_domains = check_domains(domains, rank_by_domain)

    print("*** Domains Not Found in Top 1 Million ***")
    for entry in not_found_domains:
        print(entry)

    print("\n*** Domains Found in Top 1 Million ***\nRank - Domain")
    for entry in found_domains:
        print("%s - %s" % (entry[0], entry[1]))


if __name__ == "__main__":
    main()