Skip to content

Commit

Permalink
blacklists.py: implement YAML formats for external IP/NS/ASN lists
Browse files Browse the repository at this point in the history
findspam.py: move IP/NS/ASN lists to global variables managed as above

{blacklist,watch}ed_{cidr,nse,asn}s.yml: external YAML files with this data
  • Loading branch information
tripleee committed Dec 20, 2019
1 parent bcf4f9b commit d545b3e
Show file tree
Hide file tree
Showing 7 changed files with 951 additions and 554 deletions.
24 changes: 24 additions & 0 deletions blacklisted_cidrs.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
Schema: yaml_cidr
Schema_version: '2019120601'
items:
- ip: 104.219.248.45
- ip: 192.64.118.108
- ip: 198.54.116.110
- ip: 198.54.120.134
- ip: 198.187.29.252
- ip: 198.187.31.245
- ip: 138.201.185.58
- ip: 75.119.210.224
- ip: 23.229.217.167
- ip: 104.25.50.105
- ip: 107.180.3.93
- ip: 107.180.24.240
- ip: 107.180.59.131
- ip: 107.180.78.164
- ip: 132.148.29.42
- ip: 160.153.75.129
- ip: 69.167.167.150
- ip: 172.96.187.196
- ip: 162.241.216.230
- ip: 116.206.104.141
- ip: 162.251.85.146
42 changes: 42 additions & 0 deletions blacklisted_nses.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
Schema: yaml_ns
Schema_version: '2019120601'
items:
- ns:
- ns1.md-95.bigrockservers.com.
- ns2.md-95.bigrockservers.com.
- ns:
- ns1.md-99.bigrockservers.com.
- ns2.md-99.bigrockservers.com.
- ns:
- apollo.ns.cloudflare.com.
- liz.ns.cloudflare.com.
- ns:
- ara.ns.cloudflare.com.
- greg.ns.cloudflare.com.
- ns:
- brenda.ns.cloudflare.com.
- merlin.ns.cloudflare.com.
- ns:
- chip.ns.cloudflare.com.
- lola.ns.cloudflare.com.
- ns:
- jay.ns.cloudflare.com.
- jule.ns.cloudflare.com.
- ns:
- lee.ns.cloudflare.com.
- ulla.ns.cloudflare.com.
- ns:
- lloyd.ns.cloudflare.com.
- reza.ns.cloudflare.com.
- ns: 247support-number.com.
- ns: promoocodes.com.
- ns: myassignmenthelp.co.uk.
- ns: socialmonkee.com.
- ns: aapkeaajanese.website.
- ns: healthymum.org.
- ns: escortdomain.net.
- ns: dnsdomen.com.
- ns: letter.org.in.
- ns: utecho.com.
- ns: siteground.asia.
- ns: ddos-guard.net.
169 changes: 168 additions & 1 deletion blacklists.py
Original file line number Diff line number Diff line change
@@ -1,18 +1,25 @@
# coding=utf-8
from typing import Union
import regex
import yaml

from globalvars import GlobalVars
from helpers import log


def load_blacklists():
    """
    Parse every blacklist/watchlist file and store the result on GlobalVars.

    Each list is read through ``Blacklist(<type>).parse()``; the parser used
    for a given file is selected by the ``Blacklist`` type constant.  Every
    list is loaded exactly once per call (the watched keywords list used to
    be parsed twice).
    """
    # Plain-text lists
    GlobalVars.bad_keywords = Blacklist(Blacklist.KEYWORDS).parse()
    GlobalVars.watched_keywords = Blacklist(Blacklist.WATCHED_KEYWORDS).parse()
    GlobalVars.blacklisted_websites = Blacklist(Blacklist.WEBSITES).parse()
    GlobalVars.blacklisted_usernames = Blacklist(Blacklist.USERNAMES).parse()
    GlobalVars.blacklisted_numbers = Blacklist(Blacklist.NUMBERS).parse()
    GlobalVars.watched_numbers = Blacklist(Blacklist.WATCHED_NUMBERS).parse()

    # YAML lists (name servers, IP addresses, AS numbers)
    GlobalVars.blacklisted_nses = Blacklist(Blacklist.NSES).parse()
    GlobalVars.watched_nses = Blacklist(Blacklist.WATCHED_NSES).parse()
    GlobalVars.blacklisted_cidrs = Blacklist(Blacklist.CIDRS).parse()
    GlobalVars.watched_cidrs = Blacklist(Blacklist.WATCHED_CIDRS).parse()
    # The ASN blacklist is deliberately left disabled, matching the
    # commented-out Blacklist.ASNS constant.
    # NOTE(review): presumably there is no blacklisted_asns.yml yet -- confirm.
    # GlobalVars.blacklisted_asns = Blacklist(Blacklist.ASNS).parse()
    GlobalVars.watched_asns = Blacklist(Blacklist.WATCHED_ASNS).parse()


class BlacklistParser:
Expand Down Expand Up @@ -140,17 +147,177 @@ def exists(self, item: Union[str, dict]):
splat = x.split('\t')
if len(splat) == 3 and splat[2].strip() == item:
return True, i
return False, -1


class YAMLParserCIDR(BlacklistParser):
    """
    YAML parser for IP blacklist (name suggests we should move to proper CIDR eventually).
    Base class for parsers for YAML files with simple schema validation.

    The file is a mapping with three keys: ``Schema`` (must equal
    SCHEMA_VARIANT), ``Schema_version`` (must not be newer than
    SCHEMA_VERSION) and ``items`` (a list of mappings).  Each item carries its
    value under the SCHEMA_PRIKEY member; an item with a truthy ``disable``
    member is kept in the file but skipped when the list is parsed.

    Subclasses override SCHEMA_VARIANT, SCHEMA_PRIKEY and _validate().
    """
    # Remember to update the schema version if any of this needs to be changed
    SCHEMA_VERSION = '2019120601'  # yyyy mm dd id
    SCHEMA_VARIANT = 'yaml_cidr'
    SCHEMA_PRIKEY = 'ip'

    def __init__(self, filename):
        super().__init__(filename)

    def _parse(self, keep_disabled=False):
        """
        Yield the item mappings from the YAML file after checking its schema.

        :param keep_disabled: also yield items whose ``disable`` member is truthy
        :raises ValueError: if the file is not a mapping, declares a different
            schema variant, or declares a newer schema version than supported
        """
        with open(self._filename, 'r', encoding='utf-8') as f:
            y = yaml.safe_load(f)
        if not isinstance(y, dict):
            # An empty or malformed file would otherwise fail with an opaque
            # TypeError on the first subscript below.
            raise ValueError('{0} does not contain a YAML mapping'.format(self._filename))
        if y['Schema'] != self.SCHEMA_VARIANT:
            raise ValueError('Schema variant: got {0}, but expected {1}'.format(
                y['Schema'], self.SCHEMA_VARIANT))
        # Versions are fixed-width digit strings, so string comparison orders them.
        if y['Schema_version'] > self.SCHEMA_VERSION:
            raise ValueError('Schema version {0} is bigger than supported {1}'.format(
                y['Schema_version'], self.SCHEMA_VERSION))
        # A bare "items:" key loads as None; treat it as an empty list.
        for item in y['items'] or []:
            if not keep_disabled and item.get('disable'):
                continue
            yield item

    def parse(self):
        """Return the primary-key value of every enabled item, in file order."""
        return [item[self.SCHEMA_PRIKEY] for item in self._parse()]

    @staticmethod
    def _sort_key(value):
        """
        Return a sort key for a primary-key value.

        Values are wrapped in a tuple so that scalar values and list values
        (both occur in name server files) can be ordered against each other
        instead of raising TypeError; scalars keep their natural order.
        """
        if isinstance(value, (list, tuple)):
            return tuple(value)
        return (value,)

    def _write(self, callback):
        """
        Rewrite the file: load all items (disabled ones included), sort them
        by primary key, let ``callback`` modify the document in place, then
        dump it back to the file.

        :param callback: callable taking the document dict; it may raise to
            abort before anything is written
        """
        prikey = self.SCHEMA_PRIKEY
        d = {
            'Schema': self.SCHEMA_VARIANT,
            'Schema_version': self.SCHEMA_VERSION,
            # sorted() drains the generator, so the file has been fully read
            # and closed before it is reopened for writing below.
            'items': sorted(
                self._parse(keep_disabled=True),
                key=lambda x: self._sort_key(x[prikey])),
        }
        callback(d)
        with open(self._filename, 'w', encoding='utf-8') as f:
            yaml.dump(d, f)

    def _validate(self, item):
        """
        Check that ``item`` has an "ip" member holding a dotted-quad IPv4 address.

        :raises ValueError: if the member is missing or not a valid address
        """
        # Function-scope import keeps this class self-contained.  The stdlib
        # parser replaces a hand-written octet regex which rejected valid
        # octets (0 and 20-24).
        import ipaddress

        if 'ip' not in item:
            raise ValueError('Item needs to have an "ip" member field: {0!r}'.format(item))

        ip = item['ip']
        # IPv4Address also accepts integers and bytes; only strings are wanted here.
        valid = isinstance(ip, str)
        if valid:
            try:
                ipaddress.IPv4Address(ip)
            except ValueError:
                valid = False
        if not valid:
            raise ValueError('Field "ip" is not a valid IP address: {0}'.format(ip))
        # TODO: proper CIDR support.  The disabled draft of this check planned
        # an alternative "cidr" member (mutually exclusive with "ip") holding
        # "base" (an IP address) and "mask" (an integer from 0 to 32).

    def add(self, item):
        """
        Validate ``item`` and append it to the file.

        :raises ValueError: if the item is invalid or its primary key is
            already present (disabled entries count as present)
        """
        self._validate(item)
        prikey = self.SCHEMA_PRIKEY

        def add_callback(d):
            for compare in d['items']:
                if compare[prikey] == item[prikey]:
                    raise ValueError('{0} already in list {1}'.format(
                        item[prikey], d['items']))
            d['items'].append(item)

        self._write(add_callback)

    def remove(self, item):
        """
        Remove the first entry whose primary key equals that of ``item``.

        :raises ValueError: if no such entry exists
        """
        prikey = self.SCHEMA_PRIKEY

        def remove_callback(d):
            for i, compare in enumerate(d['items']):
                if compare[prikey] == item[prikey]:
                    break
            else:
                raise ValueError('No {0} found in list {1}'.format(
                    item[prikey], d['items']))
            del d['items'][i]

        self._write(remove_callback)

    # FIXME: enumerate gets YAML item array index, not line number
    def each(self, with_info=False):
        """
        Iterate over the primary-key values of the enabled items.

        :param with_info: if true, yield ``(value, (index, filename))`` where
            index is the 1-based position among the enabled items
        """
        for i, item in enumerate(self.parse(), start=1):
            if with_info:
                yield item, (i, self._filename)
            else:
                yield item

    def exists(self, item):
        """
        Look up ``item`` (compared in lower case) among the enabled entries.

        :return: ``(True, index)`` with the 1-based item index on a match,
            otherwise ``(False, -1)``
        """
        item = item.lower()
        # each(with_info=True) yields (value, (index, filename)); unpacking it
        # as (index, value) made this comparison always fail.
        for rec, (i, _filename) in self.each(with_info=True):
            if item == rec:
                return True, i
        return False, -1


class YAMLParserNS(YAMLParserCIDR):
    """
    YAML parser for name server blacklists.

    The "ns" member of an item is either a single host name or a list of
    host names.
    """
    SCHEMA_VARIANT = 'yaml_ns'
    SCHEMA_PRIKEY = 'ns'

    def _validate(self, item):
        """
        Check that ``item`` has an "ns" member made of plausible host names.

        A host name must be lower case, consist of at least two labels, and
        end with a dot (e.g. ``ns1.example.com.``).

        :raises ValueError: if "ns" is missing, is neither a string nor a
            list, or contains something that does not look like a host name
        """
        host_regex = regex.compile(r'^([a-z0-9][-a-z0-9]*\.){2,}$')
        if 'ns' not in item:
            raise ValueError('Item must have member field "ns": {0!r}'.format(item))

        ns = item['ns']
        # isinstance() takes (object, type); the reversed argument order used
        # here before raised TypeError on every call.
        if isinstance(ns, str):
            hosts = [ns]
        elif isinstance(ns, list):
            hosts = ns
        else:
            raise ValueError(
                'Member "ns" must be either string or list of strings: {0!r}'.format(
                    item['ns']))

        for host in hosts:
            # Non-string list members are rejected here rather than letting
            # the regex engine raise TypeError.
            if not isinstance(host, str) or not host_regex.match(host):
                raise ValueError(
                    '{0} does not look like a valid host name'.format(host))


class YAMLParserASN(YAMLParserCIDR):
    """
    YAML parser for ASN blacklists.

    Items carry their AS number under the "asn" member.
    """
    SCHEMA_VARIANT = 'yaml_asn'
    SCHEMA_PRIKEY = 'asn'

    def _validate(self, item):
        """
        Check that ``item`` has an "asn" member which is an acceptable AS number.

        The value is converted with int(); numbers outside 1..4199999999,
        inside 64496..131071, or equal to 23456 are rejected.
        NOTE(review): these look like the reserved/private/documentation
        ranges and AS_TRANS from the IANA registry -- confirm.

        :raises ValueError: if "asn" is missing or is a rejected number
        """
        if 'asn' not in item:
            raise ValueError('Item must have member field "asn": {0!r}'.format(item))

        asn = int(item['asn'])
        outside_range = not 0 < asn < 4_200_000_000
        in_excluded_block = 64496 <= asn <= 131071
        is_excluded_single = asn == 23456
        if outside_range or in_excluded_block or is_excluded_single:
            raise ValueError('Not a valid public AS number: {0}'.format(asn))


class Blacklist:
KEYWORDS = ('bad_keywords.txt', BasicListParser)
WEBSITES = ('blacklisted_websites.txt', BasicListParser)
USERNAMES = ('blacklisted_usernames.txt', BasicListParser)
NUMBERS = ('blacklisted_numbers.txt', BasicListParser)
WATCHED_KEYWORDS = ('watched_keywords.txt', TSVDictParser)
WATCHED_NUMBERS = ('watched_numbers.txt', TSVDictParser)
NSES = ('blacklisted_nses.yml', YAMLParserNS)
WATCHED_NSES = ('watched_nses.yml', YAMLParserNS)
CIDRS = ('blacklisted_cidrs.yml', YAMLParserCIDR)
WATCHED_CIDRS = ('watched_cidrs.yml', YAMLParserCIDR)
# ASNS = ('blacklisted_asns.yml', YAMLParserASN)
WATCHED_ASNS = ('watched_asns.yml', YAMLParserASN)

def __init__(self, type):
self._filename = type[0]
Expand Down
Loading

0 comments on commit d545b3e

Please sign in to comment.