Skip to content

Commit 6ceed1e

Browse files
committed
Add community-id, a direct calculator for the ID
The idea here is to support a growing list of input formats to directly calculate the ID for a given flow tuple. Currently supports two formats, one of which is that of Zeek logs in their TSV ordering.
1 parent 8b823e0 commit 6ceed1e

File tree

5 files changed

+157
-1
lines changed

5 files changed

+157
-1
lines changed

communityid/__init__.py

Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,3 +8,30 @@
88
from communityid.algo import FlowTuple
99
from communityid.algo import CommunityID
1010
from communityid.algo import PROTO_ICMP, PROTO_TCP, PROTO_UDP, PROTO_ICMP6, PROTO_SCTP
11+
12+
def get_proto(proto):
    """
    Returns the appropriate PROTO_xxx constant for the given protocol,
    or None if the protocol wasn't understood.

    The input type can either be a string ("TCP", "UDP", etc; case does
    not matter) or the IP protocol number (e.g., 6 for TCP), given as an
    integer or as a numeric string.

    Unknown protocol numbers, unknown names, and values that are neither
    numeric nor strings (such as None) all yield None instead of raising.
    """
    known = (PROTO_ICMP, PROTO_TCP, PROTO_UDP, PROTO_ICMP6, PROTO_SCTP)

    # First try the numeric interpretation. int() raises ValueError for
    # non-numeric strings and TypeError for values such as None.
    try:
        number = int(proto)
    except (TypeError, ValueError):
        number = None

    if number in known:
        return number

    protos = {
        "ICMP": PROTO_ICMP,
        "ICMP6": PROTO_ICMP6,
        "SCTP": PROTO_SCTP,
        "TCP": PROTO_TCP,
        "UDP": PROTO_UDP,
    }

    # Fall back to a lookup by name. Non-string input (e.g. an unsupported
    # integer protocol number) has no upper() method, hence AttributeError;
    # that case is "not understood" just like an unknown name.
    try:
        return protos[proto.upper()]
    except (AttributeError, KeyError):
        return None

scripts/community-id

Lines changed: 126 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,126 @@
1+
#! /bin/env python
2+
"""
3+
This script lets you compute Community ID values for specific flow tuples.
4+
You provide the tuple parts, it provides the ID.
5+
"""
6+
import abc
7+
import argparse
8+
import socket
9+
import sys
10+
11+
import communityid
12+
13+
class TupleParser:
    """
    Base class for flow tuple parsers. Subclasses implement parse();
    the static helpers validate individual tuple parts.
    """
    # Note: the class does not use an ABC metaclass, so the abstractmethod
    # marker below is informational only and instantiation is not blocked.
    @abc.abstractmethod
    def parse(self, parts):
        """
        Parses the given line parts list into a FlowTuple, or None on error.
        """
        return None

    @staticmethod
    def is_ipaddr(val):
        """
        Returns True if val is the text form of an IPv4 or IPv6 address,
        False otherwise.
        """
        # IPv4: unchanged from the original check.
        try:
            socket.inet_aton(val)
            return True
        except socket.error:
            pass

        # IPv6: inet_aton() only understands IPv4, so without this branch
        # no IPv6 flow (e.g. anything using ICMP6) could ever be parsed.
        # NOTE(review): assumes FlowTuple accepts textual IPv6 addresses;
        # confirm against communityid.algo.
        try:
            socket.inet_pton(socket.AF_INET6, val)
            return True
        except (socket.error, AttributeError):
            # AttributeError: inet_pton is not available on every
            # platform/Python combination; treat that as "not an address".
            return False

    @staticmethod
    def is_port(val):
        """
        Returns True if val converts to an integer in the range 0-65535,
        False otherwise.
        """
        try:
            port = int(val)
            return 0 <= port <= 65535
        except ValueError:
            return False
36+
37+
class DefaultParser(TupleParser):
    """
    Parser for the default ordering: protocol first, followed by
    source address, destination address, source port, destination port.
    """
    def parse(self, parts):
        """
        Builds a FlowTuple from exactly five parts in
        proto/saddr/daddr/sport/dport order; returns None when the count
        is wrong or any part fails validation.
        """
        if len(parts) != 5:
            return None

        proto_arg, saddr, daddr, sport, dport = parts

        proto = communityid.get_proto(proto_arg)
        if proto is None:
            return None

        # Addresses are validated before ports, left to right.
        if not (self.is_ipaddr(saddr) and self.is_ipaddr(daddr)):
            return None
        if not (self.is_port(sport) and self.is_port(dport)):
            return None

        return communityid.FlowTuple(
            proto, saddr, daddr, int(sport), int(dport))
58+
59+
class ZeekLogsParser(TupleParser):
    """
    Parser for the field order used in Zeek's logs:
    source address, source port, destination address, destination port,
    protocol.
    """
    def parse(self, parts):
        """
        Builds a FlowTuple from exactly five parts in
        saddr/sport/daddr/dport/proto order; returns None when the count
        is wrong or any part fails validation.
        """
        if len(parts) != 5:
            return None

        saddr, sport, daddr, dport, proto_arg = parts

        proto = communityid.get_proto(proto_arg)
        if proto is None:
            return None

        # Validate each endpoint in the order the fields appear.
        endpoints_ok = (self.is_ipaddr(saddr) and
                        self.is_port(sport) and
                        self.is_ipaddr(daddr) and
                        self.is_port(dport))
        if not endpoints_ok:
            return None

        return communityid.FlowTuple(
            proto, saddr, daddr, int(sport), int(dport))
79+
80+
def main():
    """
    Command-line entry point: parses the arguments, tries each supported
    tuple format in turn, and prints the Community ID of the first format
    that matches to stdout.

    Returns 0 on success and 1 on any error (no tuple given, tuple not
    understood by any parser, or ID calculation failure). Diagnostics go
    to stderr so that stdout only ever carries a Community ID.
    """
    arg_parser = argparse.ArgumentParser(
        formatter_class=argparse.RawDescriptionHelpFormatter,
        description="""Community ID calculator

This calculator prints the Community ID value for a given tuple
to stdout. It supports the following formats for the tuple:

[protocol] [src address] [dst address] [src port] [dst port]
[src address] [src port] [dst address] [dst port] [protocol]

The protocol is either a numeric IP protocol number, or one of
the constants "icmp", "icmp6", "tcp", "udp", or "sctp". Case
does not matter.
""")
    arg_parser.add_argument('--seed', type=int, default=0, metavar='NUM',
                            help='Seed value for hash operations')
    arg_parser.add_argument('--no-base64', action='store_true', default=False,
                            help="Don't base64-encode the SHA1 binary value")
    arg_parser.add_argument('flowtuple', nargs=argparse.REMAINDER,
                            help='Flow tuple, in one of the forms described above')
    args = arg_parser.parse_args()

    if not args.flowtuple:
        sys.stderr.write('Need flow tuple as additional arguments.\n')
        return 1

    commid = communityid.CommunityID(args.seed, not args.no_base64)

    # The first parser that accepts the tuple decides the outcome.
    for tuple_parser in (DefaultParser(), ZeekLogsParser()):
        tpl = tuple_parser.parse(args.flowtuple)
        if tpl is None:
            continue

        res = commid.calc(tpl)

        if res is None:
            sys.stderr.write('%s\n' % commid.get_error())
            return 1

        print(res)
        return 0

    # No parser recognized the input; say so instead of failing silently.
    sys.stderr.write('Could not parse flow tuple: %s\n'
                     % ' '.join(args.flowtuple))
    return 1
124+
125+
if __name__ == '__main__':
    # Hand main()'s return value to the shell as the process exit status.
    sys.exit(main())

scripts/community-id-pcap

100644100755
File mode changed.

scripts/community-id-tcpdump

100644100755
File mode changed.

setup.py

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,10 @@
1313
long_description_content_type="text/markdown",
1414
url="https://github.com/corelight/pycommunityid",
1515
packages=['communityid'],
16-
scripts=['scripts/community-id-pcap', 'scripts/community-id-tcpdump'],
16+
scripts=[
17+
'scripts/community-id',
18+
'scripts/community-id-pcap',
19+
'scripts/community-id-tcpdump'],
1720
test_suite="tests.communityid_test",
1821
classifiers=[
1922
"Programming Language :: Python :: 3",

0 commit comments

Comments
 (0)