public
Description: Analysis scripts for the Bro Intrusion Detection System
Homepage: http://www.bro-ids.org
Clone URL: git://github.com/sethhall/bro_scripts.git
bro_scripts / ssn-exposure.bro
100644 171 lines (146 sloc) 5.586 kb
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
 
@load notice
 
module SSN;
 
export {
	# You must redef this variable with a normalized list of SSNs.
	# If you would like or require trivial obfuscation, you can set
	# the use_md5_hashed_ssns variable to T and put md5 hashed SSNs
	# into the list.
	# Example for redefining this variable... (huge sets are handled easily)
	# redef SSN::SSN_list = {"264439985", "351669087"};
	const SSN_list: set[string] &redef;

	# As commented above, set this to T if you are using hashed SSNs in your
	# SSN_list variable.
	# Marked &redef so that a site policy can actually change it, e.g.
	# redef SSN::use_md5_hashed_ssns = T;
	const use_md5_hashed_ssns = F &redef;

	# Log file that receives one tab separated line per reported SSN.
	const ssn_log = open_log_file("ssn-exposure") &raw_output &redef;

	# Notice types raised by this script.
	redef enum Notice += {
		SSN_Exposure,
		SSN_MassExposure,
	};

	# The threshold of SSNs that you want to qualify as a mass disclosure.
	# This allows you to only alarm on more significant disclosures (instead of people sending their own SSN).
	# NOT CURRENTLY WORKING!
	# Marked &redef so that the threshold can be tuned per site.
	const mass_exposure_num = 5 &redef;

	# Uncomment the following line if you'd like to use the signature based
	# technique in this script.
	#const use_ssn_sigs = T;
}
 
# This variable is for tracking the valid SSNs detected per-connection.
# This should catch SSN leakage over email.
# check_ssns() adds every listed SSN it reports to the set for the connection
# and compares the size of that set against mass_exposure_num.
# Entries are dropped 5 minutes after they were created (&create_expire).
# NOTE(review): &default presumably only hands back an empty set for a missing
# index without storing it in the table - confirm against the Bro attribute
# documentation, since that would keep additions from persisting.
global ssn_conn_tracker: table[conn_id] of set[string] &create_expire=5mins &default=function(id:conn_id):string_set { return set(); };

# This variable tracks the valid SSNs detected per-service.
# The idea is that this would catch SSN leakage through an SQL injection attack.
# Nothing in the visible part of this file reads from or writes to this table.
global ssn_serv_tracker: table[addr, port] of set[string] &create_expire=5mins &default=function(a:addr, p:port):string_set { return set(); };
 
# Select the detection technique at load time: if use_ssn_sigs has been
# defined (see the commented out const in the export section) the signature
# engine is loaded together with ssn.sig, otherwise the SMTP and HTTP
# analyzer scripts are loaded so that the *_data event handlers below fire.
@ifdef (use_ssn_sigs)
@load signatures
redef signature_files += "ssn.sig";
# NOTE(review): presumably this suppresses the default action for the
# "ssn-match" signature so that only this script reports - confirm.
redef signature_actions += { ["ssn-match"] = SIG_IGNORE };
@else
# Conn is needed because of a small dependency between smtp.bro and conn.bro
@load conn
@load smtp
@load http-reply
@endif
 
#redef notice_action_filters += { [SSN::SSN_Exposure] = ignore_notice };
 
# Pattern for SSN candidates inside arbitrary payload. Reading left to right:
#  - one leading character that is neither a digit nor a slash,
#  - a first digit in the range 0-6 followed by two more digits,
#  - an optional space or hyphen, two digits,
#  - an optional space or hyphen, four digits,
#  - one trailing blank, CR, LF, '<', double quote or single quote.
# Every digit and separator may be preceded by a single NUL byte
# (presumably to also match 16 bit encoded text - TODO confirm).
# The pattern accepts mixed separators; check_ssns() filters those out.
const ssn_regex = /[^0-9\/]\0?[0-6](\0?[0-9]){2}\0?[ \-]?(\0?[0-9]){2}\0?[ \-]?(\0?[0-9]){4}(\0?[[:blank:]\r\n<\"\'])/;
 
# This function is used for validating and notifying about SSNs in a string.
#
# c:    the connection the data was seen on; it is attached to the notice and
#       its endpoints are written to the log.
# data: the payload to search for SSN candidates (see ssn_regex).
#
# Returns T as soon as one candidate is found in SSN_list (after raising a
# notice and writing a log line for it), F if no candidate is listed.
# Because of that early return at most one SSN per call is added to
# ssn_conn_tracker, so the mass exposure threshold counts calls that found a
# new SSN on the connection, not SSNs per payload.
function check_ssns(c: connection, data: string): bool
	{
	local ssnps = find_all(data, ssn_regex);

	for ( ssnp in ssnps )
		{
		# Make sure the number has either 2 hyphens, 2 spaces, or no separators.
		if ( /[0-9]{3}-[0-9]{2}-[0-9]{4}/ in ssnp ||
		     /[0-9]{3} [0-9]{2} [0-9]{4}/ in ssnp ||
		     /[0-9]{9}/ in ssnp )
			{
			# Remove all non-numerics
			local clean_ssnp = gsub(ssnp, /[^0-9]/, "");
			# Strip off any leading chars by keeping only the last nine digits
			local ssn = sub_bytes(clean_ssnp, byte_len(clean_ssnp)-8, 9);

			#print fmt("Checking on -%s-", ssn);
			# SSN_list holds either plain or md5 hashed SSNs, so look up
			# the matching representation.
			local hash_ssn: string;
			if ( use_md5_hashed_ssns )
				hash_ssn = md5_hash(ssn);
			else
				hash_ssn = ssn;

			if ( hash_ssn in SSN_list )
				{
				local id = c$id;
				#print fmt(" %s - Found it! (%s)", ssn, md5_hash(ssn));

				# Reading a missing index through &default only yields a
				# temporary empty set that is not stored in the table, so
				# the entry has to be created explicitly or the added SSN
				# would be lost and the count below would never grow.
				if ( id !in ssn_conn_tracker )
					ssn_conn_tracker[id] = set();
				add ssn_conn_tracker[id][ssn];

				if ( |ssn_conn_tracker[id]| >= mass_exposure_num )
					{
					# %d instead of %i: fmt() has no %i specifier.
					NOTICE([$note=SSN_MassExposure,
					        $conn=c,
					        $msg=fmt("More than %d SSNs disclosed in one connection.", mass_exposure_num)]);
					}
				else
					{
					NOTICE([$note=SSN_Exposure,
					        $conn=c,
					        $msg=fmt("Contents of disclosed ssn session: %s", data),
					        $sub=hash_ssn]);
					}

				# One tab separated log line per reported SSN.
				print ssn_log, cat_sep("\t", "\\N", network_time(),
				                       id$orig_h, fmt("%d", id$orig_p),
				                       id$resp_h, fmt("%d", id$resp_p),
				                       ssn, data);
				return T;
				}
			}
		}
	return F;
	}
 
# Handler for the event based (non signature) detection technique.
# Parsing the protocol events appears to work better than signatures when
# HTTP and SMTP are where SSNs leak in your environment, particularly when
# DPD is available.
event smtp_data(c: connection, is_orig: bool, data: string)
	{
	# Only data sent by the originator is of interest ...
	if ( ! is_orig )
		return;

	# ... and only when that originator is a local host (outbound SMTP).
	if ( ! is_local_addr(c$id$orig_h) )
		return;

	# Ignore connections that started 10 seconds ago or earlier.
	if ( c$start_time <= network_time() - 10secs )
		return;

	if ( ssn_regex in data )
		check_ssns(c, data);
	}
 
# Checks HTTP entity bodies that are sent by a local host, i.e. outbound
# POST data (local originator) and outbound HTTP content (local responder).
event http_entity_data(c: connection, is_orig: bool, length: count, data: string)
	{
	# The endpoint that sent this piece of data.
	local sender = is_orig ? c$id$orig_h : c$id$resp_h;

	if ( ! is_local_addr(sender) )
		return;

	# Ignore connections that started 10 seconds ago or earlier.
	if ( c$start_time <= network_time() - 10secs )
		return;

	if ( ssn_regex in data )
		check_ssns(c, data);
	}
 
# This event is broken in Robin's branch currently
#event mime_all_data(c: connection, length: count, data: string)
# {
# # Only check the regex for connections younger than 10 seconds.
# # This helps avoid load during large and/or long connections.
# if ( c$start_time > network_time()-10secs && ssn_regex in data )
# check_ssns(c, data);
# }
 
# Entry point for the signature based technique.
#
# state: signature match state; its connection is handed on to check_ssns().
# data:  the payload to inspect.
#
# Returns F for data matching /^GET/, otherwise the result of check_ssns().
function validate_ssn_match(state: signature_state, data: string): bool
	{
	# TODO: Don't handle HTTP data this way. Should return F if this is
	# an http/smtp session and handle the relevant *_data events.
	if ( ! ( /^GET/ in data ) )
		return check_ssns(state$conn, data);

	return F;
	}
 
# Change this to only run when not analyzing traffic.
# Consider it a test for this script.
#event bro_init()
# {
#
# local blah = "GET as df12345-6789 ads 100000000 f asdf asd f 000287-76-2154f this is a test";
#
# local ssns = find_ssns(blah);
# for ( i in ssns )
# {
# print fmt("Checking on %s", ssns[i]);
# if ( md5_hash(ssns[i]) in SSN_list )
# {
# print fmt(" %s - Found it! (%s)", ssns[i], md5_hash(ssns[i]));
# }
# }
#
# }