In [1]:
import pandas as pd
from tls_paper_funcs import tls_12_appdata_filtering, remove_empty

In [2]:
# Folder (relative to the notebook) that holds every MTA input file read below.
mta_folder = 'data/mta/'
# Flow-level table; presumably Joy flow records joined with Suricata TLS
# events, going by the file name -- TODO confirm. Parsed with the pure-Python engine.
mta2020_csv = mta_folder + 'joy-tls-ver-suricata.csv'
mta2020 = pd.read_csv(mta2020_csv, engine='python')

In [3]:
# Our own ground truth (in-house labels), loaded as-is.
in_house_gt_csv = mta_folder + 'in-house-gt.csv'
new_gt = pd.read_csv(in_house_gt_csv)

# SSLBLs GT:
def sslbl_to_fp(s):
    """Return ``s`` cut into consecutive 2-character chunks joined by ``':'``.

    Turns a plain hex digest such as ``'aabbcc'`` into the colon-separated
    form ``'aa:bb:cc'``. An odd-length input keeps its last single character
    as the final chunk; an empty string yields an empty string.
    """
    chunks = []
    for start in range(0, len(s), 2):
        chunks.append(s[start:start + 2])
    return ':'.join(chunks)

sslbl_csv = mta_folder + 'sslbl-2022-05-23.csv'
sslbl = pd.read_csv(sslbl_csv, engine='python')
# Flip the row order and renumber from 0 (presumably the export is
# newest-first, so this makes it chronological -- TODO confirm).
sslbl = sslbl.iloc[::-1].reset_index(drop=True)

# Derive the three columns shared with the in-house ground truth.
sslbl['cert_fp'] = sslbl['SHA1'].map(sslbl_to_fp)  # colon-separated fingerprint
sslbl['comment'] = 'Listingdate: ' + sslbl['Listingdate'].astype(str)
sslbl['new_label'] = sslbl['Listingreason'].map(lambda reason: 'SSLBL ' + reason)

# Stack the in-house labels on top of the SSLBL ones (fresh 0..n-1 index)
# and flag every ground-truth row as C2.
sslbl_labels = sslbl[['new_label', 'cert_fp', 'comment']]
groundtruth = pd.concat([new_gt, sslbl_labels], ignore_index=True).assign(c2=1)
groundtruth

Unnamed: 0,new_label,cert_fp,comment,c2
0,TrickBot_TLS,55:f2:dd:78:9c:1b:e7:bc:b4:cd:4a:e7:d7:2a:db:e...,https://www.malware-traffic-analysis.net/2018/...,1
1,TrickBot_TLS,47:6c:dd:3c:4d:f3:47:26:26:ae:76:b8:de:1c:be:5...,https://www.malware-traffic-analysis.net/2019/...,1
2,TrickBot_TLS,f7:0f:36:a1:58:67:a5:24:4a:00:92:5d:ae:7c:61:4...,https://www.virustotal.com/gui/ip-address/185....,1
3,TrickBot_TLS,d9:e9:27:23:9c:6b:f5:06:52:40:22:63:8e:00:f6:8...,https://www.malware-traffic-analysis.net/2018/...,1
4,TrickBot_TLS,93:82:53:16:dc:6d:f1:5a:be:49:eb:33:92:d3:64:f...,https://isc.sans.edu/forums/diary/Malspam+with...,1
...,...,...,...,...
4905,SSLBL AsyncRAT C&C,76:65:4e:b6:5d:bf:b6:e0:52:1b:19:43:ed:be:8f:f...,Listingdate: 2022-05-20 05:16:52,1
4906,SSLBL Neurevt C&C,a7:18:8f:3a:c0:47:85:82:e9:02:a4:0f:1a:a0:99:d...,Listingdate: 2022-05-20 12:33:47,1
4907,SSLBL DCRat C&C,65:7a:3d:40:2a:b1:77:51:d6:ad:1b:71:2a:06:f0:f...,Listingdate: 2022-05-22 12:07:06,1
4908,SSLBL AsyncRAT C&C,9a:79:97:4e:66:4d:4c:a4:1a:68:b9:f1:ee:a1:d6:e...,Listingdate: 2022-05-23 06:08:07,1


In [4]:
#labeled_mta = pd.concat([ mta_2020[relevant_columns], mta_2022[relevant_columns] ]).merge(groundtruth, on='cert_fp', how='left')
# Attach the ground-truth label to every flow by certificate fingerprint.
# Left join: flows without a matching fingerprint keep NaN in the
# ground-truth columns.
labeled_mta = mta2020.merge(groundtruth, on='cert_fp', how='left')
# Unmatched flows are treated as non-C2. Assign the result back instead of
# calling fillna(inplace=True) on the column selection: the chained in-place
# form warns on pandas 2.2 and no longer updates the frame under Copy-on-Write.
labeled_mta['c2'] = labeled_mta['c2'].fillna(0)
labeled_mta['source'] = 'MTA'
labeled_mta['c2'].value_counts()

c2
0.0    22117
1.0    18153
Name: count, dtype: int64

In [5]:
# Handshake version codes become plain integers.
for version_col in ['c_tls_version', 's_tls_version']:
    labeled_mta[version_col] = labeled_mta[version_col].astype(int)

# supported_versions become strings with anything after the first '.' cut off
# (e.g. '304.0' -> '304'), so they can be compared against string codes below.
for supported_col in ['c_supported_versions', 's_supported_versions']:
    labeled_mta[supported_col] = (
        labeled_mta[supported_col].astype(str).map(lambda raw: raw.partition('.')[0])
    )

In [6]:
tls_version_cols = ['c_tls_version', 's_tls_version', 'c_supported_versions', 's_supported_versions']
# For flows whose tls_version column says 'TLS 1.3', count each combination
# of the four version fields.
labeled_as_tls13 = labeled_mta['tls_version'] == 'TLS 1.3'
labeled_mta.loc[labeled_as_tls13, tls_version_cols].groupby(tls_version_cols).size()

c_tls_version  s_tls_version  c_supported_versions    s_supported_versions
5              -1             080304030303020301      -1                       2
                5             0403040303              304                      6
                              080304030303020301      304                     29
                              0a0a0a0304030303020301  304                     26
                              0a1a1a0304030303020301  304                     28
                              0a2a2a0304030303020301  304                     25
                              0a3a3a0304030303020301  304                     32
                              0a4a4a0304030303020301  304                     31
                              0a5a5a0304030303020301  304                     26
                              0a6a6a0304030303020301  304                     37
                              0a7a7a0304030303020301  304                     16
                              0a8a

In [7]:
# TLS 1.3 heuristic, checked against the tls_version column:
#   * client and server version codes are both 5,
#   * the client's supported_versions is present (neither '-1' nor ''),
#   * the server's supported_versions is one of '7f17', 'fb1a', '304'
#     (the integer 304 is also accepted).
both_versions_5 = labeled_mta['c_tls_version'].eq(5) & labeled_mta['s_tls_version'].eq(5)
client_lists_versions = ~labeled_mta['c_supported_versions'].isin(['-1', ''])
server_selected_tls13 = (
    labeled_mta['s_supported_versions'].isin(['7f17', 'fb1a', '304'])
    | labeled_mta['s_supported_versions'].eq(304)
)
matches_tls13_heuristic = both_versions_5 & client_lists_versions & server_selected_tls13
labeled_mta.loc[matches_tls13_heuristic, 'tls_version'].value_counts()

tls_version
TLS 1.3                471
TLS 1.3 draft-26-fb     18
UNDETERMINED            12
TLS 1.3 draft-23         3
Name: count, dtype: int64

In [8]:
# Complement of the TLS 1.3 heuristic: which tls_version values do the flows
# that do NOT match it carry?
both_versions_5 = labeled_mta['c_tls_version'].eq(5) & labeled_mta['s_tls_version'].eq(5)
client_lists_versions = ~labeled_mta['c_supported_versions'].isin(['-1', ''])
server_selected_tls13 = (
    labeled_mta['s_supported_versions'].isin(['7f17', 'fb1a', '304'])
    | labeled_mta['s_supported_versions'].eq(304)
)
matches_tls13_heuristic = both_versions_5 & client_lists_versions & server_selected_tls13
labeled_mta.loc[~matches_tls13_heuristic, 'tls_version'].value_counts()

tls_version
TLS 1.2         21646
TLSv1           16854
UNDETERMINED       73
TLS 1.1            33
TLS 1.3             2
Name: count, dtype: int64

In [9]:
# Flows matching the TLS 1.3 heuristic (both version codes 5, client
# supported_versions present, server supported_versions in 7f17/fb1a/304)
# are tagged 'TLS 1.3'; every other flow falls back to 'TLS 1.2'.
both_versions_5 = labeled_mta['c_tls_version'].eq(5) & labeled_mta['s_tls_version'].eq(5)
client_lists_versions = ~labeled_mta['c_supported_versions'].isin(['-1', ''])
server_selected_tls13 = (
    labeled_mta['s_supported_versions'].isin(['7f17', 'fb1a', '304'])
    | labeled_mta['s_supported_versions'].eq(304)
)
matches_tls13_heuristic = both_versions_5 & client_lists_versions & server_selected_tls13
labeled_mta['tls_version_guess'] = matches_tls13_heuristic.map({True: 'TLS 1.3', False: 'TLS 1.2'})
labeled_mta['tls_version_guess'].value_counts()

tls_version_guess
TLS 1.2    39752
TLS 1.3      518
Name: count, dtype: int64

In [10]:
# How many flows are flagged as resumed sessions (value_counts skips missing values).
session_resumed_counts = labeled_mta['session_resumed'].value_counts()
session_resumed_counts

session_resumed
False    36538
True      2574
Name: count, dtype: int64

In [11]:
# Flows guessed as TLS 1.3 whose session_resumed flag is explicitly False.
not_resumed = labeled_mta['session_resumed'].eq(False)
guessed_tls13 = labeled_mta['tls_version_guess'].eq('TLS 1.3')
labeled_mta.loc[not_resumed & guessed_tls13]

Unnamed: 0,timestamp,cert_fp,ja3,ja3s,notafter,notbefore,subject,issuerdn,tls_version,session_resumed,...,tls_tp_18,tls_tp_19,file,sni_joy,_merge,new_label,comment,c2,source,tls_version_guess
10251,2018-07-05 17:17:44.658382+00:00,,14eff09463c9d6dc6e14e954eba239c2,eb1d94daa7e0344597e756a1fb6e7054,,,,,TLS 1.3 draft-23,False,...,23.0,23.0,1977.0,static.addtoany.com,both,,,0.0,MTA,TLS 1.3
10254,2018-07-05 17:17:44.738482+00:00,,14eff09463c9d6dc6e14e954eba239c2,f4febc55ea12b31ae17cfb7e614afda8,,,,,TLS 1.3 draft-23,False,...,23.0,23.0,1977.0,connect.facebook.net,both,,,0.0,MTA,TLS 1.3
10259,2018-07-05 17:17:45.448274+00:00,,14eff09463c9d6dc6e14e954eba239c2,f4febc55ea12b31ae17cfb7e614afda8,,,,,TLS 1.3 draft-23,False,...,23.0,-1.0,1977.0,www.facebook.com,both,,,0.0,MTA,TLS 1.3
21599,2019-06-12 19:55:43.493744+00:00,,7a29c223fb122ec64d10f0a159e07996,f4febc55ea12b31ae17cfb7e614afda8,,,,,TLS 1.3 draft-26-fb,False,...,-1.0,-1.0,2133.0,api.facebook.com,both,,,0.0,MTA,TLS 1.3
21600,2019-06-12 19:55:43.971901+00:00,,7a29c223fb122ec64d10f0a159e07996,f4febc55ea12b31ae17cfb7e614afda8,,,,,TLS 1.3 draft-26-fb,False,...,23.0,23.0,2133.0,b-graph.facebook.com,both,,,0.0,MTA,TLS 1.3
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
38726,2019-11-11 17:13:10.056070+00:00,,66918128f1b9b03303d77c6f2eefd128,eb1d94daa7e0344597e756a1fb6e7054,,,,,TLS 1.3,False,...,-1.0,-1.0,2189.0,ssl.gstatic.com,both,,,0.0,MTA,TLS 1.3
38727,2019-11-11 17:13:10.056751+00:00,,66918128f1b9b03303d77c6f2eefd128,eb1d94daa7e0344597e756a1fb6e7054,,,,,TLS 1.3,False,...,-1.0,-1.0,2189.0,ssl.gstatic.com,both,,,0.0,MTA,TLS 1.3
38728,2019-11-11 17:13:10.103126+00:00,,66918128f1b9b03303d77c6f2eefd128,eb1d94daa7e0344597e756a1fb6e7054,,,,,TLS 1.3,False,...,-1.0,-1.0,2189.0,ssl.gstatic.com,both,,,0.0,MTA,TLS 1.3
38729,2019-11-11 17:13:50.259049+00:00,,554719594ba90b02ae410c297c6e50ad,2b0648ab686ee45e0e7c35fcfb0eea7e,,,,,TLS 1.3,False,...,23.0,23.0,2189.0,play.google.com,both,,,0.0,MTA,TLS 1.3


In [12]:
# Distribution of the 20th record's direction field (index 19).
# Presumably -1 marks "no such record" -- TODO confirm against the extractor.
tls_dir_19_counts = labeled_mta['tls_dir_19'].value_counts()
tls_dir_19_counts

tls_dir_19
-1.0    33030
 1.0     6302
 0.0      937
-2.0        1
Name: count, dtype: int64

In [13]:
# Fraction of flows that do not reach the 20-record mark, i.e. the share of
# non-missing tls_dir_19 values equal to -1. Computed from the data instead of
# from numbers copied out of the previous cell's output, so it cannot go stale.
float(labeled_mta['tls_dir_19'].value_counts(normalize=True).get(-1.0, 0.0))

0.8202135584802582

In [14]:
# Number of labeled flows whose label came from our own labeling (not SSLBL):
assigned_labels = labeled_mta['new_label'].dropna()
assigned_labels.map(lambda label: not label.startswith("SSLBL")).sum()

6910

## MTA Certs

In [15]:
# Certificate-length table, one row per observed certificate message.
mta2020_certs = pd.read_csv(mta_folder + 'certificates_length_mta.csv', engine='python')

# Keep only the first row for each (ip.src, tcp.srcport, ip.dst, tcp.dstport,
# fileindex) key. A single vectorised mask replaces the per-key Python loop,
# which rescanned the whole frame and dropped in place for every repeated key.
flow_key_cols = ['ip.src', 'tcp.srcport', 'ip.dst', 'tcp.dstport', 'fileindex']
repeated_flow = mta2020_certs.duplicated(subset=flow_key_cols, keep='first')
# Rows with a missing key component are never treated as repeats, matching the
# old groupby-based logic (groupby skips NaN keys). Original index labels are kept.
complete_key = mta2020_certs[flow_key_cols].notna().all(axis=1)
mta2020_certs = mta2020_certs[~(repeated_flow & complete_key)]
mta2020_certs

Unnamed: 0,frame.time_epoch,ip.src,tcp.srcport,ip.dst,tcp.dstport,tls.handshake.certificates_length,fileindex
0,1.374195e+09,91.228.53.137,443,192.168.204.150,54627,847,4
1,1.374195e+09,91.228.53.137,443,192.168.204.150,54628,847,4
2,1.374195e+09,173.224.210.244,443,192.168.204.150,54631,847,4
3,1.374195e+09,173.224.210.244,443,192.168.204.150,54633,847,4
4,1.374195e+09,91.228.53.137,443,192.168.204.150,54632,847,4
...,...,...,...,...,...,...,...
36840,1.602876e+09,52.143.84.45,443,10.10.16.102,49831,2142,2345
36841,1.602876e+09,165.232.40.86,443,10.10.16.102,49832,906,2345
36842,1.602876e+09,20.190.9.86,443,10.10.16.102,49833,2142,2345
36843,1.602876e+09,165.232.40.86,443,10.10.16.102,49834,906,2345


In [16]:
# Attach C2 information to each certificate row. Only flows labeled c2 == 1
# are offered to the join; the certificate's source side is matched to the
# flow's server side and its destination side to the flow's client side,
# within the same capture file.
c2_flow_cols = ['s_ip', 's_port', 'c_ip', 'c_port', 'file', 'c2', 'cert_fp', 'new_label']
c2_flows = labeled_mta.loc[labeled_mta['c2'] == 1, c2_flow_cols]
mta2020_certs_merge = mta2020_certs.merge(
    c2_flows,
    left_on=['ip.src', 'tcp.srcport', 'ip.dst', 'tcp.dstport', 'fileindex'],
    right_on=['s_ip', 's_port', 'c_ip', 'c_port', 'file'],
    how='left',
)
# Certificates with no matching C2 flow are non-C2. Assign the result back
# rather than using chained fillna(inplace=True), which warns on pandas 2.2
# and does not update the frame under Copy-on-Write.
mta2020_certs_merge['c2'] = mta2020_certs_merge['c2'].fillna(0)
mta2020_certs_merge

Unnamed: 0,frame.time_epoch,ip.src,tcp.srcport,ip.dst,tcp.dstport,tls.handshake.certificates_length,fileindex,s_ip,s_port,c_ip,c_port,file,c2,cert_fp,new_label
0,1.374195e+09,91.228.53.137,443,192.168.204.150,54627,847,4,,,,,,0.0,,
1,1.374195e+09,91.228.53.137,443,192.168.204.150,54628,847,4,,,,,,0.0,,
2,1.374195e+09,173.224.210.244,443,192.168.204.150,54631,847,4,,,,,,0.0,,
3,1.374195e+09,173.224.210.244,443,192.168.204.150,54633,847,4,,,,,,0.0,,
4,1.374195e+09,91.228.53.137,443,192.168.204.150,54632,847,4,,,,,,0.0,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
36834,1.602876e+09,52.143.84.45,443,10.10.16.102,49831,2142,2345,,,,,,0.0,,
36835,1.602876e+09,165.232.40.86,443,10.10.16.102,49832,906,2345,165.232.40.86,443.0,10.10.16.102,49832.0,2345.0,1.0,46:4a:78:7c:b8:b0:7f:07:87:4f:2d:3b:85:30:07:f...,IcedID_TLS
36836,1.602876e+09,20.190.9.86,443,10.10.16.102,49833,2142,2345,,,,,,0.0,,
36837,1.602876e+09,165.232.40.86,443,10.10.16.102,49834,906,2345,165.232.40.86,443.0,10.10.16.102,49834.0,2345.0,1.0,46:4a:78:7c:b8:b0:7f:07:87:4f:2d:3b:85:30:07:f...,IcedID_TLS


## MTA TLS Data

In [17]:
# Build the TLS application-data feature table from the flows guessed as TLS 1.2.
guessed_tls12 = labeled_mta['tls_version_guess'] == 'TLS 1.2'
mta_tlsdata = tls_12_appdata_filtering(labeled_mta[guessed_tls12])
# The label is copied positionally (.values), not by index.
# NOTE(review): assumes the helper returns one row per input row, in the same
# order -- confirm in tls_paper_funcs.
mta_tlsdata['c2'] = labeled_mta.loc[guessed_tls12, 'c2'].values
# Return value unused; presumably prunes empty flows from mta_tlsdata in place
# and prints how many it removed -- TODO confirm.
remove_empty(mta_tlsdata, True)
display(mta_tlsdata['c2'].value_counts())

5905


c2
0.0    18681
1.0    15166
Name: count, dtype: int64

In [18]:
# Identification / metadata columns carried over from labeled_mta.
id_cols = [
    'timestamp', 'cert_fp', 'ja3', 'ja3s', 'notafter', 'notbefore',
    'subject', 'issuerdn', 'tls_version', 'session_resumed', 'sni_suricata',
    'c_ip', 'c_port', 's_ip', 's_port', 'ip_proto', 'start_time',
    'c_tls_version', 's_tls_version', 'c_supported_versions', 's_supported_versions',
    'file', 'sni_joy', '_merge', 'new_label', 'comment', 'c2', 'source',
    'tls_version_guess',
]
# Column-by-column assignment aligns on the row index, so each remaining row of
# mta_tlsdata receives the values of the labeled_mta row with the same index.
for id_col in id_cols:
    mta_tlsdata[id_col] = labeled_mta[id_col]
mta_tlsdata

Unnamed: 0,tls_b_0,tls_b_1,tls_b_2,tls_b_3,tls_b_4,tls_b_5,tls_b_6,tls_b_7,tls_b_8,tls_b_9,...,s_tls_version,c_supported_versions,s_supported_versions,file,sni_joy,_merge,new_label,comment,source,tls_version_guess
3,560.0,224.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,...,3,-1,-1,4.0,u7l359jww7v2x3dp.ohtheigh.cc,both,,,MTA,TLS 1.2
4,384.0,16416.0,9216.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,...,3,-1,-1,4.0,7pk7zf52f7mshkx.ohtheigh.cc,both,,,MTA,TLS 1.2
7,320.0,240.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,...,3,-1,-1,4.0,cm34717.ohtheigh.cc,both,,,MTA,TLS 1.2
8,336.0,8784.0,224.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,...,3,-1,-1,4.0,ivl51exuuxu.ohtheigh.cc,both,,,MTA,TLS 1.2
10,512.0,2320.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,...,3,-1,-1,4.0,yqitxnvlyjeci.ohtheigh.cc,both,,,MTA,TLS 1.2
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
40233,1552.0,176.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,...,3,-1,-1,2287.0,,right_only,,,MTA,TLS 1.2
40262,40.0,61.0,27.0,30.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,...,-1,80304030303020301,-1,2331.0,mx1.globalegrow.com,right_only,,,MTA,TLS 1.2
40263,40.0,89.0,80.0,57.0,30.0,288.0,-1.0,-1.0,-1.0,-1.0,...,5,80304030303020301,-1,2331.0,oxalisvietnam.com,right_only,,,MTA,TLS 1.2
40264,87.0,44.0,45.0,39.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,...,-1,80304030303020301,-1,2331.0,imaps.villaresyasociados.com,right_only,,,MTA,TLS 1.2


## Export to CSVs:

In [19]:
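# Exports are disabled by default; uncomment to (re)write the processed datasets
# (the 'processed-datasets/' directory must already exist).
#labeled_mta.to_csv('processed-datasets/labeled_mta.csv', index=False)
#mta_tlsdata.to_csv('processed-datasets/mta_tlsdata.csv', index=False)
#mta2020_certs_merge.to_csv('processed-datasets/mta2020_certs_merge.csv', index=False)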