In [None]:
from EIMTC.extractor import Extractor
from EIMTC.plugins.protocol_header_fields import ProtocolHeaderFields
from EIMTC.plugins.stnn import STNN
from EIMTC.plugins.n_pkts_byte_freq import NPacketsByteFrequency
from EIMTC.plugins.n_bytes import NBytes
from EIMTC.plugins.pkt_rel_time import PacketRelativeTime
from EIMTC.plugins.clump_flows import Clump_Flow
from EIMTC.plugins.res_req_diff_time import ResReqDiffTime
import glob
from pathlib import Path

In [None]:
# Collect every pcap under ./data/pcaps at any directory depth.
# `**` only spans multiple directory levels when recursive=True is passed;
# without it the pattern behaves like a single `*` and silently skips files
# that are not exactly one directory deep. Sorting makes the processing order
# reproducible across runs and platforms.
files = sorted(glob.glob('./data/pcaps/**/*.pcap', recursive=True))
files

In [None]:
# Build the extractor: results go under ./data/ and each plugin instance in
# `custom_plugin_package` is handed to the Extractor with the settings below.
ext = Extractor(
    output_dirpath='./data/',
    custom_plugin_package=[
        # "Lopez" in the original note -- presumably the reference work for
        # these header-field features; TODO confirm.
        ProtocolHeaderFields(n_packets=32),
        # "wang" in the original note -- presumably the reference work for
        # the first-784-bytes representation; TODO confirm.
        NBytes(n=784),
        STNN(n_packets=32),
        NPacketsByteFrequency(n_first_packets=6),
        PacketRelativeTime(),
        ResReqDiffTime(),
        Clump_Flow(),
    ],
    TLS=True,
)
ext

In [None]:
def extract_labels(filename):
    '''
    Derive (encapsulation, traffic_type, app) labels from a capture file name.

    All matching is a case-insensitive substring search against `filename`.
    When several keywords match, the LAST matching entry (in the order the
    tables below are written) wins, for both the traffic type and the app.

    filename: file name (or stem) to inspect.
    returns: tuple (encapsulation, traffic_type, app). `encapsulation` is
             'vpn' or None; `traffic_type` is one of the `type_mapping` keys
             or None; `app` is one of the `app_list` entries or None.
    '''
    type_mapping = {
        'chat': ['aim_chat', 'aimchat', 'icq_chat', 'icqchat', 'skype_chat', 'facebook_chat', 'facebookchat', 'gmailchat', 'hangout_chat', 'hangouts_chat'],
        'email': ['email'],
        'audio': ['facebook_audio', 'hangouts_audio', 'skype_audio', 'spotify', 'voip'],
        'video': ['facebook_video', 'hangouts_video', 'netflix', 'skype_video', 'vimeo', 'youtube'],
        'filetransfer': ['ftps', 'scp', 'sftp', 'skype_file'],
        'p2p': ['bittorrent', 'torrent']
    }

    # 'bittorrent' is listed for completeness, but any name containing it also
    # contains 'torrent', which comes later and therefore determines the label.
    app_list = ['skype', 'facebook', 'voipbuster', 'youtube', 'vimeo', 'bittorrent', 'ftps', 'scp', 'sftp', 'hangout', 'netflix', 'spotify', 'aim', 'icq', 'gmail', 'email', 'torrent']

    # Normalise once so every check below is case-insensitive.
    name = filename.lower()

    traffic_type = None
    for category, keywords in type_mapping.items():
        if any(keyword in name for keyword in keywords):
            traffic_type = category  # no break: a later category overrides

    app = None
    for candidate in app_list:
        if candidate in name:
            app = candidate  # no break: a later entry overrides

    # Use the lower-cased name here too, so 'VPN_...' is labelled like 'vpn_...'.
    # NOTE(review): a name containing 'nonvpn' would also match -- confirm the
    # dataset's naming convention never uses that prefix.
    encapsulation = 'vpn' if 'vpn' in name else None

    return (encapsulation, traffic_type, app)


In [None]:
def custom_filepath_based_labelling(filepath):
    '''
    Build the label dictionary for one capture file from its path.

    Only the final path component, minus its last suffix, is inspected; it is
    passed to `extract_labels` and the resulting values are paired with their
    label names.

    returns: dictionary mapping each label's name/type to the label value,
             with keys 'encapsulation', 'traffic_type' and 'application'.
    '''
    label_names = ['encapsulation', 'traffic_type', 'application']
    stem = Path(filepath).stem
    label_values = extract_labels(stem)
    return {key: value for key, value in zip(label_names, label_values)}
    
# Sanity check: a bare name with no directory or extension yields all three labels.
assert custom_filepath_based_labelling('vpnyoutube') == {
    'encapsulation': 'vpn',
    'traffic_type': 'video',
    'application': 'youtube',
}


In [None]:
# Run the extractor over every discovered pcap, labelling each file from its
# path via custom_filepath_based_labelling.
# NOTE(review): presumably writes its results under the extractor's
# output_dirpath -- confirm against the EIMTC Extractor documentation.
ext.extract_many(
    files,
    labelling_method=custom_filepath_based_labelling,
)