In [27]:
import pyshark
import glob, os
import numpy as np
import pandas as pd
import itertools
import time

In [5]:
# Global Variables
# MAC address of the router; it is added to every per-device table below
# except TRAINING_TEST.
ROUTER = '78:d2:94:4d:ab:3e'

# One row per monitored station: (MAC address, device type, device name, split).
# The lookup tables below are all derived from this single table so that the
# four views of a device cannot drift apart.
_DEVICE_TABLE = (
    ('ec:1a:59:e4:fd:41', 'camera', 'Netcam1', 'train'),
    ('ec:1a:59:e4:fa:09', 'camera', 'Netcam2', 'train'),
    ('ec:1a:59:e5:02:0d', 'camera', 'Netcam3', 'test'),
    ('14:91:82:24:dd:35', 'plug',   'Insight', 'train'),
    ('60:38:e0:ee:7c:e5', 'plug',   'Mini',    'train'),
    ('14:91:82:cd:df:3d', 'plug',   'Switch1', 'train'),
    ('b4:75:0e:0d:94:65', 'plug',   'Switch2', 'train'),
    ('b4:75:0e:0d:33:d5', 'plug',   'Switch3', 'train'),
    ('94:10:3e:2b:7a:55', 'plug',   'Switch4', 'test'),
    ('30:8c:fb:3a:1a:ad', 'camera', 'Dropcam', 'train'),
    ('d0:73:d5:26:b8:4c', 'bulb',   'Lifx1',   'train'),
    ('d0:73:d5:26:c9:27', 'bulb',   'Lifx2',   'test'),
    ('ac:84:c6:97:7c:cc', 'camera', 'Kasa',    'test'),
    ('b0:4e:26:c5:2a:41', 'bulb',   'TpBulb',  'train'),
    ('70:4f:57:f9:e1:b8', 'plug',   'TpPlug',  'test'),
)

# Every MAC address of interest, in table order, with the router last.
WIFI_DEVICES = list(entry[0] for entry in _DEVICE_TABLE) + [ROUTER]

# MAC address -> device category ('camera' / 'plug' / 'bulb' / 'router').
DEVICE_TYPE = {mac: kind for mac, kind, _name, _split in _DEVICE_TABLE}
DEVICE_TYPE[ROUTER] = 'router'

# MAC address -> human-readable device name.
DEVICE_NAME = {mac: name for mac, _kind, name, _split in _DEVICE_TABLE}
DEVICE_NAME[ROUTER] = 'Router'

# MAC address -> 'train' or 'test'; the router has no entry here.
TRAINING_TEST = {mac: split for mac, _kind, _name, split in _DEVICE_TABLE}

# Numeric subtype codes (the values mapped from the "SubtypeNum" column) and
# the labels they are translated to before one-hot encoding.
DATA_PKT_SUBTYPES = {32 : 'Data',
                     40 : 'QoS_Data',
                     44 : 'QoS_Null'}

# Column names of the per-packet feature tables.
FEATURES = ["Time", "PacketLength", "Duration", "SourceAddr", "DestAddr", "Subtype"]
SRC_DIR = './Source/'
DST_DIR = './Destination/'
PCAP_DIR = '/root/Documents/Thesis/PCAPS'
TIMING_PKT_NUMBER = 25000
# String form of the frame-control type value that the frame-type tally
# counts as "Data".
DATA_FRAME_TYPE = '2'

# The last component of the working directory is used as the date tag for the
# timing file. NOTE(review): presumably the notebook is run from a date-named
# folder -- confirm.
path_name = os.getcwd()
# os.path.basename handles the platform's path separator; slicing on '/'
# raised ValueError when the path contained no forward slash.
DATE = os.path.basename(path_name)
PROC_TIME = "wifi_processing_time_" + DATE + ".csv"

In [6]:
def get_mac_vendors(addresses=None, timeout=10):
    """
    Uses the macvendors.co API to lookup the vendors of Wi-Fi devices.
    Requires internet access.
    
    Parameters
    ----------
    addresses (iterable of str, optional): MAC addresses to look up.
        Defaults to WIFI_DEVICES.
    timeout (float, optional): seconds to wait for each HTTP response before
        giving up. Defaults to 10.
    
    Output
    ------
    None
    
    Returns
    -------
    device_vendors (dict): keys(str) = MAC addresses, values(str) = vendor names
        (first word of the company name, capitalized). An address for which the
        API response carries no company is mapped to 'Unknown'.
    
    Raises
    ------
    requests.exceptions.RequestException: on network failure or timeout
    ValueError: if a response body is not valid JSON
    """
    import json, requests

    MAC_LOOKUP_API = 'http://macvendors.co/api/'

    if addresses is None:
        addresses = WIFI_DEVICES

    device_vendors = {}
    for addr in addresses:
        # Without a timeout a stalled connection would block the notebook forever.
        raw_body = requests.get(MAC_LOOKUP_API + addr, timeout=timeout).text

        # A lookup miss has no 'company' entry; fall back to a placeholder
        # instead of failing with KeyError part-way through the device list.
        result = json.loads(raw_body).get('result', {})
        company = result.get('company')
        if company:
            # Keep only the first word so each vendor collapses to one label.
            device_vendors[addr] = str(company).split(' ', 1)[0].capitalize()
        else:
            device_vendors[addr] = 'Unknown'

    return device_vendors

# Directory holding the per-capture CSV files.
path='/root/Documents/Thesis/Code/Source'
# glob returns files in arbitrary order; sorting keeps the row order of the
# concatenated dataframe reproducible between runs.
all_csvs = sorted(glob.glob(os.path.join(path, "*.csv")))
if not all_csvs:
    # pd.concat would fail with an opaque "No objects to concatenate" here.
    raise ValueError("No CSV files found in " + path)

# Collect all csvs in one dataframe
df_from_each_file = (pd.read_csv(f) for f in all_csvs)
df = pd.concat(df_from_each_file, ignore_index=True, sort=False)

# Add device type, device ID of each packet
# (source addresses missing from the lookup tables map to NaN)
df["DeviceType"] = df["SourceAddr"].map(DEVICE_TYPE)
df["Name"] = df["SourceAddr"].map(DEVICE_NAME)

# Add whether device is a training or test device
df["Set"] = df["SourceAddr"].map(TRAINING_TEST)

# One-hot encode device type (response variable)
deviceType_series = pd.get_dummies(df["DeviceType"])
df = pd.concat([df, deviceType_series], axis=1)

# One-hot encode MAC vendors
# The lookup goes over the network, so it is done once and kept in a variable.
device_vendors = get_mac_vendors()
df["Vendor"] = df["SourceAddr"].map(device_vendors)
vendor_series = pd.get_dummies(df["Vendor"])
df = pd.concat([df, vendor_series], axis=1)

# One-hot encode packet subtype
df["Subtype"] = df["SubtypeNum"].map(DATA_PKT_SUBTYPES)
subtype_series = pd.get_dummies(df["Subtype"])
df = pd.concat([df, subtype_series], axis=1)

# Count packets for each device
device_counts = df["Name"].value_counts()

In [76]:
# Open the master capture with full packet contents (not summaries only).
# The path is built from PCAP_DIR so the capture location is set in one place.
cap = pyshark.FileCapture(os.path.join(PCAP_DIR, 'master.cap'), only_summaries=False)

In [77]:
# Find frame type
# Frame-control type value (compared as a string) -> label, in reporting order.
FRAME_TYPE_LABELS = (('0', "Mgmt"), ('1', "Control"), ('2', "Data"))
frame_type_lookup = dict(FRAME_TYPE_LABELS)

subtypes = {"Mgmt": 0, "Control" : 0, "Data" : 0}
for pkt in cap:
    label = frame_type_lookup.get(pkt.wlan.fc_type)
    # Any other type value is ignored.
    if label is not None:
        subtypes[label] += 1

# Print out value counts of subtypes
total = sum(subtypes.values())
for _, key in FRAME_TYPE_LABELS:
    value = subtypes[key]
    # An empty capture has no meaningful share; avoid dividing by zero.
    share = value / float(total) if total else 0.0
    # Single-argument print() behaves the same on Python 2 and Python 3.
    print("{} , {} , {}".format(key, value, share))
print(total)

# NOTE(review): 169 is a hard-coded packet position specific to this capture.
qos_pkt = cap[169]
# NOTE(review): data_pkts is not defined in any visible cell; it must already
# exist in the kernel for this line to run -- confirm where it is created.
data_pkt = data_pkts[0]
QOS = '44'
# Result is discarded: only the cell's last expression is displayed.
pkt.wlan.fc_type_subtype == QOS

data_pkt.wlan.fc_type

In [64]:
# List the wlan-layer field names available on the first entry of data_pkts.
test_pkt = data_pkts[0]
# Single-argument print() behaves the same on Python 2 and Python 3.
print(test_pkt.wlan.field_names)

['', 'fc_frag', 'sa_resolved', 'fc_type_subtype', 'ra', 'seq', 'addr', 'ta', 'fc', 'fc_protected', 'da', 'ccmp_extiv', 'ta_resolved', 'qos_tid', 'fc_moredata', 'staa_resolved', 'fc_subtype', 'flags', 'da_resolved', 'fc_version', 'fc_order', 'qos_priority', 'qos_ack', 'sa', 'duration', 'fc_tods', 'staa', 'wep_key', 'qos', 'addr_resolved', 'fc_ds', 'frag', 'fc_fromds', 'fc_pwrmgt', 'bssid_resolved', 'qos_txop_dur_req', 'qos_amsdupresent', 'fc_retry', 'bssid', 'ra_resolved', 'qos_bit4', 'fc_type']


In [68]:
# Dump every wlan-layer field of every packet in data_pkts as "name: value".
for pkt in data_pkts:
    wlan_layer = pkt.wlan
    for field in wlan_layer.field_names:
        # format() also copes with a non-string field value, which "+"
        # concatenation would reject with a TypeError.
        print("{}: {}".format(field, wlan_layer.get_field(field)))

: CCMP parameters
fc_frag: 0
sa_resolved: ac:84:c6:97:7c:cc
fc_type_subtype: 40
ra: 78:d2:94:4d:ab:3e
seq: 3224
addr: 78:d2:94:4d:ab:3e
ta: ac:84:c6:97:7c:cc
fc: 0x00008841
fc_protected: 1
da: 78:d2:94:4d:ab:3e
ccmp_extiv: 0x000000176C15
ta_resolved: ac:84:c6:97:7c:cc
qos_tid: 0
fc_moredata: 0
staa_resolved: ac:84:c6:97:7c:cc
fc_subtype: 8
flags: 0x00000041
da_resolved: 78:d2:94:4d:ab:3e
fc_version: 0
fc_order: 0
qos_priority: 0
qos_ack: 0x00000000
sa: ac:84:c6:97:7c:cc
duration: 202
fc_tods: 1
staa: ac:84:c6:97:7c:cc
wep_key: 0
qos: 0x00000000
addr_resolved: 78:d2:94:4d:ab:3e
fc_ds: 0x00000001
frag: 0
fc_fromds: 0
fc_pwrmgt: 0
bssid_resolved: 78:d2:94:4d:ab:3e
qos_txop_dur_req: 0
qos_amsdupresent: 0
fc_retry: 0
bssid: 78:d2:94:4d:ab:3e
ra_resolved: 78:d2:94:4d:ab:3e
qos_bit4: 0
fc_type: 2

: CCMP parameters
fc_frag: 0
sa_resolved: 14:91:82:cd:df:3d
fc_type_subtype: 40
ra: 78:d2:94:4d:ab:3e
seq: 4053
addr: 78:d2:94:4d:ab:3e
ta: 14:91:82:cd:df:3d
fc: 0x00008841
fc_protected: 1
da: 78:d2

In [28]:
def random_forest_classifier(X_train, y_train, X_test, y_test):
    """
    Fits a random forest on the training data and scores it on the test data.

    Parameters
    ----------
    X_train, y_train: training features and labels, passed to
        RandomForestClassifier.fit
    X_test, y_test: held-out features and labels, passed to the fitted
        model's score method

    Output
    ------
    None

    Returns
    -------
    dict: 'Score' = value returned by score(X_test, y_test),
          'Time' = seconds elapsed while fitting and scoring
    """
    time_start = time.time()

    # random_state is fixed so repeated runs build the same forest.
    randomforest = RandomForestClassifier(random_state=0, n_jobs=2)
    rf_model = randomforest.fit(X_train, y_train)

    # score() performs the prediction itself; a separate predict() call whose
    # result is never used would only inflate the measured time.
    score = rf_model.score(X_test, y_test)

    time_elapsed = time.time() - time_start
    return {'Score' : score, 'Time' : time_elapsed}

In [36]:
from sklearn.ensemble import RandomForestClassifier

# Columns fed to the classifier.
features_list = [
        # Packet info
        "PacketLength", "Duration",

        # Vendor
        "Belkin", "Dropcam", "Lifi", "Netgear", "Tp-link",

        # 802.11 Data subtype
        "Data", "QoS_Data", "QoS_Null"]

# Device types to classify; each is also the name of a one-hot column in df.
y_list = ["camera", "bulb", "plug"]

# Get combos for one vs one
combinations = list(itertools.combinations(y_list, 2))

for device_type in combinations:
    # First member of the pair is the positive class, second the negative.
    pos_device_type, neg_device_type = device_type

    # Only use data with the two device types needed for one vs one classification
    df_1v1 = df[df["DeviceType"].isin([pos_device_type, neg_device_type])]

    # Separate df into train and test sets
    df_train = df_1v1[df_1v1["Set"]=="train"]
    df_test = df_1v1[df_1v1["Set"]=="test"]

    X_train = df_train[features_list]
    X_test = df_test[features_list]
    # The positive class's one-hot column is the binary target.
    y_train = df_train[pos_device_type]
    y_test = df_test[pos_device_type]

    rf_clf = random_forest_classifier(X_train, y_train, X_test, y_test)
    # Single-argument print() behaves the same on Python 2 and Python 3.
    print("{} vs {}".format(pos_device_type, neg_device_type))
    # "\t" is a real tab; the earlier "/t" was printed literally.
    print("Score: {} \t Time: {} sec".format(rf_clf['Score'], rf_clf['Time']))

camera vs bulb
Score: 0.187848326324 /t Time: 0.419248104095 sec
camera vs plug
Score: 0.36090153758 /t Time: 0.540331125259 sec
bulb vs plug
Score: 0.828362573099 /t Time: 0.511598110199 sec
