In [None]:
!apt-get install tshark
!pip install pyshark

In [None]:
import os

import numpy as np
import pandas as pd

import pyshark
import nest_asyncio


In [3]:
# Patch asyncio so nested event loops are permitted (presumably required
# because pyshark runs its own loop inside the notebook's running loop).
nest_asyncio.apply()
cap = pyshark.FileCapture('office_capture_probes_only.pcapng')

# Parallel per-packet columns: index i of every list describes the same packet.
TIMESTAMP = []
SA = []
SSID = []
RSSI = []

count = 0                # packets read from the capture
successfull_packets = 0  # packets for which every field below was available
try:
    for packet in cap:
        count += 1

        try:
            # Read every field before appending anything, so a packet that
            # lacks any one of them contributes to none of the lists.
            timestamp = packet.sniff_timestamp
            wlan_layers = packet.get_multiple_layers('wlan')
            sa = wlan_layers[0].sa
            ssid = wlan_layers[1].ssid
            rssi = packet.wlan_radio.signal_dbm
        except (AttributeError, IndexError):
            # AttributeError: a layer/field is absent from this packet.
            # IndexError: fewer than two 'wlan' layers were dissected.
            # Only these are skipped; a bare `except:` would also hide
            # KeyboardInterrupt and genuine programming errors.
            continue

        successfull_packets += 1
        TIMESTAMP.append(timestamp)
        SA.append(sa)
        SSID.append(ssid)
        RSSI.append(rssi)
finally:
    # Release the capture (and the tshark subprocess behind it) even if
    # iteration is interrupted or fails.
    cap.close()

print('Read: ', count)
print('Successfully processed:', successfull_packets)

Read:  651
Successfully processed: 469


In [5]:
# Reduce the per-packet source addresses to the distinct set (np.unique
# returns them sorted) and report how many there are.
unique_sa = np.unique(SA)
source_count = len(unique_sa)
print('There were ' + str(source_count) + ' source MAC addresses')

There were 133 source MAC addresses


In [7]:
# The "locally administered" (U/L) bit is the second-least-significant bit of
# the first byte of the MAC address, so masking that byte with 0b00000010
# isolates it. If the bit is 1, the address is locally administered, which
# this notebook treats as a randomized address.

def is_random_mac(sa):
    """Report whether a MAC address has its locally-administered bit set.

    Parameters
    ----------
    sa : str
        MAC address whose first two characters are the hex digits of the
        first octet, e.g. ``'02:4f:38:dd:30:12'``.

    Returns
    -------
    bool
        True when bit 0x02 of the first octet is set (the address is then
        considered randomized by this notebook), False otherwise.

    Raises
    ------
    ValueError
        If the first two characters cannot be parsed as hexadecimal.
    """
    local_bit = 0b00000010
    # Parse the leading octet straight from its hex text.
    leading_octet = int('0x' + sa[0:2], 16)
    return (leading_octet & local_bit) > 0

# Per-packet flag, aligned index-for-index with SA.
RANDOM_MAC = [is_random_mac(addr) for addr in SA]

# Classify each distinct source address exactly once, then split the total
# into randomized vs. non-randomized counts.
unique_flags = [is_random_mac(addr) for addr in unique_sa]
random_addr = sum(1 for flagged in unique_flags if flagged)
true_addr = len(unique_flags) - random_addr

print('There were only ' + str(true_addr) + ' real MAC addresses')



There were only 92 real MAC addresses


In [8]:
# Assemble the parallel per-packet lists into one table, one row per packet.
# zip() stops at the shortest list, so rows are only built where every
# column has a value.
capture_columns = ['Timestamp','SA','SSID','RSSI','RANDOM_MAC']
capture_rows = list(zip(TIMESTAMP, SA, SSID, RSSI, RANDOM_MAC))
df_cap = pd.DataFrame(capture_rows, columns=capture_columns)
display(df_cap)

Unnamed: 0,Timestamp,SA,SSID,RSSI,RANDOM_MAC
0,1521033371.663294000,02:4f:38:dd:30:12,SSID:,-85,True
1,1521033371.759730000,40:9c:28:5b:eb:aa,eduroam,-81,False
2,1521033371.769539000,40:9c:28:5b:eb:aa,eduroam,-82,False
3,1521033371.894637000,f8:63:3f:21:b4:43,SSID:,-87,False
4,1521033372.921840000,3c:2e:ff:99:25:4a,polimi-protected,-90,False
...,...,...,...,...,...
464,1521033450.353089000,00:00:80:e7:02:48,polimi-protected,-85,False
465,1521033450.355215000,42:b2:cf:a9:4c:91,\xef\xbf\xbd\xdPo\xef\xbf\xbd\xa,-84,True
466,1521033450.508016000,40:9c:28:5b:eb:aa,eduroam,-73,False
467,1521033450.526097000,40:9c:28:5b:eb:aa,eduroam,-74,False


In [11]:
# Group the probe requests by source MAC address.
# The key is passed as a scalar ('SA') rather than a one-element list
# (['SA']): with a list key, pandas >= 2.0 yields 1-tuples such as
# ('aa:bb:..',) when iterating, which would change what is printed below.
df_sa = df_cap.groupby('SA')
for sa, probes in df_sa:
    # Distinct SSIDs probed by this source address.
    ssids = np.unique(probes['SSID'])
    # RSSI is cast to float before averaging the values for this source.
    rssi = probes['RSSI'].astype(float).mean()
    print(sa, rssi, ssids)

00:00:00:00:00:3c -89.0 ['SSID: ']
00:00:80:e7:02:48 -85.0 ['polimi-protected']
00:19:07:95:13:a3 -86.0 ['SSID: ']
00:20:00:6f:02:a2 -70.88888888888889 ['SSID: ']
00:21:5d:c0:e4:36 -84.33333333333333 ['SSID: ']
00:23:6c:97:5c:39 -76.0 ['polimi-protected']
00:80:e7:82:b7:76 -90.0 ['eduroam']
02:4f:38:dd:30:12 -85.0 ['SSID: ']
04:d6:aa:06:e0:c6 -88.0 ['SSID: ']
10:02:b5:b7:8e:91 -73.25 ['SSID: ']
12:55:d1:80:4e:cd -84.5 ['SSID: ']
14:9d:09:e2:08:e8 -86.0 ['SSID: ']
16:c2:4b:33:1e:44 -85.66666666666667 ['SSID: ']
1a:3e:48:f7:83:1c -87.0 ['SSID: ']
1a:43:15:a4:da:72 -88.5 ['SSID: ']
1a:74:1c:53:42:19 -86.0 ['SSID: ']
20:62:74:d6:4c:5c -74.25 ['SSID: ']
20:62:74:d6:4c:5e -74.0 ['SSID: ']
22:30:74:8f:89:16 -90.0 ['SSID: ']
22:aa:6f:36:f4:6d -88.0 ['SSID: ']
24:00:ba:f9:9c:30 -79.33333333333333 ['SSID: ']
24:77:03:23:9d:b0 -89.0 ['ITIA-WIFI']
26:e6:39:c8:7a:9d -89.0 ['SSID: ']
28:c2:dd:29:e1:45 -87.5 ['polimi-protected']
28:fe:cd:18:6d:55 -85.5 ['SSID: ']
2c:61:f6:7a:e4:2b -72.5 ['eduroam']
2