In [14]:
import numpy as np
import pickle
import scipy.io
import os
import json
import xmltodict
import h5py
import requests

In [41]:
# Minimum number of frames a transmitter file must contain to be kept
# (passed to process_save_rx as frame_threshold).
MIN_FRAMES = 500
# NOTE(review): FRAMES_TRAIN / FRAMES_TEST are not referenced by any code visible
# in this notebook; presumably per-transmitter train/test split sizes -- confirm.
FRAMES_TRAIN = 500
FRAMES_TEST = 100
# Number of IQ samples stored per frame (passed to process_save_rx as pkt_len).
PKT_LEN = 400
# Directory holding one .mat file per transmitter. Keep the trailing slash:
# code that builds file paths by plain string concatenation depends on it.
# NOTE(review): absolute, user-specific path -- adjust before running elsewhere.
DIR_SOURCE = '/Users/stepanmazokha/Desktop/wisig_frames_rffi_dataset/node1-1/equalized_packets_min500frames/'
# Former pickle output path, kept for reference only.
# FILE_TARGET = '/Users/stepanmazokha/Desktop/wisig_frames_rffi_dataset/node1-1/node1-1.pkl'
# Output HDF5 paths for the non-equalized and equalized versions of the dataset.
FILE_TARGET_NON_EQ = '/Users/stepanmazokha/Desktop/wisig_frames_rffi_dataset/node1-1/node1-1_non_eq.h5'
FILE_TARGET_EQ = '/Users/stepanmazokha/Desktop/wisig_frames_rffi_dataset/node1-1/node1-1_eq.h5'

In [42]:
def save_dataset_h5(file_target, label, data):
    """Write a label array and a data array into a new HDF5 file.

    Args:
        file_target: Path of the HDF5 file to create. An existing file at this
            path is overwritten (the file is opened in 'w' mode).
        label: Array-like stored under the dataset name 'label'.
        data: Array-like stored under the dataset name 'data'.
    """
    with h5py.File(file_target, 'w') as h5file:
        # The second positional parameter of create_dataset is `shape`, not the
        # payload, so the arrays must be passed through the `data` keyword.
        h5file.create_dataset('label', data=label)
        h5file.create_dataset('data', data=data)

def process_save_rx(dir_source, file_target_non_eq, file_target_eq, frame_threshold, pkt_len):
    """Load per-transmitter .mat files and split their IQ frames into real/imag arrays.

    Every ``.mat`` file in ``dir_source`` is expected to hold a ``packet_log``
    cell row; each cell is a 2-column complex matrix whose column 0 contains the
    non-equalized and column 1 the equalized IQ samples of one frame.

    Args:
        dir_source: Directory containing the per-transmitter .mat files.
        file_target_non_eq: Currently unused; kept for interface compatibility
            (intended output path of the non-equalized dataset).
        file_target_eq: Currently unused; kept for interface compatibility
            (intended output path of the equalized dataset).
        frame_threshold: Files with fewer frames than this are skipped.
        pkt_len: Number of samples per frame; every frame must have exactly
            this many rows, otherwise the assignment below raises ValueError.

    Returns:
        A list ``[data_non_eq, data_eq, node_list]`` where ``data_non_eq`` and
        ``data_eq`` are lists of float32 arrays of shape (L, pkt_len, 2)
        (L frames, last axis = real/imag part), one per retained transmitter,
        and ``node_list`` holds the matching transmitter names.
    """
    data_non_eq = []  # not equalized
    data_eq = []  # equalized
    node_list = []  # transmitter names, aligned with the two lists above

    # Sorted so that the transmitter order is reproducible across runs/platforms.
    for fname in sorted(os.listdir(dir_source)):
        # Ignore stray files (e.g. .DS_Store) that loadmat cannot parse.
        if not fname.lower().endswith('.mat'):
            continue

        f = scipy.io.loadmat(os.path.join(dir_source, fname), verify_compressed_data_integrity=False)

        # Each item is a cell containing two vectors: non-eq & eq IQ samples
        frames = f['packet_log'][0]
        # Strip the 8-character file name prefix and the '.mat' extension.
        node_name = fname[8:-4]

        # Don't process the file if not enough frames inside
        if len(frames) < frame_threshold:
            print(node_name,'Eliminated')
            continue

        frame_count = len(frames)
        data_i_non_eq = np.zeros((frame_count, pkt_len, 2), dtype='float32')
        data_i_eq = np.zeros((frame_count, pkt_len, 2), dtype='float32')

        # Store real & imaginary parts separately for every frame. Each row must
        # come from its own frame (previously frame 0 was copied into all rows).
        for frame_idx, frame in enumerate(frames):
            data_i_non_eq[frame_idx, :, 0] = np.real(frame[:, 0])
            data_i_non_eq[frame_idx, :, 1] = np.imag(frame[:, 0])

            data_i_eq[frame_idx, :, 0] = np.real(frame[:, 1])
            data_i_eq[frame_idx, :, 1] = np.imag(frame[:, 1])

        data_non_eq.append(data_i_non_eq)
        data_eq.append(data_i_eq)
        node_list.append(node_name)

    # TODO: split transmitters into training/testing device lists and persist
    # the results to file_target_non_eq / file_target_eq.

    return [data_non_eq, data_eq, node_list]

# Only the transmitter names are needed by the following cells; the two frame
# arrays returned first are discarded here.
_, _, node_list = process_save_rx(DIR_SOURCE, FILE_TARGET_NON_EQ, FILE_TARGET_EQ, MIN_FRAMES, PKT_LEN)

In [107]:
# JSON file with per-node device information: written by get_orbit_node_infos
# and read back by read_json_file_to_dict.
# NOTE(review): absolute, user-specific path -- adjust before running elsewhere.
ORBIT_DEVICE_INFO = '/Users/stepanmazokha/Desktop/wisig_frames_rffi_dataset/orbit_device_info.json'

def get_orbit_node_capabilities(node_id, show = False):
    """Fetch the capability description of one ORBIT grid node.

    Credentials are read from the environment instead of being stored in the
    notebook:
      - ORBIT_USERNAME / ORBIT_PASSWORD: sent as HTTP Basic authentication.
      - ORBIT_COOKIE (optional): raw value of the ``Cookie`` header.
    SECURITY: the credentials that used to be hard-coded in this function must
    be treated as leaked and should be rotated.

    Args:
        node_id: Node identifier inserted into the host name, e.g. '1-1'.
        show: When True, pretty-print the parsed response.

    Returns:
        The XML response parsed into a dict by xmltodict, or None when the
        request fails or the server does not answer with HTTP 200.
    """
    url = f"https://www.orbit-lab.org/cPanel/status/getNodeCapabilities?node=node{node_id}.grid.orbit-lab.org"
    headers = {
        "Accept": "*/*",
        "Accept-Encoding": "gzip, deflate, br, zstd",
        "Accept-Language": "en-US,en;q=0.9,uk-UA;q=0.8,uk;q=0.7,ru;q=0.6",
        "Connection": "keep-alive",
        "Host": "www.orbit-lab.org",
        "Referer": "https://www.orbit-lab.org/cPanel/status/template/index.html",
        "Sec-Fetch-Dest": "empty",
        "Sec-Fetch-Mode": "cors",
        "Sec-Fetch-Site": "same-origin",
        "User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/126.0.0.0 Safari/537.36",
        "X-Requested-With": "XMLHttpRequest",
        "sec-ch-ua": "\"Not/A)Brand\";v=\"8\", \"Chromium\";v=\"126\", \"Google Chrome\";v=\"126\"",
        "sec-ch-ua-mobile": "?0"
    }

    # Secrets are injected at run time so they never end up in version control.
    username = os.environ.get("ORBIT_USERNAME")
    password = os.environ.get("ORBIT_PASSWORD")
    auth = (username, password) if username and password else None

    session_cookie = os.environ.get("ORBIT_COOKIE")
    if session_cookie:
        headers["Cookie"] = session_cookie

    try:
        # A timeout keeps one unresponsive node from hanging the whole crawl.
        response = requests.get(url, headers=headers, auth=auth, timeout=30)
    except requests.RequestException as error:
        # Same "no data" contract as a non-200 answer, so callers can continue.
        print(node_id, ': request failed:', error)
        return None

    if response.status_code != 200:
        return None

    responseJson = xmltodict.parse(response.text)
    if show:
        print(json.dumps(responseJson, indent=4))
    return responseJson

def save_dict_to_json_file(dictionary, file_path):
    """Serialize ``dictionary`` as JSON (4-space indentation) into ``file_path``.

    The target file is created, or overwritten if it already exists.
    """
    with open(file_path, 'w') as sink:
        json.dump(dictionary, fp=sink, indent=4)

def read_json_file_to_dict(file_path):
    """Return the parsed content of the JSON file located at ``file_path``."""
    with open(file_path, 'r') as source:
        return json.load(source)

def contains_allowed_substring(input_string, allowed_substrings):
    """Tell whether at least one of ``allowed_substrings`` occurs in ``input_string``.

    Returns False when ``allowed_substrings`` is empty.
    """
    return any(candidate in input_string for candidate in allowed_substrings)

def get_orbit_node_infos(node_list, file_path):
    """Query the device list of every node and store the results as JSON.

    Args:
        node_list: Iterable of node identifiers to query.
        file_path: Path of the JSON file that receives the collected data.

    Returns:
        Dict mapping each successfully queried node id to its list of device
        descriptions (the same content that is written to ``file_path``).
    """
    node_infos = {}

    for node_id in node_list:
        print("Processing", node_id)
        node_info = get_orbit_node_capabilities(node_id)

        if node_info is None:
            print(node_id, ': nothing found')
            continue

        # A node with an unexpected answer must not abort the whole crawl:
        # results are written only once, after the loop.
        try:
            devices = node_info['response']['action']['devices']['device']
        except (KeyError, TypeError):
            print(node_id, ': unexpected response structure, skipped')
            continue

        # xmltodict yields a dict (not a list) when there is a single <device>
        # element; always store a list so consumers can iterate uniformly.
        if isinstance(devices, dict):
            devices = [devices]

        node_infos[node_id] = devices

    save_dict_to_json_file(node_infos, file_path)
    return node_infos

def filter_nodes_by_device_model(node_infos, device_types_allowed=('5212',)):
    """Select the nodes that carry at least one device of an allowed type.

    Background: the paper mentions Atheros 5212, 9220, 9280 and 9580 WiFi
    cards. The goal is the largest set of nodes (with sufficient data) sharing
    ONE of these cards, because the same hardware vendor gives better model
    performance. Experiments showed the 5212 card to be the most common one
    (47 devices with a 500 frame limit), hence the default. The uniqueness of
    the vendor/model can additionally be checked via the '@INV_dev_id' field.

    Args:
        node_infos: Dict mapping node id to its device descriptions: a list of
            dicts, or a single dict when the node reported one device only.
        device_types_allowed: Substrings searched for in each device's
            '@INV_dev_type' value.

    Returns:
        List of node ids having at least one matching device, in the
        iteration order of ``node_infos``.
    """
    type_label = '/'.join(device_types_allowed)

    node_list_filtered = []
    for node_id, node_info in node_infos.items():
        # Normalize: a lone device is stored as a dict, missing devices as None.
        if node_info is None:
            devices = []
        elif isinstance(node_info, dict):
            devices = [node_info]
        else:
            devices = node_info

        node_fit_devices = 0
        for device in devices:
            if not isinstance(device, dict):
                continue
            device_type = device.get("@INV_dev_type")
            # Devices without a (string) type attribute can never match.
            if not isinstance(device_type, str):
                continue
            if any(allowed in device_type for allowed in device_types_allowed):
                node_fit_devices += 1

        if node_fit_devices == 0:
            print(node_id, ':', f'{type_label} NOTHING FOUND')
        else:
            node_list_filtered.append(node_id)

    print(f'Nodes with Atheros {type_label} WiFi card found:', len(node_list_filtered))

    return node_list_filtered

# Crawl of the ORBIT status service; presumably left commented out so that
# re-running this cell reuses the JSON file instead of querying the network.
# get_orbit_node_infos(node_list, file_path=ORBIT_DEVICE_INFO)
# Load the stored device information and keep only nodes with a matching card.
node_infos = read_json_file_to_dict(file_path=ORBIT_DEVICE_INFO)
node_list_filtered = filter_nodes_by_device_model(node_infos)

20-7 : 5212 NOTHING FOUND
Nodes with Atheros 5212 WiFi card found: 47


SyntaxError: expected ':' (2638385428.py, line 1)