# Required Imports

In [1]:
import os
import pandas as pd
import re
import matplotlib.pyplot as plt
from io import BytesIO
from scipy.io import loadmat  # this is the SciPy module that loads mat-files

# Imports PIL module
from PIL import Image

In [2]:
# Configuration Settings

# Number of packets bundled into each packet group of an ID (use an even number).
# It also determines how many packet groups each ID yields:
# total_packets / n_packets.
n_packets = 100

# Dataset Generation

## Load IQR Amplitude values 

In [3]:
# Define file ID
def extract_id_from_filename(filename):
    """Return the ID embedded in ``filename``, or ``None`` when there is none.

    The ID is a run of digits followed by ``m`` or ``f`` and then ``A`` or
    ``B`` (e.g. ``00001mA``); it must be immediately followed by a dot in the
    file name.
    """
    found = re.search(r'(\d+[mf][AB])\.', filename)
    if found is None:
        return None
    return found.group(1)

# Load amplitudes from CSV files
def load_amplitudes(directory):
    """Load the amplitude CSV files found in ``directory`` into a dictionary.

    Args:
        directory: Path of the folder to scan (sub-folders are not visited).

    Returns:
        dict mapping each ID produced by ``extract_id_from_filename`` to the
        DataFrame read from the corresponding file. Entries that do not end
        in ``.csv`` or whose name contains no recognisable ID are skipped.
    """
    amplitudes = {}

    # os.listdir returns names in arbitrary order; sorting makes the resulting
    # dict (and everything derived from it) identical on every run.
    for file_name in sorted(os.listdir(directory)):
        if not file_name.endswith('.csv'):
            continue

        # Resolve the ID before reading, so files that would be discarded
        # anyway are never parsed.
        file_id = extract_id_from_filename(file_name)
        if not file_id:
            continue

        # Store the amplitudes under their ID[mf][AB] key
        amplitudes[file_id] = pd.read_csv(os.path.join(directory, file_name))

    return amplitudes

# Root folder under which the processed files are kept
output_folder = './output/'

# Folder containing the amplitude CSV files to load
input_directory = os.path.join(
    output_folder,
    'CSV',
    'sanitized_amplitudes',
    'IQR_amplitudes',
)

# Read every CSV in that folder into a dict keyed by file ID
sanitized_amplitudes = load_amplitudes(input_directory)

In [4]:

# Packet groups per ID: {ID: [group_0, group_1, ...]}, where each group holds
# up to 'n_packets' consecutive rows of that ID's amplitudes.
split_sanitized_amplitudes = {}

for ID, amplitude_values in sanitized_amplitudes.items():
    # Number of complete groups, and how many rows are left over after them
    num_full_groups, leftover = divmod(len(amplitude_values), n_packets)

    # Consecutive, non-overlapping slices of exactly 'n_packets' rows
    packet_groups = [
        amplitude_values[i * n_packets:(i + 1) * n_packets]
        for i in range(num_full_groups)
    ]

    # Rows that did not fill a complete group become one shorter final group
    if leftover != 0:
        packet_groups.append(amplitude_values[num_full_groups * n_packets:])

    split_sanitized_amplitudes[ID] = packet_groups

## Split data into Train, Validation, and Test sets

In [5]:
# Group the keys of split_sanitized_amplitudes by their base ID, e.g.
# {'00001': {'A': ['00001mA'], 'B': ['00001mB']}}
groups = {}
for key in split_sanitized_amplitudes.keys():
    # The last two characters are assumed to be 'mA', 'mB', 'fA' or 'fB';
    # everything before them is the base ID. (Named base_id rather than `id`
    # so the builtin id() is not shadowed for the rest of the notebook.)
    base_id = key[:-2]
    # First time this base ID is seen: create its empty 'A' and 'B' subgroups
    if base_id not in groups:
        groups[base_id] = {'A': [], 'B': []}
    # File the key under the subgroup matching its trailing letter
    if key.endswith('A'):
        groups[base_id]['A'].append(key)
    elif key.endswith('B'):
        groups[base_id]['B'].append(key)

# Flatten to a list of {'A': [...], 'B': [...]} dicts. No shuffling is done:
# the order is the insertion order of `groups`.
grouped_items = list(groups.values())

# All keys ending with 'A' are used for training
train_keys = [key for group in grouped_items for key in group['A']]
# All keys ending with 'B' are used for validation and testing
val_and_test_keys = [key for group in grouped_items for key in group['B']]

# Training set: packet groups of every 'A' key
train_set = {key: split_sanitized_amplitudes[key] for key in train_keys}
# Validation/test pool: packet groups of every 'B' key
val_test_sets = {key: split_sanitized_amplitudes[key] for key in val_and_test_keys}

## Generate Probes and Gallery for each set

In [6]:
# Function to create probe and gallery sets for the training data
def create_train_probe_gallery(dataset):
    """Split each ID's packet groups into a probe part and a gallery part.

    Args:
        dataset: Dict mapping an ID to a sliceable sequence of packet groups.

    Returns:
        Tuple ``(probe, gallery)`` of dicts with the same keys as ``dataset``.
        ``probe[key]`` holds the first ``len // 2`` groups and ``gallery[key]``
        the remaining ones, so the gallery gets the extra group when the
        count is odd.
    """
    probe, gallery = {}, {}

    for key, amplitudes in dataset.items():
        midpoint = len(amplitudes) // 2
        # First half -> probe, second half -> gallery
        probe[key], gallery[key] = amplitudes[:midpoint], amplitudes[midpoint:]

    return probe, gallery

# Function to create probe and gallery sets for validation and test data
def create_valtest_probe_gallery(dataset, val_n_probe = 1, val_n_gallery = 1, test_n_probe = 1, test_n_gallery = 1):
    """Carve validation and test probe/gallery sets out of each ID's packet groups.

    The groups of every ID are consumed front to back in four consecutive
    slices: validation probe, validation gallery, test probe, test gallery.

    Args:
        dataset: Dict mapping an ID to a sliceable sequence of packet groups.
        val_n_probe: Number of groups taken for the validation probe.
        val_n_gallery: Number of groups taken for the validation gallery.
        test_n_probe: Number of groups taken for the test probe.
        test_n_gallery: Number of groups taken for the test gallery.

    Returns:
        Tuple ``(val_probe, val_gallery, test_probe, test_gallery)`` of dicts
        keyed like ``dataset``. An ID with fewer groups than requested gets
        shorter (possibly empty) slices; no error is raised.
    """
    # Cumulative end positions of the four consecutive slices
    val_probe_end = val_n_probe
    val_gallery_end = val_probe_end + val_n_gallery
    test_probe_end = val_gallery_end + test_n_probe
    test_gallery_end = test_probe_end + test_n_gallery

    val_probe, val_gallery = {}, {}
    test_probe, test_gallery = {}, {}

    for key, amplitudes in dataset.items():
        val_probe[key] = amplitudes[0:val_probe_end]
        val_gallery[key] = amplitudes[val_probe_end:val_gallery_end]
        test_probe[key] = amplitudes[val_gallery_end:test_probe_end]
        test_gallery[key] = amplitudes[test_probe_end:test_gallery_end]

    return val_probe, val_gallery, test_probe, test_gallery

# Build the probe/gallery sets. Training halves each ID's packet groups;
# validation and test take fixed numbers of consecutive groups per ID.
train_probe, train_gallery = create_train_probe_gallery(train_set)
val_probe, val_gallery, test_probe, test_gallery = create_valtest_probe_gallery(
    val_test_sets,
    val_n_probe=2,
    val_n_gallery=2,
    test_n_probe=3,
    test_n_gallery=3,
)

## Generate Magnitude Heatmaps



In [7]:
def save_heatmap_image(data, output_path):
    """Render ``data`` as a heatmap without axes or colorbar and save it as an RGB PNG.

    Args:
        data: Array-like handed directly to ``imshow`` (callers in this
            notebook pass a 2-D NumPy array of amplitude values).
        output_path: File path the PNG image is written to.
    """
    fig, ax = plt.subplots()
    try:
        ax.imshow(data, aspect='auto')
        ax.axis('off')  # Disable the axis

        # Render into memory first so the alpha channel can be dropped
        # before the image is written to disk.
        with BytesIO() as buffer:
            fig.savefig(buffer, format='png', bbox_inches='tight', pad_inches=0)
            buffer.seek(0)  # Rewind so PIL reads from the start of the PNG data

            # Convert to RGB (dropping the alpha channel) and save
            with Image.open(buffer) as img:
                img.convert('RGB').save(output_path, format='PNG')
    finally:
        # Always release the figure, even if rendering or saving failed: this
        # function is called once per packet group, so leaked figures add up.
        plt.close(fig)

##################### TRAIN / VAL / TEST IMAGES ##########################

def save_group_heatmaps(groups_by_id, output_path):
    """Save one heatmap PNG per packet group into ``output_path``.

    Files are named ``{ID}_{group_idx}.png``, where ``group_idx`` is the
    position of the packet group within that ID's list.

    Args:
        groups_by_id: Dict mapping an ID to a list of packet-group DataFrames,
            each of which has a 'timestamp' column.
        output_path: Destination folder; it is created when missing.
    """
    # exist_ok=True so an already existing folder is fine. The export loop
    # below must run on every execution, not only when the folder is new.
    os.makedirs(output_path, exist_ok=True)

    # Iterate over each ID and each of its packet groups
    for ID, group_list in groups_by_id.items():
        for group_idx, group_packets in enumerate(group_list):
            # Convert the group packets to a numpy array and drop the timestamp column
            group_values = group_packets.drop(columns='timestamp').to_numpy()

            # Define the filename using the ID and group index
            filename = f"{ID}_{group_idx}.png"

            # Save the heatmap image for the current group
            save_heatmap_image(group_values, os.path.join(output_path, filename))


# (dataset folder, probe/gallery folder, packet groups) for every set to export
image_sets = [
    ('D1', 'Probe', train_probe),
    ('D1', 'Gallery', train_gallery),
    ('D2v', 'Probe', val_probe),
    ('D2v', 'Gallery', val_gallery),
    ('D2t', 'Probe', test_probe),
    ('D2t', 'Gallery', test_gallery),
]

for dataset_dir, role_dir, image_groups in image_sets:
    # Skip empty sets so that no empty folder is created for them
    if image_groups:
        save_group_heatmaps(
            image_groups,
            os.path.join(output_folder, 'ImageData', dataset_dir, role_dir),
        )

#!zip -r /content/CSV.zip /content/output/CSV
#!zip -r /content/ImageData.zip /content/output/ImageData
