In [None]:
from signal_analysis import SignalAnalysis
import os
import numpy as np
import pandas as pd

**Connection to Google Drive**
- Locally: use rclone to mount Google Drive to a local folder
- Google Colab: mount Google Drive at `/content/drive` with `google.colab.drive`

In [None]:
# Locate the dataset folder, either on a Colab Drive mount or on a local rclone mount.
# Defines IN_COLAB (bool) and data_path (str) for the rest of the notebook.
#
# Local setup (run in a terminal before starting the notebook):
# unmount if already mounted, then mount with proper caching
# !fusermount -u ~/GoogleDrive 2>/dev/null || true
# !rclone mount gdrive: ~/GoogleDrive --vfs-cache-mode full --daemon --vfs-read-chunk-size 128M --buffer-size 128M

import time

try:
    # Only the import is probed: a missing google.colab package means "not in Colab".
    from google.colab import drive, files
except ImportError:
    IN_COLAB = False
    # Same mount point as the rclone command above, resolved for the current user
    data_path = os.path.expanduser('~/GoogleDrive/N-MON/Dataset_LINKS')
    print("Running locally.")
else:
    IN_COLAB = True
    # A failure here (e.g. authorization refused) is raised instead of being
    # mistaken for a local run with a non-existent data path.
    drive.mount('/content/drive')
    data_path = '/content/drive/MyDrive/N-MON/Dataset_LINKS'
    print("Running in Google Colab.")

# Short pause before the first directory listing on the freshly mounted drive
print("Waiting for mount to stabilize...")
time.sleep(2)
print("Mount ready!")

Mounted at /content/drive
Running in Google Colab.
Waiting for mount to stabilize...
Mount ready!


**Environment Setup**

In [None]:
# Recording parameters and processing options shared by every cell below
# __basedir__ = os.path.abspath("../..")
# path_of_file = os.path.join(__basedir__, 'IQ_DATA') # Path to the folder containing IQ files

# -- how the IQ files were recorded --
fs_ = 1e7                 # sampling frequency (10e6)
fc_ = 1575.42e6           # carrier frequency
output_type_ = 'fc32'     # sample format of the file - float32

# -- how the files are sliced --
chunk_separation_file = 0.1   # spacing between chunk start points in seconds (100 ms); increase to read more data
skip_second = 5               # seconds to skip at the beginning of the file

# -- which recordings to process --
type_to_do = ['CLEAN']        # other available types: 'LWF', 'LN', 'TICK', 'TRI', 'TRIW'

**Folder Structure**

In [None]:
# Full path of every entry in the dataset folder.
# os.listdir returns entries in arbitrary order, so the list is sorted to make the
# processing order (and jammer_folder[0], used by the access test) reproducible.
jammer_folder = sorted(os.path.join(data_path, entry) for entry in os.listdir(data_path))
jammer_folder

['/content/drive/MyDrive/N-MON/Dataset_LINKS/LINKS_L1_10Mhz_complex_int8_20251202_112953_LWF',
 '/content/drive/MyDrive/N-MON/Dataset_LINKS/LINKS_L1_10Mhz_complex_int8_20251202_115231_LN',
 '/content/drive/MyDrive/N-MON/Dataset_LINKS/LINKS_L1_10Mhz_complex_int8_20251202_135534_TICK',
 '/content/drive/MyDrive/N-MON/Dataset_LINKS/LINKS_L1_10Mhz_complex_int8_20251202_142018_TRI',
 '/content/drive/MyDrive/N-MON/Dataset_LINKS/LINKS_L1_10Mhz_complex_int8_20251202_154305_TRIW',
 '/content/drive/MyDrive/N-MON/Dataset_LINKS/LINKS_L1_10Mhz_complex_int8_20251202_160202_CLEAN',
 '/content/drive/MyDrive/N-MON/Dataset_LINKS/Observation_N_MON.zip']

**Test Data Access**

In [None]:
# Check that the mounted drive supports reads at arbitrary byte offsets.
# Each probe reads a few complex64 samples from the first listed file and reports
# success (with the shape read) or the error that was raised.
test_file = jammer_folder[0] if jammer_folder else None
if test_file:
    print(f"Testing file access: {test_file}")

    def _seek_then_read():
        # Alternative to np.fromfile(offset=...): position the file handle manually
        with open(test_file, 'rb') as f:
            f.seek(1000)
            return np.fromfile(f, dtype=np.complex64, count=10)

    # (success label, failure label, zero-argument reader)
    access_probes = [
        # Test 1: Basic read from start
        ("Basic read from start", "Basic read failed",
         lambda: np.fromfile(test_file, dtype=np.complex64, count=10)),
        # Test 2: Read with offset using np.fromfile
        ("Read with offset (np.fromfile)", "Read with offset failed",
         lambda: np.fromfile(test_file, dtype=np.complex64, offset=1000, count=10)),
        # Test 3: Manual seek and read (alternative approach)
        ("Manual seek then read", "Manual seek failed",
         _seek_then_read),
        # Test 4: Read with large offset (like in the actual processing code)
        ("Read with large offset (400MB)", "Read with large offset failed",
         lambda: np.fromfile(test_file, dtype=np.complex64, offset=400000000, count=1000)),
    ]

    for ok_label, fail_label, read_samples in access_probes:
        try:
            data = read_samples()
            print(f"✓ {ok_label}: {data.shape}")
        except Exception as e:
            print(f"✗ {fail_label}: {e}")

Testing file access: /content/drive/MyDrive/N-MON/Dataset_LINKS/LINKS_L1_10Mhz_complex_int8_20251202_112953_LWF
✓ Basic read from start: (10,)
✓ Read with offset (np.fromfile): (10,)
✓ Manual seek then read: (10,)
✓ Read with large offset (400MB): (1000,)


**Data processing of P1, P2, P3, P4**

In [None]:
# Build the dataset for power levels P1-P4 of every recording selected in `type_to_do`.
# For each chunk the spectrogram array is saved to ./array_dataset/<type>/<Pn>/<Pn>_<i>.npy
# and one feature row is collected; for each recording the rows are written from scratch to
# ./image_dataset/features_file_<type>.csv.
import traceback

# Each jamming file contains 8 different power levels. Each power level lasts 2 minutes (120 seconds).
# For each power level the first and last `skip_second` seconds are skipped.
power_duration = 120  # seconds per power level
num_power_levels = 8
power_indices = range(0, num_power_levels // 2)  # first half of the levels: P1, P2, P3, P4

bytes_per_sample = {'fc32': 8, 'sc16': 4, 'sc8': 2}  # size of one complex sample per sample format

# CSV columns: 16 signal features followed by the labels of the chunk
features_name = [f'f{n}' for n in range(1, 17)] + ['power', 'path_spec', 'class']

for jammer_path in jammer_folder:
    # The type is the last '_'-separated token of the entry name; only the basename is
    # parsed so that underscores in parent directories cannot leak into the result.
    interference_type = os.path.basename(jammer_path).split('_')[-1]
    if interference_type not in type_to_do:  # skip recordings we were not asked to process
        continue
    print(f"Processing interference type: {interference_type}")

    rows = []

    # dataset_output
    base_output_dir = os.path.join('./', 'image_dataset', interference_type)  # base output directory
    base_array_dir = os.path.join('./', 'array_dataset', interference_type)   # numpy array output directory
    os.makedirs(base_output_dir, exist_ok=True)
    os.makedirs(base_array_dir, exist_ok=True)

    file_size = os.path.getsize(jammer_path)
    # total duration of the file in seconds = bytes / (samples per second * bytes per sample)
    total_sec = round(file_size / fs_ / bytes_per_sample[output_type_])

    print(f'File size: {file_size:,} bytes')
    print(f'Total seconds for {jammer_path}: {total_sec} ({total_sec/60:.1f} minutes)')

    signal_analysis_object = SignalAnalysis(fs=fs_, fc=fc_, output_type=output_type_, file_name=jammer_path)

    # MAIN LOOP for creating dataset
    for power_idx in power_indices:
        jammer_power = f"P{power_idx + 1}"  # P1, P2, P3, P4
        power_dir = os.path.join(base_array_dir, jammer_power)
        os.makedirs(power_dir, exist_ok=True)

        # Time range of this power level, without its first and last `skip_second` seconds
        power_start = power_idx * power_duration + skip_second
        power_end = (power_idx + 1) * power_duration - skip_second

        # Make sure we don't exceed file duration
        if power_start >= total_sec:
            print(f"Skipping power level {jammer_power} - beyond file duration")
            break
        if power_end > total_sec:
            power_end = total_sec

        print(f"Processing {jammer_power}: from {power_start}s to {power_end}s ({power_end - power_start:.1f}s)")

        # Start time (in seconds) of every chunk within this power level
        loop_range = np.arange(power_start, power_end, chunk_separation_file)
        print(f"  Will process {len(loop_range)} chunks for {jammer_power}")

        chunk_idx = 0  # kept outside the try so the error message can always report it
        try:
            for chunk_idx, start_point_file in enumerate(loop_range):
                # NOTE(review): path_spec records a .jpg path under image_dataset, but this
                # cell only writes the .npy array below - confirm which path downstream code expects.
                file_name_img = os.path.join(base_output_dir, f"{jammer_power}_{chunk_idx}.jpg")
                spec_array = signal_analysis_object.spectrogram_image(start_point=start_point_file)
                file_name_npy = os.path.join(power_dir, f"{jammer_power}_{chunk_idx}.npy")
                np.save(file_name_npy, spec_array)

                features_signal = signal_analysis_object.extract_features(start_point=start_point_file)
                rows.append(features_signal + [jammer_power, file_name_img, interference_type])

                print(f"  Saved {file_name_npy}")

            print(f"  {jammer_power}: Completed {len(loop_range)} chunks                    ")

        except Exception as e:
            # Best effort: report the failure and carry on with the next power level;
            # rows collected so far are still written to the CSV below.
            print(f"\nERROR in {jammer_power} at chunk {chunk_idx}: {str(e)}")
            traceback.print_exc()
            print("Continuing to next power level...")
            continue

    print(f"\nTotal rows collected: {len(rows)}")
    df = pd.DataFrame(rows, columns=features_name)
    output_csv_path = os.path.join('./', 'image_dataset', f'features_file_{interference_type}.csv')
    df.to_csv(output_csv_path, sep=',')
    print(f"Completed {interference_type}, saved to {output_csv_path}")

Processing interference type: TRIW
File size: 76,918,321,536 bytes
Total seconds for /content/drive/MyDrive/N-MON/Dataset_LINKS/LINKS_L1_10Mhz_complex_int8_20251202_154305_TRIW: 961 (16.0 minutes)
Processing P1: from 5s to 115s (110.0s)
  Will process 1100 chunks for P1
Using memory-mapped file access for LINKS_L1_10Mhz_complex_int8_20251202_154305_TRIW
  Saved ./array_dataset/TRIW/P1/P1_0.npy
  Saved ./array_dataset/TRIW/P1/P1_1.npy
  Saved ./array_dataset/TRIW/P1/P1_2.npy
  Saved ./array_dataset/TRIW/P1/P1_3.npy
  Saved ./array_dataset/TRIW/P1/P1_4.npy
  Saved ./array_dataset/TRIW/P1/P1_5.npy
  Saved ./array_dataset/TRIW/P1/P1_6.npy
  Saved ./array_dataset/TRIW/P1/P1_7.npy
  Saved ./array_dataset/TRIW/P1/P1_8.npy
  Saved ./array_dataset/TRIW/P1/P1_9.npy
  Saved ./array_dataset/TRIW/P1/P1_10.npy
  Saved ./array_dataset/TRIW/P1/P1_11.npy
  Saved ./array_dataset/TRIW/P1/P1_12.npy
  Saved ./array_dataset/TRIW/P1/P1_13.npy
  Saved ./array_dataset/TRIW/P1/P1_14.npy
  Saved ./array_dataset/

In [None]:
# Download the generated array_dataset folder (only when running in Colab).
# `!zip` is an IPython shell escape: it recursively archives ./array_dataset into
# array_dataset1.zip in the current working directory; the archive is then passed to
# google.colab's files.download (`files` and IN_COLAB are set by the Drive-connection cell).
if IN_COLAB:
    !zip -r array_dataset1.zip ./array_dataset
    files.download("array_dataset1.zip")

  adding: array_dataset/ (stored 0%)
  adding: array_dataset/TRIW/ (stored 0%)
  adding: array_dataset/TRIW/P4/ (stored 0%)
  adding: array_dataset/TRIW/P4/P4_737.npy (deflated 5%)
  adding: array_dataset/TRIW/P4/P4_107.npy (deflated 4%)
  adding: array_dataset/TRIW/P4/P4_456.npy (deflated 5%)
  adding: array_dataset/TRIW/P4/P4_1049.npy (deflated 5%)
  adding: array_dataset/TRIW/P4/P4_1093.npy (deflated 5%)
  adding: array_dataset/TRIW/P4/P4_35.npy (deflated 4%)
  adding: array_dataset/TRIW/P4/P4_372.npy (deflated 5%)
  adding: array_dataset/TRIW/P4/P4_251.npy (deflated 5%)
  adding: array_dataset/TRIW/P4/P4_853.npy (deflated 5%)
  adding: array_dataset/TRIW/P4/P4_177.npy (deflated 5%)
  adding: array_dataset/TRIW/P4/P4_1099.npy (deflated 5%)
  adding: array_dataset/TRIW/P4/P4_190.npy (deflated 5%)
  adding: array_dataset/TRIW/P4/P4_589.npy (deflated 5%)
  adding: array_dataset/TRIW/P4/P4_591.npy (deflated 5%)
  adding: array_dataset/TRIW/P4/P4_223.npy (deflated 5%)
  adding: array_dat

In [None]:
# Clean the array_dataset folder from Colab to free disk space before the next batch.
# The existence check makes the cell safe to re-run: shutil.rmtree raises
# FileNotFoundError when the folder has already been removed.
if IN_COLAB:
    import shutil
    if os.path.isdir('./array_dataset'):
        shutil.rmtree('./array_dataset')

**Data Processing of P5, P6, P7, P8**

In [None]:
# Build the dataset for power levels P5-P8 of every recording selected in `type_to_do`.
# For each chunk the spectrogram array is saved to ./array_dataset/<type>/<Pn>/<Pn>_<i>.npy
# and one feature row is collected. The rows are merged into
# ./image_dataset/features_file_<type>.csv, which the P1-P4 cell writes to the same path:
# overwriting it here would silently drop the P1-P4 features.
import traceback

# Each jamming file contains 8 different power levels. Each power level lasts 2 minutes (120 seconds).
# For each power level the first and last `skip_second` seconds are skipped.
power_duration = 120  # seconds per power level
num_power_levels = 8
power_indices = range(num_power_levels // 2, num_power_levels)  # second half of the levels: P5, P6, P7, P8

bytes_per_sample = {'fc32': 8, 'sc16': 4, 'sc8': 2}  # size of one complex sample per sample format

# CSV columns: 16 signal features followed by the labels of the chunk
features_name = [f'f{n}' for n in range(1, 17)] + ['power', 'path_spec', 'class']

for jammer_path in jammer_folder:
    # The type is the last '_'-separated token of the entry name; only the basename is
    # parsed so that underscores in parent directories cannot leak into the result.
    interference_type = os.path.basename(jammer_path).split('_')[-1]
    if interference_type not in type_to_do:  # skip recordings we were not asked to process
        continue
    print(f"Processing interference type: {interference_type}")

    rows = []
    processed_powers = []  # power labels (re)generated by this run, used when merging the CSV

    # dataset_output
    base_output_dir = os.path.join('./', 'image_dataset', interference_type)  # base output directory
    base_array_dir = os.path.join('./', 'array_dataset', interference_type)   # numpy array output directory
    os.makedirs(base_output_dir, exist_ok=True)
    os.makedirs(base_array_dir, exist_ok=True)

    file_size = os.path.getsize(jammer_path)
    # total duration of the file in seconds = bytes / (samples per second * bytes per sample)
    total_sec = round(file_size / fs_ / bytes_per_sample[output_type_])

    print(f'File size: {file_size:,} bytes')
    print(f'Total seconds for {jammer_path}: {total_sec} ({total_sec/60:.1f} minutes)')

    signal_analysis_object = SignalAnalysis(fs=fs_, fc=fc_, output_type=output_type_, file_name=jammer_path)

    # MAIN LOOP for creating dataset
    for power_idx in power_indices:
        jammer_power = f"P{power_idx + 1}"  # P5, P6, P7, P8
        power_dir = os.path.join(base_array_dir, jammer_power)
        os.makedirs(power_dir, exist_ok=True)

        # Time range of this power level, without its first and last `skip_second` seconds
        power_start = power_idx * power_duration + skip_second
        power_end = (power_idx + 1) * power_duration - skip_second

        # Make sure we don't exceed file duration
        if power_start >= total_sec:
            print(f"Skipping power level {jammer_power} - beyond file duration")
            break
        if power_end > total_sec:
            power_end = total_sec

        processed_powers.append(jammer_power)
        print(f"Processing {jammer_power}: from {power_start}s to {power_end}s ({power_end - power_start:.1f}s)")

        # Start time (in seconds) of every chunk within this power level
        loop_range = np.arange(power_start, power_end, chunk_separation_file)
        print(f"  Will process {len(loop_range)} chunks for {jammer_power}")

        chunk_idx = 0  # kept outside the try so the error message can always report it
        try:
            for chunk_idx, start_point_file in enumerate(loop_range):
                # NOTE(review): path_spec records a .jpg path under image_dataset, but this
                # cell only writes the .npy array below - confirm which path downstream code expects.
                file_name_img = os.path.join(base_output_dir, f"{jammer_power}_{chunk_idx}.jpg")
                spec_array = signal_analysis_object.spectrogram_image(start_point=start_point_file)
                file_name_npy = os.path.join(power_dir, f"{jammer_power}_{chunk_idx}.npy")
                np.save(file_name_npy, spec_array)

                features_signal = signal_analysis_object.extract_features(start_point=start_point_file)
                rows.append(features_signal + [jammer_power, file_name_img, interference_type])

                print(f"  Saved {file_name_npy}")

            print(f"  {jammer_power}: Completed {len(loop_range)} chunks                    ")

        except Exception as e:
            # Best effort: report the failure and carry on with the next power level;
            # rows collected so far are still written to the CSV below.
            print(f"\nERROR in {jammer_power} at chunk {chunk_idx}: {str(e)}")
            traceback.print_exc()
            print("Continuing to next power level...")
            continue

    print(f"\nTotal rows collected: {len(rows)}")
    df = pd.DataFrame(rows, columns=features_name)
    output_csv_path = os.path.join('./', 'image_dataset', f'features_file_{interference_type}.csv')

    # Keep the rows already on disk (P1-P4). Rows of the power levels regenerated by this
    # run are dropped first, so executing the cell twice does not duplicate them.
    if os.path.exists(output_csv_path):
        earlier_rows = pd.read_csv(output_csv_path, index_col=0)
        earlier_rows = earlier_rows[~earlier_rows['power'].isin(processed_powers)]
        if not earlier_rows.empty:
            df = pd.concat([earlier_rows, df], ignore_index=True)
            print(f"Merged with {len(earlier_rows)} rows already in {output_csv_path}")

    df.to_csv(output_csv_path, sep=',')
    print(f"Completed {interference_type}, saved to {output_csv_path}")

Processing interference type: TRIW
File size: 76,918,321,536 bytes
Total seconds for /content/drive/MyDrive/N-MON/Dataset_LINKS/LINKS_L1_10Mhz_complex_int8_20251202_154305_TRIW: 961 (16.0 minutes)
Processing P5: from 485s to 595s (110.0s)
  Will process 1100 chunks for P5
Using memory-mapped file access for LINKS_L1_10Mhz_complex_int8_20251202_154305_TRIW
  Saved ./array_dataset/TRIW/P5/P5_0.npy
  Saved ./array_dataset/TRIW/P5/P5_1.npy
  Saved ./array_dataset/TRIW/P5/P5_2.npy
  Saved ./array_dataset/TRIW/P5/P5_3.npy
  Saved ./array_dataset/TRIW/P5/P5_4.npy
  Saved ./array_dataset/TRIW/P5/P5_5.npy
  Saved ./array_dataset/TRIW/P5/P5_6.npy
  Saved ./array_dataset/TRIW/P5/P5_7.npy
  Saved ./array_dataset/TRIW/P5/P5_8.npy
  Saved ./array_dataset/TRIW/P5/P5_9.npy
  Saved ./array_dataset/TRIW/P5/P5_10.npy
  Saved ./array_dataset/TRIW/P5/P5_11.npy
  Saved ./array_dataset/TRIW/P5/P5_12.npy
  Saved ./array_dataset/TRIW/P5/P5_13.npy
  Saved ./array_dataset/TRIW/P5/P5_14.npy
  Saved ./array_datase

In [None]:
# download the folder array_dataset
if IN_COLAB:
    !zip -r array_dataset2.zip ./image_dataset/CLEAN/
    files.download("array_dataset2.zip")

  adding: image_dataset/CLEAN/ (stored 0%)
  adding: image_dataset/CLEAN/2337.npy (deflated 6%)
  adding: image_dataset/CLEAN/2480.npy (deflated 6%)
  adding: image_dataset/CLEAN/1591.npy (deflated 6%)
  adding: image_dataset/CLEAN/2661.npy (deflated 6%)
  adding: image_dataset/CLEAN/1736.npy (deflated 6%)
  adding: image_dataset/CLEAN/1495.npy (deflated 6%)
  adding: image_dataset/CLEAN/2305.npy (deflated 6%)
  adding: image_dataset/CLEAN/2038.npy (deflated 6%)
  adding: image_dataset/CLEAN/2559.npy (deflated 6%)
  adding: image_dataset/CLEAN/1525.npy (deflated 6%)
  adding: image_dataset/CLEAN/152.npy (deflated 6%)
  adding: image_dataset/CLEAN/183.npy (deflated 6%)
  adding: image_dataset/CLEAN/1582.npy (deflated 6%)
  adding: image_dataset/CLEAN/1881.npy (deflated 6%)
  adding: image_dataset/CLEAN/1514.npy (deflated 6%)
  adding: image_dataset/CLEAN/2000.npy (deflated 6%)
  adding: image_dataset/CLEAN/149.npy (deflated 6%)
  adding: image_dataset/CLEAN/1817.npy (deflated 6%)
  addi

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

In [None]:
# Clean the array_dataset folder from Colab to free disk space before the next batch.
# The existence check makes the cell safe to re-run: shutil.rmtree raises
# FileNotFoundError when the folder has already been removed.
if IN_COLAB:
    import shutil
    if os.path.isdir('./array_dataset'):
        shutil.rmtree('./array_dataset')

**Data Processing of CLEAN data**

In [None]:
# Build the dataset for recordings without power levels (the CLEAN recording).
# The whole file, minus `skip_second` seconds at each end, is cut into chunks. For each chunk
# the spectrogram array is saved to ./array_dataset/<type>/<i>.npy and one feature row is
# collected; the rows are written to ./image_dataset/features_file_<type>.csv.
import traceback

bytes_per_sample = {'fc32': 8, 'sc16': 4, 'sc8': 2}  # size of one complex sample per sample format

# CSV columns: 16 signal features followed by the labels of the chunk.
# Same schema as the jammed recordings, so the CSV files can be concatenated directly.
features_name = [f'f{n}' for n in range(1, 17)] + ['power', 'path_spec', 'class']

# There is no jammer power level in this cell: the 'power' column is left empty.
# Every row must still carry a value for it, otherwise pandas rejects the rows with
# "19 columns passed, passed data had 18 columns".
no_power_label = None

for jammer_path in jammer_folder:
    # The type is the last '_'-separated token of the entry name; only the basename is
    # parsed so that underscores in parent directories cannot leak into the result.
    interference_type = os.path.basename(jammer_path).split('_')[-1]
    if interference_type not in type_to_do:  # skip recordings we were not asked to process
        continue
    print(f"Processing interference type: {interference_type}")

    rows = []

    # dataset_output
    base_output_dir = os.path.join('./', 'image_dataset', interference_type)  # base output directory
    base_array_dir = os.path.join('./', 'array_dataset', interference_type)   # numpy array output directory
    os.makedirs(base_output_dir, exist_ok=True)
    os.makedirs(base_array_dir, exist_ok=True)

    file_size = os.path.getsize(jammer_path)
    # total duration of the file in seconds = bytes / (samples per second * bytes per sample)
    total_sec = round(file_size / fs_ / bytes_per_sample[output_type_])

    print(f'File size: {file_size:,} bytes')
    print(f'Total seconds for {jammer_path}: {total_sec} ({total_sec/60:.1f} minutes)')

    signal_analysis_object = SignalAnalysis(fs=fs_, fc=fc_, output_type=output_type_, file_name=jammer_path)

    # Time range of the recording, without its first and last `skip_second` seconds
    start = skip_second
    end = total_sec - skip_second

    # Start time (in seconds) of every chunk
    loop_range = np.arange(start, end, chunk_separation_file)
    print(f"  Will process {len(loop_range)} chunks for {interference_type}")

    try:
        for i, start_point_file in enumerate(loop_range):
            # NOTE(review): path_spec records a .jpg path under image_dataset, but this
            # cell only writes the .npy array below - confirm which path downstream code expects.
            file_name_img = os.path.join(base_output_dir, f"{i}.jpg")
            spec_array = signal_analysis_object.spectrogram_image(start_point=start_point_file)
            file_name_npy = os.path.join(base_array_dir, f"{i}.npy")
            np.save(file_name_npy, spec_array)

            features_signal = signal_analysis_object.extract_features(start_point=start_point_file)
            rows.append(features_signal + [no_power_label, file_name_img, interference_type])

            print(f"  Saved {file_name_npy}")

    except Exception as e:
        # len(rows) is the number of chunks completed, i.e. the index of the failing chunk.
        # No `continue` here: the rows collected so far are still written to the CSV below.
        print(f"\nERROR in {interference_type} at chunk {len(rows)}: {str(e)}")
        traceback.print_exc()
        print("Stopping this recording; saving the chunks processed so far...")

    print(f"\nTotal rows collected: {len(rows)}")
    df = pd.DataFrame(rows, columns=features_name)
    output_csv_path = os.path.join('./', 'image_dataset', f'features_file_{interference_type}.csv')
    df.to_csv(output_csv_path, sep=',')
    print(f"Completed {interference_type}, saved to {output_csv_path}")

Processing interference type: CLEAN
File size: 23,839,377,624 bytes
Total seconds for /content/drive/MyDrive/N-MON/Dataset_LINKS/LINKS_L1_10Mhz_complex_int8_20251202_160202_CLEAN: 298 (5.0 minutes)
  Will process 2880 chunks for CLEAN
Using memory-mapped file access for LINKS_L1_10Mhz_complex_int8_20251202_160202_CLEAN
  Saved ./image_dataset/CLEAN/0.npy
  Saved ./image_dataset/CLEAN/1.npy
  Saved ./image_dataset/CLEAN/2.npy
  Saved ./image_dataset/CLEAN/3.npy
  Saved ./image_dataset/CLEAN/4.npy
  Saved ./image_dataset/CLEAN/5.npy
  Saved ./image_dataset/CLEAN/6.npy
  Saved ./image_dataset/CLEAN/7.npy
  Saved ./image_dataset/CLEAN/8.npy
  Saved ./image_dataset/CLEAN/9.npy
  Saved ./image_dataset/CLEAN/10.npy
  Saved ./image_dataset/CLEAN/11.npy
  Saved ./image_dataset/CLEAN/12.npy
  Saved ./image_dataset/CLEAN/13.npy
  Saved ./image_dataset/CLEAN/14.npy
  Saved ./image_dataset/CLEAN/15.npy
  Saved ./image_dataset/CLEAN/16.npy
  Saved ./image_dataset/CLEAN/17.npy
  Saved ./image_dataset

ValueError: 19 columns passed, passed data had 18 columns