In [None]:
import os
import struct

def has_csi(file_path):
    """Heuristically decide whether ``file_path`` looks like a Wi-Fi CSI capture.

    The checks run in this order and the first one that succeeds wins:

    1. the file exists and is at least 4096 bytes long;
    2. the bytes ``b"PicoScenes"`` occur anywhere in the file;
    3. the first two bytes, read as a little-endian uint16, fall strictly
       between 50 and 500 (treated as an Intel 5300 field length);
    4. more than 1000 int16 (real, imag) pairs are found in the first 50 KB.

    A short message describing the outcome of the deciding check is printed.

    Args:
        file_path: Path of the file to inspect.

    Returns:
        bool: True if any heuristic matched, False otherwise (including when
        the file is missing or too small).
    """
    # 1. Check file exists
    if not os.path.exists(file_path):
        print("File not found:", file_path)
        return False

    # 2. CSI logs are usually > 4KB
    if os.path.getsize(file_path) < 4096:
        print("File too small to contain CSI data.")
        return False

    with open(file_path, "rb") as f:
        data = f.read()

    # 3. PicoScenes CSI always contains string "PicoScenes"
    if b"PicoScenes" in data:
        print("Detected PicoScenes data structure.")
        return True

    # 4. Intel 5300 heuristic: leading uint16 interpreted as a field length.
    # Byte order is pinned to little-endian so the result does not depend on
    # the machine running the notebook.
    try:
        (field_len,) = struct.unpack("<H", data[0:2])
    except struct.error:
        # Fewer than two bytes were read (file shrank after the size check).
        field_len = None
    if field_len is not None and 50 < field_len < 500:  # typical CSI payload size
        print("Likely Intel 5300 CSI format (valid field length).")
        return True

    # 5. Count int16 (real, imag) pairs in the first 50 KB. Only whole 4-byte
    # pairs are decoded; a trailing partial pair is ignored.
    scan_end = min(len(data), 50000)
    scan_end -= scan_end % 4
    complex_count = 0
    for real, imag in struct.iter_unpack("<hh", data[:scan_end]):
        # NOTE(review): an int16 is always within [-32768, 32767], so this test
        # only rejects the two extreme values. In practice this step accepts
        # almost any file of 4 KB or more; a stricter criterion needs knowledge
        # of the real capture format.
        if -32768 < real < 32767 and -32768 < imag < 32767:
            complex_count += 1

    if complex_count > 1000:
        print("CSI-like complex IQ patterns detected.")
        return True

    # Otherwise, not CSI
    return False


# --------------------------
# Run the detector on the sample capture and report the verdict
# --------------------------
file_path = "/content/1.csi"

contains_csi = has_csi(file_path)
print(
    "\n‚úÖ YES ‚Äî This file contains CSI data."
    if contains_csi
    else "\n‚ùå NO ‚Äî This file does NOT contain CSI data."
)


CSI-like complex IQ patterns detected.

‚úÖ YES ‚Äî This file contains CSI data.


In [None]:
import os
import struct

def print_csi_evidence(file_path):
    """Print a step-by-step report of the CSI heuristics for ``file_path``.

    Three independent pieces of evidence are evaluated and printed:

    1. whether the bytes ``b"PicoScenes"`` occur in the file (and where);
    2. whether the first two bytes, read as a little-endian uint16, fall
       strictly between 50 and 500;
    3. whether more than 1000 int16 (real, imag) pairs are found in the first
       20 KB; the first five pairs are printed as samples.

    A final YES/NO verdict is printed; it is YES if any of the three matched.

    Args:
        file_path: Path of the file to inspect.

    Returns:
        None. All results are written to stdout.
    """
    if not os.path.exists(file_path):
        print("‚ùå File not found:", file_path)
        return

    size = os.path.getsize(file_path)
    print(f"üìÅ File: {file_path}")
    print(f"üì¶ Size: {size} bytes")

    with open(file_path, "rb") as f:
        data = f.read()

    print("\n=== Checking CSI Evidence ===")

    # ---------------------------------------------------------
    # 1. Detect PicoScenes magic keyword
    # ---------------------------------------------------------
    signature_offset = data.find(b"PicoScenes")
    has_signature = signature_offset != -1
    if has_signature:
        print("üîµ Evidence 1: Found PicoScenes signature 'PicoScenes'")
        print(f"     ‚Üí Occurs at byte offset: {signature_offset}")
    else:
        print("‚ö™ No PicoScenes signature found.")

    # ---------------------------------------------------------
    # 2. Detect Intel 5300 CSI: field len (first 2 bytes)
    # ---------------------------------------------------------
    # field_len stays None when the file is shorter than two bytes, so the
    # final verdict never touches an unbound name.
    field_len = None
    try:
        (field_len,) = struct.unpack("<H", data[0:2])
    except struct.error:
        print("‚ö™ Unable to read field length.")
    has_intel_header = field_len is not None and 50 < field_len < 500
    if has_intel_header:
        print("üîµ Evidence 2: Intel 5300 CSI Field Length detected:", field_len)
    elif field_len is not None:
        print("‚ö™ Intel 5300 CSI field length not found.")

    # ---------------------------------------------------------
    # 3. Look for thousands of complex pairs (int16 real, int16 imag)
    # ---------------------------------------------------------
    complex_pairs = 0
    sample_values = []

    # Scan the first 20 KB; only whole 4-byte pairs are decoded.
    scan_end = min(len(data), 20000)
    scan_end -= scan_end % 4
    for real, imag in struct.iter_unpack("<hh", data[:scan_end]):
        # NOTE(review): every int16 satisfies abs(x) < 33000, so this filter
        # never rejects a pair; the count is effectively "bytes scanned / 4".
        if abs(real) < 33000 and abs(imag) < 33000:
            complex_pairs += 1
            if len(sample_values) < 5:  # save 5 samples as proof
                sample_values.append((real, imag))

    has_iq_pattern = complex_pairs > 1000
    if has_iq_pattern:
        print(f"üîµ Evidence 3: Detected {complex_pairs} complex IQ pairs.")
        print("     ‚Üí First few CSI-like values:")
        for idx, (r, im) in enumerate(sample_values):
            print(f"        CSI[{idx}] = ({r}, {im})")
    else:
        print("‚ö™ Complex IQ pattern NOT detected.")

    # ---------------------------------------------------------
    # 4. Final verdict
    # ---------------------------------------------------------
    if has_signature or has_iq_pattern or has_intel_header:
        print("\n‚úÖ YES ‚Äî This file contains CSI data.")
    else:
        print("\n‚ùå NO ‚Äî This file does not contain CSI data.")


# ------------------------------ #
# Run the checker: print the full evidence report for the sample capture.
# The path is hard-coded to the Colab working directory.
# ------------------------------ #
file_path = "/content/1.csi"
print_csi_evidence(file_path)

üìÅ File: /content/1.csi
üì¶ Size: 186951 bytes

=== Checking CSI Evidence ===
‚ö™ No PicoScenes signature found.
‚ö™ Intel 5300 CSI field length not found.
üîµ Evidence 3: Detected 5000 complex IQ pairs.
     ‚Üí First few CSI-like values:
        CSI[0] = (1039, 0)
        CSI[1] = (789, 8213)
        CSI[2] = (1, 13316)
        CSI[3] = (0, 2304)
        CSI[4] = (30802, 16979)

‚úÖ YES ‚Äî This file contains CSI data.


In [4]:
from google.colab import files

# Ask the user to pick a capture file; the result maps file names to contents.
uploaded = files.upload()

# An empty result (for example, a cancelled dialog) would otherwise surface as
# an opaque IndexError, so fail with an actionable message instead.
if not uploaded:
    raise RuntimeError("No file was uploaded; re-run this cell and choose a CSI capture file.")

file_path = next(iter(uploaded))  # first uploaded file
print("Loaded file:", file_path)

Saving rx_4_251203_002032.csi to rx_4_251203_002032.csi
Loaded file: rx_4_251203_002032.csi


In [5]:
import struct
import numpy as np
import matplotlib.pyplot as plt

def parse_picoscenes_packets(data):
    """Scan ``data`` for ``b'PicoScenes'`` markers and describe each hit.

    For every occurrence of the signature, the 8 bytes that follow are decoded
    as a little-endian uint64 and reported as the timestamp. Occurrences with
    fewer than 8 bytes after the signature are skipped.

    NOTE(review): the layout "[signature][8-byte timestamp][MACs][payload]" is
    an approximation taken from the original notebook, not a verified format
    description. The "macs" entry is likewise only a list of candidates: every
    6-byte window starting within 500 bytes of the signature (the signature
    bytes included) that is not all 0x00 or all 0xFF.

    Args:
        data: Raw bytes of the capture file.

    Returns:
        list[dict]: One dict per decoded occurrence, in file order, with keys
        "offset" (int), "timestamp" (int) and "macs" (list of 6-byte values).
    """
    signature = b"PicoScenes"
    sig_len = len(signature)
    implausible = (b"\x00" * 6, b"\xff" * 6)
    packets = []

    offset = data.find(signature)
    while offset != -1:
        body_start = offset + sig_len
        try:
            (timestamp,) = struct.unpack("<Q", data[body_start:body_start + 8])
        except struct.error:
            # Signature too close to the end of the data: no full timestamp.
            timestamp = None

        if timestamp is not None:
            # Only positions that still leave a full 6-byte window are visited.
            last_window_start = min(offset + 500, len(data) - 5)
            macs = [
                data[i:i + 6]
                for i in range(offset, last_window_start)
                if data[i:i + 6] not in implausible
            ]
            packets.append({
                "offset": offset,
                "timestamp": timestamp,
                "macs": macs
            })

        offset = data.find(signature, body_start)

    return packets


def mac_to_str(mac):
    """Format an iterable of byte values as colon-separated two-digit hex."""
    octets = map("{:02x}".format, mac)
    return ":".join(octets)


def extract_channel_bandwidth(data):
    """Return the first known bandwidth keyword found in ``data``.

    The keywords are searched as raw ASCII byte strings, in list order, and the
    first one present anywhere in ``data`` is returned.

    Args:
        data: Raw bytes of the capture file.

    Returns:
        str: The matching keyword (e.g. "HT20", "VHT80"), or "Unknown" when
        none of them occurs.
    """
    # b"HT80" is a substring of b"VHT80", so the VHT entries must be tested
    # before it; otherwise "VHT80" could never be reported.
    bw_keywords = [b"HT20", b"HT40", b"VHT80", b"VHT160", b"HT80"]
    for bw in bw_keywords:
        if bw in data:
            return bw.decode()
    return "Unknown"


def estimate_channel(data):
    """Guess a Wi-Fi channel number from ASCII digits embedded in ``data``.

    Every run of one to three ASCII digits that is not adjacent to another
    digit is read as a decimal number; the smallest such number in the range
    1..199 is returned. Matching whole digit runs (rather than substrings)
    keeps multi-digit channels reachable: b"36" yields 36, not 3.

    NOTE(review): this is still a weak heuristic. Arbitrary binary data often
    contains bytes that happen to be ASCII digits, so a returned value is not
    proof that the capture was taken on that channel.

    Args:
        data: Raw bytes of the capture file.

    Returns:
        int | str: The estimated channel, or the string "Unknown" when no
        candidate number is present.
    """
    import re  # local import keeps this cell self-contained

    digit_runs = re.findall(rb"(?<!\d)\d{1,3}(?!\d)", data)
    candidates = [value for value in map(int, digit_runs) if 1 <= value < 200]
    if not candidates:
        return "Unknown"
    return min(candidates)

In [6]:
# Read the binary CSI file
with open(file_path, "rb") as f:
    data = f.read()

packets = parse_picoscenes_packets(data)

print("===================================")
print("üì¶ FULL METADATA DUMP")
print("===================================")
print(f"Total PicoScenes packets detected: {len(packets)}")

timestamps = [p["timestamp"] for p in packets if "timestamp" in p]

# The capture span is computed once and reused for the rate estimate below.
# The division by 1e6 assumes the timestamps are in microseconds (TODO confirm
# against the capture format).
if len(timestamps) > 1:
    duration = (max(timestamps) - min(timestamps)) / 1e6
    print(f"üïí Capture Duration: {duration:.3f} seconds")
else:
    duration = 0.0
    print("Not enough timestamps to compute duration.")

# Channel + Bandwidth detection
bandwidth = extract_channel_bandwidth(data)
channel = estimate_channel(data)

print(f"üì° Channel: {channel}")
print(f"üì∂ Bandwidth Mode: {bandwidth}")

# MAC address extraction
all_macs = set()
for p in packets:
    for mac in p["macs"]:
        mac_str = mac_to_str(mac)
        # Only keep real MACs
        if not mac_str.startswith("00:00") and mac_str != "ff:ff:ff:ff:ff:ff":
            all_macs.add(mac_str)

print("\nüß≠ ALL MAC ADDRESSES SEEN:")
# Sorted so the listing is identical from one run to the next.
for m in sorted(all_macs):
    print("  ‚Üí", m)

print("\nüìä Number of CSI Packets Collected:", len(packets))

# CSI Rate
if len(timestamps) > 1:
    total_time = duration
    if total_time > 0:
        rate = len(timestamps) / total_time
        print(f"üìà Estimated CSI Rate: {rate:.2f} Hz")
    else:
        # All timestamps are identical; dividing would raise ZeroDivisionError.
        print("Capture duration is zero; cannot estimate CSI rate.")

üì¶ FULL METADATA DUMP
Total PicoScenes packets detected: 0
Not enough timestamps to compute duration.
üì° Channel: 1
üì∂ Bandwidth Mode: Unknown

üß≠ ALL MAC ADDRESSES SEEN:

üìä Number of CSI Packets Collected: 0


In [7]:
# Plot packet index against elapsed capture time (relative to the earliest
# timestamp). Relies on `timestamps` from the metadata cell.
if len(timestamps) > 0:
    t0 = min(timestamps)
    timeline = [(t - t0) / 1e6 for t in timestamps]

    fig, ax = plt.subplots(figsize=(12, 4))
    # Time goes on x and the packet index on y, matching the axis labels.
    # Passing only `timeline` would put time on the y-axis instead.
    ax.plot(timeline, range(len(timeline)), marker='o', linestyle='-', markersize=3)
    ax.set_xlabel("Time (seconds)")
    ax.set_ylabel("Packet Index")
    ax.set_title("üìä CSI Timeline Plot")
    ax.grid(True)
    plt.show()
else:
    print("No timestamps available for timeline plot.")

No timestamps available for timeline plot.
