In [17]:

import soundfile as sf
import csv
import time
import threading
import sounddevice as sd
import numpy as np
from pynput import keyboard
from scipy.io.wavfile import write as wav_write
import os
import librosa


## CURRENT ISSUE : ESC is being recorded at the very end but the audio cuts, so its spectrogram is incomplete. Edit the code so that it is skipped

### The following code is used to record the audio and the keystrokes

*It is not used in the final project, but it is kept here for reference
*The audio recording will go on for 10 seconds. The keystroke recording will be done simultaneously but needs to be stopped manually by pressing ESC (escape)

Head's up : Dont forget to grant accessibility access the editor with which you're running the code below, in particular input monitoring for keystroke recording.


Version 2 : continuous audio recording with buffer

In [13]:
DATA_DIR = input("Enter the name of the keyboard")
try:
    os.mkdir(DATA_DIR)
except FileExistsError:
    print(f"Warning: The directory {DATA_DIR} already exists.")




In [14]:
import numpy as np
import csv
import time
import sounddevice as sd
import threading
import scipy.io.wavfile as wav
from pynput import keyboard

# Parameters for sound recording
sample_rate = 44100  # Hz
channels = 2  # Try stereo first
audio_buffer = []  # Buffer to store audio data

# File paths
audio_file = f'{DATA_DIR}/audio.wav'
log_file = f'{DATA_DIR}/key_log.csv'

# Initialize the keystroke log file
with open(log_file, 'w', newline='') as file:
    writer = csv.writer(file)
    writer.writerow(['Key', 'Action', 'Timestamp'])

# Global variables
start_time_audio = None
stop_recording = False

def audio_callback(indata, frames, time, status):
    """Callback function for audio streaming"""
    if status:
        print(f"Audio callback status: {status}")
    audio_buffer.append(indata.copy())

def record_audio():
    global start_time_audio, stop_recording
    print("Recording audio...")
    
    # Set the start time of the recording
    start_time_audio = time.time()
    
    try:
        # Try stereo recording first
        with sd.InputStream(samplerate=sample_rate, channels=channels, callback=audio_callback):
            while not stop_recording:
                time.sleep(0.01)
    except Exception as e:
        print("Your computer does not support stereo recording. Defaulting to mono.")
        # Try mono recording
        with sd.InputStream(samplerate=sample_rate, channels=1, callback=audio_callback):
            while not stop_recording:
                time.sleep(0.01)
    
    # When stopped, save the recorded audio
    if audio_buffer:
        audio_data = np.concatenate(audio_buffer, axis=0)
        wav.write(audio_file, sample_rate, audio_data)
        print(f"Audio saved to {audio_file}")
    
    print("Audio recording finished")

# Keystroke listener function
def on_press(key,debug=False):
    """"
    helper function to listen for keystrokes and record them on a csv file
    :key : keyboard key object
    """
    if start_time_audio is None:
        return  # Don't log if the audio hasn't started yet

    timestamp = time.time() - start_time_audio  # Calculate relative timestamp
    try:
        key_str = key.char  # Normal keys
    except AttributeError:
        key_str = str(key)  # Special keys like shift, ctrl, etc.

    # Log the key press with relative timestamp
    with open(log_file, 'a', newline='') as file:
        writer = csv.writer(file)
        writer.writerow([key_str, 'Pressed', round(timestamp, 6)])  # Round for cleaner timestamp

    if debug:
        print(f"Key {key_str} Pressed at {timestamp:.6f} seconds")

def on_release(key,debug=False):
    """"
    helper function to listen for keystrokes and record them on a csv file
    :key : keyboard key object
    """
    if start_time_audio is None:
        return  # Don't log if the audio hasn't started yet

    timestamp = time.time() - start_time_audio  # Calculate relative timestamp
    try:
        key_str = key.char
    except AttributeError:
        key_str = str(key)

    # Log the key release with relative timestamp
    with open(log_file, 'a', newline='') as file:
        writer = csv.writer(file)
        writer.writerow([key_str, 'Released', round(timestamp, 6)])  # Round for cleaner timestamp

    if debug:
        print(f"Key {key_str} Released at {timestamp:.6f} seconds")

    # Stop listener if 'Esc' key is pressed
    if key == keyboard.Key.esc:
        global stop_recording
        stop_recording = True  # Set flag to stop both recordings
        return False

# Start recording audio in a separate thread
audio_thread = threading.Thread(target=record_audio)
audio_thread.start()

# Start the keyboard listener in the main thread to avoid blocking
def start_keyboard_listener():
    with keyboard.Listener(on_press=on_press, on_release=on_release) as listener:
        listener.join()

# Run the keyboard listener in a separate thread
keyboard_thread = threading.Thread(target=start_keyboard_listener)
keyboard_thread.start()

# Wait for both threads to finish, while checking for stop condition
while not stop_recording:
    time.sleep(0.01)  # Prevent high CPU usage by sleeping briefly

# Once 'Esc' is pressed, both threads will finish
audio_thread.join()
keyboard_thread.join()
print("Recording process finished.")

This process is not trusted! Input event monitoring will not be possible until it is added to accessibility clients.


Recording audio...
Your computer does not support stereo recording. Defaulting to mono.
Audio saved to macbook/audio.wav
Audio recording finished
Recording process finished.


In [3]:
print(sd.query_devices())

> 0 MacBook Pro Microphone, Core Audio (1 in, 0 out)
< 1 MacBook Pro Speakers, Core Audio (0 in, 2 out)
  2 Livebox-3E55 Microphone, Core Audio (1 in, 0 out)
  3 Microsoft Teams Audio, Core Audio (1 in, 1 out)


In [15]:
# Load the audio file
audio_path = audio_file
audio_data, sample_rate = sf.read(audio_path)

averages = []

# Read keystroke timestamps from CSV
keystroke_times = []
with open(log_file, "r") as audio_data_file:
    reader = csv.reader(audio_data_file)
    stack = {}

    for row in reader:
        if row[0] == "Key":
            continue
        key = row[0]
        action = row[1]
        timestamp = float(row[2])

        if action == "Pressed":
            if key in stack:
                # Ignore multiple presses of the same key
                continue
            stack[key] = [key, timestamp]

        elif action == "Released":
            if key not in stack:
                # Ignore releases of keys that weren't pressed
                continue
            stack[key].append(timestamp)
            keystroke_times.append(stack[key])
            del stack[key]

print(f"Total valid keystrokes: {len(keystroke_times)}")
print("Keystroke times:")
for key, press, release in keystroke_times:
    #print(f"Key: {key}, Press: {press:.3f}, Release: {release:.3f}, Duration: {release-press:.3f}")
    averages.append(release-press)

print("Average keystroke duration: ", sum(averages)/len(keystroke_times))


Total valid keystrokes: 390
Keystroke times:
Average keystroke duration:  0.07150655641025588


## The code that generates individual spectrograms for each keystroke

In [7]:
import torch
print(f"CUDA available: {torch.cuda.is_available()}")

CUDA available: False


The following code defines two method to extract the keystrokes and normalize them

1. Using a fixed buffer : split into two, increases the sampling time beyond press /release
> Used by literature in this IEEE article (https://ieeexplore.ieee.org/document/10190721)
2. Audio resampling 

In [22]:
import numpy as np
import matplotlib.pyplot as plt
import scipy.io.wavfile as wav
import scipy.signal as signal
import csv
from scipy.ndimage import zoom
import os

# Parameters
AUDIO_FILE = audio_file
KEYSTROKE_CSV = log_file
OUTPUT_DIR = DATA_DIR + "/keystroke_spectrograms"
NUMPY_OUTPUT_DIR = DATA_DIR + "/numpy_arrays"  # New directory for NumPy arrays

## Hyperparameter to fine-tune
BUFFER_BEFORE = 0.2  # Extra time (seconds) before & after each keystroke
BUFFER_AFTER = 0.1

# Ensure output directories exist
os.makedirs(OUTPUT_DIR, exist_ok=True)
os.makedirs(NUMPY_OUTPUT_DIR, exist_ok=True)

# Load audio data
sample_rate, audio_data = wav.read(AUDIO_FILE)

# Read keystroke data from CSV
keystroke_times = []
with open(KEYSTROKE_CSV, "r") as file:
    reader = csv.reader(file)
    next(reader)  # Skip header
    stack = {}

    for row in reader:
        key, action, timestamp = row[0], row[1], float(row[2])

        if action == "Pressed":
            stack[key] = timestamp  # Store press time
        elif action == "Released" and key in stack:
            keystroke_times.append((key, stack.pop(key), timestamp))  # Store key, press, and release

# Function to create and save the spectrogram and numpy arrays
def create_spectrogram_and_numpy_resampled(audio_segment, key, idx,target_time_bins=300):
    # Generate the spectrogram using scipy

    f, t, Sxx = signal.spectrogram(audio_segment, sample_rate)
    Sxx_log = 10 * np.log10(Sxx + 1e-10)
    # converting the audio signal to dB scale (to better visualize the high / low signals)

    # Create new time points, evenly spaced
    time_zoom_factor = target_time_bins / Sxx_log.shape[1]

    # Interpolate to new time points
    Sxx_resampled = zoom(Sxx_log, (1, time_zoom_factor), order=5)
    # Normalize spectrogram for neural network (optional)

    # Plot the spectrogram
    plt.figure(figsize=(10, 4))
    plt.pcolormesh(t, f, Sxx_log, shading='auto', cmap='inferno')
    plt.colorbar(label='Power (dB)')
    plt.title(f"Keystroke '{key}' Spectrogram")
    plt.xlabel("Time (s)")
    plt.ylabel("Frequency (Hz)")

    # Save the spectrogram to file as PNG
    spectrogram_path = os.path.join(OUTPUT_DIR, f"keystroke_{idx + 1}_{key}.png")
    plt.savefig(spectrogram_path)
    plt.close()
    print(f"Saved spectrogram for '{key}' at {spectrogram_path}")
    
    # Save the spectrogram as a NumPy array
    # Save the resampled spectrogram
    numpy_array_path = os.path.join(NUMPY_OUTPUT_DIR, f"keystroke_{idx + 1}_{key}.npy")

    np.save(numpy_array_path, Sxx_resampled)

    print(f"Saved resampled NumPy array for '{key}' at {numpy_array_path}")

# process each keystroke by sampling each key with press / release times
for idx, (key, press_time, release_time) in enumerate(keystroke_times):
    # Use exact press and release times without buffer
    start_time = max(0, press_time - BUFFER_BEFORE)  # Ensure we don't go before 0
    end_time = min(len(audio_data) / sample_rate, release_time + BUFFER_AFTER)  # Ensure we don't go beyond audio length
    
    start_sample = int(start_time * sample_rate)
    end_sample = int(end_time * sample_rate)
    # Extract audio segment
    keystroke_audio = audio_data[start_sample:end_sample]
    
    if len(keystroke_audio) == 0:
        print(f"Warning: Empty audio segment for keystroke {idx + 1}")
        continue

def create_spectrogram_and_numpy(audio_segment, key, idx):
    # Generate the spectrogram using scipy

    mel_spect = librosa.feature.melspectrogram(
        y=audio_segment,
        sr=sample_rate,
        n_mels=80,           # Reduced from 128 - still detailed enough
        n_fft=2048,          # Keep this - good balance
        hop_length=512,      # Changed from 1024 for better temporal resolution
        window='hann',       # Keep this - good choice
        power=2.0            # Keep this - standard choice
    )

    # Convert to log scale (dB)
    mel_spect_db = librosa.power_to_db(mel_spect, ref=np.max)

    # Normalize to 0-1 range
    mel_spect_norm = (mel_spect_db - mel_spect_db.min()) / (mel_spect_db.max() - mel_spect_db.min())
    # converting the audio signal to dB scale (to better visualize the high / low signals)


    # Normalize spectrogram for neural network (optional)

    # Plot the spectrogram
    plt.figure(figsize=(10, 4))
    librosa.display.specshow(
        mel_spect_db,
        sr=sample_rate,
        x_axis='time',
        y_axis='mel',
        hop_length=1024
    )
    plt.colorbar(format='%+2.0f dB')
    plt.title('Mel Spectrogram')
    plt.xlabel('Time (s)')
    plt.ylabel('Frequency (Hz)')
    plt.tight_layout()

    # Save the spectrogram to file as PNG
    spectrogram_path = os.path.join(OUTPUT_DIR, f"keystroke_{idx + 1}_{key}.png")
    plt.savefig(spectrogram_path)
    plt.close()
    print(f"Saved spectrogram for '{key}' at {spectrogram_path}")
    
    # Save the spectrogram as a NumPy array
    # Save the resampled spectrogram
    numpy_array_path = os.path.join(NUMPY_OUTPUT_DIR, f"keystroke_{idx + 1}_{key}.npy")

    np.save(numpy_array_path, mel_spect_norm)

    print(f"Saved resampled NumPy array for '{key}' at {numpy_array_path}")

BUFFER = 0.07
# process each keystroke by sampling each key with press / release times
for idx, (key, press_time, release_time) in enumerate(keystroke_times):
    # Use exact press and release times without buffer
    start_time = max(0, press_time - BUFFER)  # Ensure we don't go before 0
    end_time = min(len(audio_data) / sample_rate, release_time + BUFFER)  # Ensure we don't go beyond audio length
    
    start_sample = int(start_time * sample_rate)
    end_sample = int(end_time * sample_rate)
    # Extract audio segment
    keystroke_audio = audio_data[start_sample:end_sample]
    
    if len(keystroke_audio) == 0:
        print(f"Warning: Empty audio segment for keystroke {idx + 1}")
        continue
        
    create_spectrogram_and_numpy(keystroke_audio, key, idx)

print("Processing complete. Spectrograms and NumPy arrays saved.")


Saved spectrogram for 'Key.cmd' at macbook/keystroke_spectrograms/keystroke_1_Key.cmd.png
Saved resampled NumPy array for 'Key.cmd' at macbook/numpy_arrays/keystroke_1_Key.cmd.npy
Saved spectrogram for 's' at macbook/keystroke_spectrograms/keystroke_2_s.png
Saved resampled NumPy array for 's' at macbook/numpy_arrays/keystroke_2_s.npy
Saved spectrogram for 'e' at macbook/keystroke_spectrograms/keystroke_3_e.png
Saved resampled NumPy array for 'e' at macbook/numpy_arrays/keystroke_3_e.npy
Saved spectrogram for 'r' at macbook/keystroke_spectrograms/keystroke_4_r.png
Saved resampled NumPy array for 'r' at macbook/numpy_arrays/keystroke_4_r.npy
Saved spectrogram for 't' at macbook/keystroke_spectrograms/keystroke_5_t.png
Saved resampled NumPy array for 't' at macbook/numpy_arrays/keystroke_5_t.npy
Saved spectrogram for 'h' at macbook/keystroke_spectrograms/keystroke_6_h.png
Saved resampled NumPy array for 'h' at macbook/numpy_arrays/keystroke_6_h.npy
Saved spectrogram for 'g' at macbook/key

In [None]:
# ALL SET ! Head up to the nn.ipynb file to train the model