In [4]:

import soundfile as sf
import csv
import time
import threading
import sounddevice as sd
import numpy as np
from pynput import keyboard
from scipy.io.wavfile import write as wav_write

## CURRENT ISSUE: the final ESC keystroke is logged, but the audio stops as soon as ESC is released, so its spectrogram is incomplete. The code should be edited so that ESC is skipped.

### The following code is used to record the audio and the keystrokes

* It is not used in the final project, but it is kept here for reference.
* The audio recording runs for a fixed `duration` (set to 60 seconds in the cell below). Keystroke logging runs at the same time, but it has to be stopped manually by pressing ESC (escape).

Heads-up: don't forget to grant accessibility access to the editor you use to run the code below, in particular Input Monitoring, which is needed for keystroke recording.


In [None]:
import numpy as np
import csv
import time
import sounddevice as sd
import threading
import scipy.io.wavfile as wav
from pynput import keyboard

# Output locations
audio_file = 'keyboard_sound.wav'
log_file = 'key_log.csv'

# Audio capture settings
sample_rate = 44100  # samples per second (Hz)
duration = 60  # length of the fixed recording window, in seconds
audio_data = np.zeros((sample_rate * duration, 1), dtype=np.int16)

# State shared between the recording thread, the key callbacks and the main loop
start_time_audio = None  # set by record_audio(); the key callbacks log nothing while it is None
stop_recording = False  # set to True by on_release() when ESC is released

# Start a fresh keystroke log that contains only the header row
with open(log_file, mode='w', newline='') as file:
    writer = csv.writer(file)
    writer.writerow(['Key', 'Action', 'Timestamp'])

# Recording function for audio
def record_audio():
    """Record `duration` seconds of audio and save it to `audio_file`.

    A stereo capture is attempted first. If PortAudio refuses to start it,
    the capture is retried in mono. Only the failure to *start* the stereo
    capture triggers the fallback: an error while waiting for the recording
    or while writing the WAV file is no longer mistaken for "no stereo
    support" (which previously caused a second full-length recording).

    Globals written:
        audio_data: the recorded samples returned by `sd.rec`.
        start_time_audio: wall-clock time taken right before the capture that
            actually runs; the key callbacks log timestamps relative to it.

    Raises:
        sd.PortAudioError: if the mono fallback cannot be started either.
    """
    global audio_data, start_time_audio
    print("Recording audio...")

    n_frames = int(duration * sample_rate)

    try:
        start_time_audio = time.time()
        audio_data = sd.rec(n_frames, samplerate=sample_rate, channels=2, dtype='int16')
    except sd.PortAudioError:
        print("Your computer does not support stereo recording. Defaulting to mono.")
        # Re-take the reference time so keystroke timestamps line up with the
        # recording that really starts, not with the failed stereo attempt.
        start_time_audio = time.time()
        audio_data = sd.rec(n_frames, samplerate=sample_rate, channels=1, dtype='int16')

    # sd.rec() returns immediately; block here until the buffer is filled.
    sd.wait()

    # Save the recorded audio to a WAV file
    wav.write(audio_file, sample_rate, audio_data)
    print(f"Audio saved to {audio_file}")

    print("Audio recording finished")

# Keystroke listener function
def on_press(key):
    """Listener callback: append a 'Pressed' row for `key` to the CSV log.

    Nothing is logged while `start_time_audio` is None (recording not started).
    The ESC key is never logged: it is only the stop key, and because the
    session ends around it, its audio (and therefore its spectrogram) would be
    incomplete.

    :param key: pynput key object passed in by the keyboard listener.
    :return: None, so the listener keeps running.
    """
    if start_time_audio is None:
        return  # Don't log if the audio hasn't started yet

    if key == keyboard.Key.esc:
        return  # Stop key: handled in on_release, deliberately kept out of the log

    timestamp = time.time() - start_time_audio  # Seconds since the recording started
    try:
        key_str = key.char  # Normal keys
    except AttributeError:
        key_str = str(key)  # Special keys like shift, ctrl, etc.

    # Log the key press with its relative timestamp
    with open(log_file, 'a', newline='') as file:
        writer = csv.writer(file)
        writer.writerow([key_str, 'Pressed', round(timestamp, 6)])  # Round for cleaner timestamp

    print(f"Key {key_str} Pressed at {timestamp:.6f} seconds")

def on_release(key):
    """Listener callback: log a 'Released' row, or end the session on ESC.

    Nothing happens while `start_time_audio` is None (recording not started).
    Releasing ESC sets `stop_recording` and stops the listener without writing
    a log row: the session ends around that keystroke, so its audio would be
    incomplete and it must not become a sample.

    :param key: pynput key object passed in by the keyboard listener.
    :return: False when ESC was released (this stops the listener),
             otherwise None.
    """
    global stop_recording

    if start_time_audio is None:
        return  # Don't log if the audio hasn't started yet

    # Stop key: raise the flag polled by the main loop and stop the listener.
    if key == keyboard.Key.esc:
        stop_recording = True
        return False

    timestamp = time.time() - start_time_audio  # Seconds since the recording started
    try:
        key_str = key.char
    except AttributeError:
        key_str = str(key)

    # Log the key release with its relative timestamp
    with open(log_file, 'a', newline='') as file:
        writer = csv.writer(file)
        writer.writerow([key_str, 'Released', round(timestamp, 6)])  # Round for cleaner timestamp

    print(f"Key {key_str} Released at {timestamp:.6f} seconds")

def start_keyboard_listener():
    """Run a pynput keyboard listener and block until it has stopped.

    The listener is driven by the `on_press` / `on_release` callbacks and
    ends when `on_release` returns False.
    """
    listener = keyboard.Listener(on_press=on_press, on_release=on_release)
    with listener:
        listener.join()


# Audio capture and key logging each run in their own worker thread, which
# leaves the cell's main thread free to poll the stop flag below.
audio_thread = threading.Thread(target=record_audio)
keyboard_thread = threading.Thread(target=start_keyboard_listener)
audio_thread.start()
keyboard_thread.start()

# Idle until on_release() raises the flag (ESC released); sleep between checks
# to keep CPU usage low.
while True:
    if stop_recording:
        break
    time.sleep(0.1)

# NOTE: record_audio() never looks at stop_recording, so this join returns only
# once the fixed-length recording has completed and been saved.
audio_thread.join()
keyboard_thread.join()
print("Recording process finished.")

Version 2: continuous audio recording into an in-memory buffer, stopped by pressing ESC

In [2]:
import numpy as np
import csv
import time
import sounddevice as sd
import threading
import scipy.io.wavfile as wav
from pynput import keyboard

# Output locations
audio_file = 'keyboard_sound_dell.wav'
log_file = 'key_log_dell.csv'

# Audio capture settings
sample_rate = 44100  # samples per second (Hz)
channels = 2  # channel count requested first; record_audio() retries with 1
audio_buffer = []  # audio blocks appended by audio_callback(), in arrival order

# State shared between the recording thread, the key callbacks and the main loop
start_time_audio = None
stop_recording = False

# Start a fresh keystroke log that contains only the header row
with open(log_file, mode='w', newline='') as file:
    writer = csv.writer(file)
    writer.writerow(['Key', 'Action', 'Timestamp'])

def audio_callback(indata, frames, time, status):
    """Stream callback: store a copy of each incoming audio block.

    :param indata: block of recorded samples handed over by the stream.
    :param frames: unused here.
    :param time: unused here (note: shadows the `time` module inside this function).
    :param status: stream status; printed whenever it is truthy.
    """
    if status:
        print(f"Audio callback status: {status}")
    # Keep a private copy of the block (presumably the stream reuses `indata`
    # after the callback returns; see the sounddevice callback docs).
    block = indata.copy()
    audio_buffer.append(block)

def _stream_until_stopped(n_channels):
    """Capture audio with `n_channels` channels until `stop_recording` is set."""
    global start_time_audio
    with sd.InputStream(samplerate=sample_rate, channels=n_channels, callback=audio_callback):
        # Re-take the reference time now that the stream is actually open, so
        # keystroke timestamps are not offset by the time spent opening it
        # (or by a failed earlier attempt).
        start_time_audio = time.time()
        while not stop_recording:
            time.sleep(0.01)


def record_audio():
    """Record audio until `stop_recording` is set, then save it to `audio_file`.

    The stream is first opened with `channels` channels; if that attempt
    raises, the recording is retried in mono. Blocks collected by the failed
    attempt are discarded first, because blocks with different channel counts
    cannot be concatenated into one recording.

    Globals written:
        start_time_audio: set once up front (so the key callbacks, including
            the ESC stop key, are active immediately) and again when a stream
            has actually been opened.

    Raises:
        Exception: whatever the mono attempt raises if it fails as well.
    """
    global start_time_audio
    print("Recording audio...")

    # Set the start time of the recording
    start_time_audio = time.time()

    try:
        # Try stereo recording first
        _stream_until_stopped(channels)
    except Exception as error:
        print("Your computer does not support stereo recording. Defaulting to mono.")
        print(f"Reason reported by the stereo attempt: {error}")
        # Drop any blocks from the failed attempt before recording in mono.
        audio_buffer.clear()
        _stream_until_stopped(1)

    # When stopped, save the recorded audio
    if audio_buffer:
        recorded = np.concatenate(audio_buffer, axis=0)
        wav.write(audio_file, sample_rate, recorded)
        print(f"Audio saved to {audio_file}")

    print("Audio recording finished")

# Keystroke listener function
def on_press(key, debug=False):
    """
    Listener callback: append a 'Pressed' row for `key` to the CSV log.

    Nothing is logged while `start_time_audio` is None (recording not started).
    The ESC key is never logged: it is only the stop key, and because the
    recording ends around it, its audio (and therefore its spectrogram) would
    be incomplete.

    :key : keyboard key object
    :debug : when True, also print the logged event
    """
    if start_time_audio is None:
        return  # Don't log if the audio hasn't started yet

    if key == keyboard.Key.esc:
        return  # Stop key: handled in on_release, deliberately kept out of the log

    timestamp = time.time() - start_time_audio  # Seconds since the recording started
    try:
        key_str = key.char  # Normal keys
    except AttributeError:
        key_str = str(key)  # Special keys like shift, ctrl, etc.

    # Log the key press with its relative timestamp
    with open(log_file, 'a', newline='') as file:
        writer = csv.writer(file)
        writer.writerow([key_str, 'Pressed', round(timestamp, 6)])  # Round for cleaner timestamp

    if debug:
        print(f"Key {key_str} Pressed at {timestamp:.6f} seconds")

def on_release(key, debug=False):
    """
    Listener callback: log a 'Released' row, or end the session on ESC.

    Nothing happens while `start_time_audio` is None (recording not started).
    Releasing ESC sets `stop_recording` and stops the listener without writing
    a log row: the recording ends around that keystroke, so its audio would be
    incomplete and it must not become a sample.

    :key : keyboard key object
    :debug : when True, also print the logged event
    :return : False when ESC was released (this stops the listener), else None
    """
    global stop_recording

    if start_time_audio is None:
        return  # Don't log if the audio hasn't started yet

    # Stop key: raise the flag polled by the other threads and stop the listener.
    if key == keyboard.Key.esc:
        stop_recording = True
        return False

    timestamp = time.time() - start_time_audio  # Seconds since the recording started
    try:
        key_str = key.char
    except AttributeError:
        key_str = str(key)

    # Log the key release with its relative timestamp
    with open(log_file, 'a', newline='') as file:
        writer = csv.writer(file)
        writer.writerow([key_str, 'Released', round(timestamp, 6)])  # Round for cleaner timestamp

    if debug:
        print(f"Key {key_str} Released at {timestamp:.6f} seconds")

def start_keyboard_listener():
    """Run a pynput keyboard listener and block until it has stopped.

    The listener is driven by the `on_press` / `on_release` callbacks and
    ends when `on_release` returns False.
    """
    listener = keyboard.Listener(on_press=on_press, on_release=on_release)
    with listener:
        listener.join()


# Audio capture and key logging each run in their own worker thread, which
# leaves the cell's main thread free to poll the stop flag below.
audio_thread = threading.Thread(target=record_audio)
keyboard_thread = threading.Thread(target=start_keyboard_listener)
audio_thread.start()
keyboard_thread.start()

# Idle until on_release() raises the flag (ESC released); sleep between checks
# to keep CPU usage low.
while True:
    if stop_recording:
        break
    time.sleep(0.01)

# Both workers wind down once the flag is set: record_audio() leaves its
# streaming loop and saves the file, and the listener has already been stopped.
audio_thread.join()
keyboard_thread.join()
print("Recording process finished.")

This process is not trusted! Input event monitoring will not be possible until it is added to accessibility clients.


Recording audio...
Your computer does not support stereo recording. Defaulting to mono.
Audio saved to keyboard_sound_dell.wav
Audio recording finished
Recording process finished.


In [3]:
# List the audio devices visible to sounddevice, with their input/output
# channel counts. Useful for checking whether the input device offers two
# channels before attempting a stereo recording.
print(sd.query_devices())

> 0 MacBook Pro Microphone, Core Audio (1 in, 0 out)
< 1 MacBook Pro Speakers, Core Audio (0 in, 2 out)
  2 Livebox-3E55 Microphone, Core Audio (1 in, 0 out)
  3 Microsoft Teams Audio, Core Audio (1 in, 1 out)


In [6]:
# Read the recording back as (samples, sample rate)
audio_path = "keyboard_sound_dell.wav"
audio_data, sample_rate = sf.read(audio_path)

# Pair every 'Pressed' row of the keystroke log with the next 'Released' row
# for the same key. A completed keystroke is stored as
# [key, press_time, release_time].
keystroke_times = []
with open("key_log_dell.csv", "r") as audio_data_file:
    stack = {}  # key -> [key, press_time] for keys that are currently held down

    for row in csv.reader(audio_data_file):
        if row[0] == "Key":
            continue  # header row
        key, action = row[0], row[1]
        timestamp = float(row[2])

        if action == "Pressed":
            # Only the first press counts; repeated presses of a held key are ignored
            if key not in stack:
                stack[key] = [key, timestamp]

        elif action == "Released":
            # A release without a matching press is ignored
            if key in stack:
                keystroke = stack.pop(key)
                keystroke.append(timestamp)
                keystroke_times.append(keystroke)

print(f"Total valid keystrokes: {len(keystroke_times)}")
print("Keystroke times:")
for key, press, release in keystroke_times:
    print(f"Key: {key}, Press: {press:.3f}, Release: {release:.3f}, Duration: {release-press:.3f}")

Total valid keystrokes: 425
Keystroke times:
Key: h, Press: 1.837, Release: 1.929, Duration: 0.092
Key: d, Press: 2.235, Release: 2.309, Duration: 0.074
Key: d, Press: 2.543, Release: 2.647, Duration: 0.104
Key: h, Press: 2.780, Release: 2.828, Duration: 0.048
Key: d, Press: 2.931, Release: 3.012, Duration: 0.081
Key: h, Press: 3.036, Release: 3.126, Duration: 0.090
Key: d, Press: 3.185, Release: 3.310, Duration: 0.125
Key: y, Press: 4.560, Release: 4.634, Duration: 0.074
Key: t, Press: 4.849, Release: 4.916, Duration: 0.067
Key: e, Press: 5.018, Release: 5.103, Duration: 0.084
Key: h, Press: 5.178, Release: 5.244, Duration: 0.065
Key: t, Press: 5.410, Release: 5.499, Duration: 0.090
Key: y, Press: 5.630, Release: 5.681, Duration: 0.052
Key: u, Press: 5.966, Release: 6.019, Duration: 0.052
Key: g, Press: 6.285, Release: 6.363, Duration: 0.077
Key: h, Press: 6.487, Release: 6.551, Duration: 0.064
Key: j, Press: 6.726, Release: 6.793, Duration: 0.067
Key: y, Press: 6.918, Release: 6.995,

## The code that generates individual spectrograms for each keystroke

In [7]:
# Environment check: report whether PyTorch can use a CUDA device on this machine.
import torch
print(f"CUDA available: {torch.cuda.is_available()}")

CUDA available: False


In [11]:
import numpy as np
import matplotlib.pyplot as plt
import scipy.io.wavfile as wav
import scipy.signal as signal
import csv
from scipy.ndimage import zoom
import os

# Parameters
AUDIO_FILE = "keyboard_sound_dell.wav"  # Replace with your actual audio file
KEYSTROKE_CSV = "key_log_dell.csv"
OUTPUT_DIR = "keystroke_spectrograms/keystroke_spectrograms_dell"
NUMPY_OUTPUT_DIR = "keystroke_spectrograms/numpy_arrays_dell"  # New directory for NumPy arrays

## Hyperparameters to fine-tune
BUFFER_BEFORE = 0.2  # Extra time (seconds) kept before each key press
BUFFER_AFTER = 0.1  # Extra time (seconds) kept after each key release

# Keys that must never become a sample. 'Key.esc' is how the recorder logs the
# ESC stop key; the recording ends around it, so its audio is truncated.
SKIPPED_KEYS = {"Key.esc"}

# Ensure output directories exist
os.makedirs(OUTPUT_DIR, exist_ok=True)
os.makedirs(NUMPY_OUTPUT_DIR, exist_ok=True)

# Load audio data
sample_rate, audio_data = wav.read(AUDIO_FILE)

# Read keystroke data from CSV and pair each press with its release.
# Each entry of keystroke_times is (key, press_time, release_time).
keystroke_times = []
with open(KEYSTROKE_CSV, "r", newline="") as file:
    reader = csv.reader(file)
    next(reader, None)  # Skip header (and tolerate an empty file)
    stack = {}  # key -> time of the first press that has not been released yet

    for row in reader:
        if len(row) < 3:
            continue  # Blank or truncated line
        key, action, timestamp = row[0], row[1], float(row[2])

        if key in SKIPPED_KEYS:
            continue

        if action == "Pressed":
            # Keep the FIRST press: repeated 'Pressed' rows for a held key must
            # not move the start of the keystroke (same rule as the pairing
            # cell earlier in this notebook).
            stack.setdefault(key, timestamp)
        elif action == "Released" and key in stack:
            keystroke_times.append((key, stack.pop(key), timestamp))  # Store key, press, and release

# Function to create and save the spectrogram and numpy arrays
def create_spectrogram_and_numpy(audio_segment, key, idx, target_time_bins=300):
    """Save one keystroke's spectrogram as a PNG plot and as a NumPy array.

    The spectrogram is computed with `scipy.signal.spectrogram` at the global
    `sample_rate` and converted to dB. The PNG shows it at its native
    resolution; the `.npy` file holds a copy whose time axis is resampled to
    `target_time_bins` columns so every keystroke has the same width.

    :param audio_segment: audio samples of the keystroke. May be 1-D, or 2-D
        with one column per channel (the layout `scipy.io.wavfile.read`
        returns for multi-channel files); multi-channel input is averaged to
        mono so the transform runs along the time axis, not across channels.
    :param key: key label, used in the plot title and in the file names.
        Characters that are not valid in file names (e.g. '/') are replaced
        by a 'uXXXX' code point in the file names only.
    :param idx: zero-based keystroke index; files are numbered `idx + 1`.
    :param target_time_bins: number of time columns in the saved array.
    """
    samples = np.asarray(audio_segment)
    if samples.ndim > 1:
        samples = samples.mean(axis=1)

    # Generate the spectrogram using scipy
    f, t, Sxx = signal.spectrogram(samples, sample_rate)
    # Convert power to dB; the small offset avoids log10(0)
    Sxx_log = 10 * np.log10(Sxx + 1e-10)

    # Stretch/shrink the time axis only (frequency axis untouched)
    time_zoom_factor = target_time_bins / Sxx_log.shape[1]
    Sxx_resampled = zoom(Sxx_log, (1, time_zoom_factor), order=5)

    # Build a file-name-safe version of the key label
    unsafe_chars = set('/\\:*?"<>|')
    file_label = "".join(
        f"u{ord(ch):04x}" if ch in unsafe_chars else ch for ch in str(key)
    )

    # Plot the spectrogram
    fig, ax = plt.subplots(figsize=(10, 4))
    mesh = ax.pcolormesh(t, f, Sxx_log, shading='auto', cmap='inferno')
    fig.colorbar(mesh, ax=ax, label='Power (dB)')
    ax.set_title(f"Keystroke '{key}' Spectrogram")
    ax.set_xlabel("Time (s)")
    ax.set_ylabel("Frequency (Hz)")

    # Save the spectrogram to file as PNG
    spectrogram_path = os.path.join(OUTPUT_DIR, f"keystroke_{idx + 1}_{file_label}.png")
    fig.savefig(spectrogram_path)
    plt.close(fig)  # Release the figure; this function runs once per keystroke
    print(f"Saved spectrogram for '{key}' at {spectrogram_path}")

    # Save the resampled spectrogram as a NumPy array
    numpy_array_path = os.path.join(NUMPY_OUTPUT_DIR, f"keystroke_{idx + 1}_{file_label}.npy")
    np.save(numpy_array_path, Sxx_resampled)
    print(f"Saved resampled NumPy array for '{key}' at {numpy_array_path}")

# Cut one padded window per keystroke out of the recording and hand it to
# create_spectrogram_and_numpy(). The window runs from BUFFER_BEFORE seconds
# before the press to BUFFER_AFTER seconds after the release, clipped to the
# bounds of the recording.
for idx, (key, press_time, release_time) in enumerate(keystroke_times):
    start_time = max(0, press_time - BUFFER_BEFORE)
    end_time = min(len(audio_data) / sample_rate, release_time + BUFFER_AFTER)

    # Convert the window from seconds to sample indices and slice it out
    start_sample, end_sample = int(start_time * sample_rate), int(end_time * sample_rate)
    keystroke_audio = audio_data[start_sample:end_sample]

    if len(keystroke_audio) > 0:
        create_spectrogram_and_numpy(keystroke_audio, key, idx)
    else:
        print(f"Warning: Empty audio segment for keystroke {idx + 1}")

print("Processing complete. Spectrograms and NumPy arrays saved.")


Saved spectrogram for 'h' at keystroke_spectrograms/keystroke_spectrograms_dell/keystroke_1_h.png
Saved resampled NumPy array for 'h' at keystroke_spectrograms/numpy_arrays_dell/keystroke_1_h.npy
Saved spectrogram for 'd' at keystroke_spectrograms/keystroke_spectrograms_dell/keystroke_2_d.png
Saved resampled NumPy array for 'd' at keystroke_spectrograms/numpy_arrays_dell/keystroke_2_d.npy
Saved spectrogram for 'd' at keystroke_spectrograms/keystroke_spectrograms_dell/keystroke_3_d.png
Saved resampled NumPy array for 'd' at keystroke_spectrograms/numpy_arrays_dell/keystroke_3_d.npy
Saved spectrogram for 'h' at keystroke_spectrograms/keystroke_spectrograms_dell/keystroke_4_h.png
Saved resampled NumPy array for 'h' at keystroke_spectrograms/numpy_arrays_dell/keystroke_4_h.npy
Saved spectrogram for 'd' at keystroke_spectrograms/keystroke_spectrograms_dell/keystroke_5_d.png
Saved resampled NumPy array for 'd' at keystroke_spectrograms/numpy_arrays_dell/keystroke_5_d.npy
Saved spectrogram fo