In [1]:
import torch
import numpy as np
from torch.utils.data import Dataset, DataLoader
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
import os
import string
import re
import matplotlib.pyplot as plt

In [10]:
# Check if .gitignore exists and handle *.npy exception

try:
    os.chdir('/home/test')
    os.listdir()
except:
    print("Could not change directory to /home/test")
    print("If you intended to run the code locally, you can skip this message")

gitignore_path = "../.gitignore"
if os.path.exists(gitignore_path):
    # Read current content
    with open(gitignore_path, "r") as f:
        content = f.read()
    
    # Only append if *.npy is not already in the file
    if "*.npy" not in content:
        with open(gitignore_path, "a") as f:
            f.write("\n*.npy")  # Add newline before appending
else:
    # Create new .gitignore if it doesn't exist
    with open(gitignore_path, "w") as f:
        f.write("*.npy")


DATA_DIR = input("Enter the name of the directory")
if not os.path.exists(DATA_DIR):
    print(f"Error: The directory {DATA_DIR} does not exist.")


In [2]:
import numpy as np
import torch
import os
import torch.nn.functional as F  # Import F for padding

# Path to the directory containing the saved NumPy arrays
DATA_DIR = "dell_test"
NUMPY_DIR = DATA_DIR + "/numpy_arrays"
BASE_SIZE = torch.Size([1,129,300])

# Padding function
def pad_tensor(tensor, target_width):
    current_width = tensor.shape[2]
    if current_width < target_width:
        padding = target_width - current_width
        return F.pad(tensor, (0, padding))  # Pad the time dimension
    elif current_width > target_width:
        return tensor[:, :, :target_width]  # Truncate to target width
    return tensor

# Function to load all spectrograms from the directory
def load_spectrograms_from_directory(directory):

    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    spectrograms = []
    keys = []
    widths = []
    filenames = os.listdir(directory)

    # Loop through each file in the directory
    for filename in filenames:
        if filename.endswith('.npy'):
            file_path = os.path.join(directory, filename)

            # Load the NumPy array from file
            spectrogram = np.load(file_path)

            widths.append(spectrogram.shape[1])

            try: 
                key = re.search(r"keystroke_\d+_([A-Za-z])\.npy", filename).group(1)
            except AttributeError:
                print(f"Error: Could not extract key from filename: {filename}. Make sure it is a valid key (aA-zZ)")
                continue

            # Convert the spectrogram to a PyTorch tensor
            spectrogram_tensor = torch.tensor(spectrogram).float().to(device)

            

            if spectrogram_tensor.shape != BASE_SIZE:
                print(f"Warning : Spectrogram tensor shape {spectrogram_tensor.shape} does not match the expected size {BASE_SIZE}")
                continue
            
            #spectrogram_tensor = spectrogram_tensor.unsqueeze(0)

            
            # Rearrange dimensions from [80, 13, 4] to [4, 80, 13]
            #spectrogram_tensor = spectrogram_tensor.permute(2, 0, 1)
            
            # Pad or truncate the time dimension to match BASE_SIZE[2] (300)
            """"
            if spectrogram_tensor.shape[2] < BASE_SIZE[2]:
                # Pad with zeros
                padding = BASE_SIZE[2] - spectrogram_tensor.shape[2]
                spectrogram_tensor = F.pad(spectrogram_tensor, (0, padding))
            else:
                # Truncate to desired length
                spectrogram_tensor = spectrogram_tensor[:, :, :BASE_SIZE[2]]

            if spectrogram_tensor.shape != BASE_SIZE:
                print(f"Warning: Spectrogram tensor shape {spectrogram_tensor.shape} does not match the expected size {BASE_SIZE}")
            """

            # Add the tensor to the list
            spectrograms.append(spectrogram_tensor)
            keys.append(key)

    print(f"loaded {len(spectrograms)} spectrograms")
    return spectrograms, keys, max(widths)


spectrogram_tensors, keys, max_width = load_spectrograms_from_directory(NUMPY_DIR)

assert len(spectrogram_tensors) == len(keys), "The number of spectrograms and keys do not match!"


## Need to pad the spectrograms to the same width


# Apply padding to all spectrograms
"""
for i in range(len(spectrogram_tensors)):
        if (padded := pad_tensor(spectrogram_tensors[i], 300)) is not None:
            spectrogram_tensors[i] = padded
"""


# Verify padding
for i, spectrogram in enumerate(spectrogram_tensors):
    #print(f"Shape of spectrogram {i}: {spectrogram.shape}")
    pass

alphabet = "abcdefghijklmnopqrstuvwxyz"
char2idx = {char: idx for idx, char in enumerate(alphabet)}
label_indices = [char2idx[char] for char in keys]
label_tensor = torch.tensor(label_indices, dtype=torch.long)


Error: Could not extract key from filename: keystroke_67_Key.esc.npy. Make sure it is a valid key (aA-zZ)
loaded 66 spectrograms


In [3]:
class KeystrokeDataset(Dataset):
    def __init__(self, spectrograms_tensors, labels):
        self.spectrograms = spectrograms_tensors
        self.labels = labels

    def __len__(self):
        return len(self.spectrograms)

    def __getitem__(self, idx):
        return self.spectrograms[idx], self.labels[idx]

### Optional : save the tensors in a file for storing
* the following cell is optional and can be skipped

In [13]:
import paramiko
from scp import SCPClient

def export_data():

    keyboards = ("dell","dell_test","macbook")

    hostname = "109.222.56.181"
    port = 5002
    username = "test"
    password = "test"

    client = paramiko.SSHClient()
    client.set_missing_host_key_policy(paramiko.AutoAddPolicy())

    for keyboard in keyboards: 
        try:
            try : 
                client.connect(hostname, username=username, password=password,timeout=20,port=port)
            except TimeoutError:
                print("could not establish a connection")

            scp = SCPClient(client.get_transport())
            scp.put(f"{keyboard}/numpy_arrays", f"/home/test/{keyboard}", recursive=True)
            print("migrated the numpy arrays")
            scp.put(f"{keyboard}/aligned_iphone.wav", f"/home/test/{keyboard}")
            print("migrated the audio data")
            scp.put(f"{keyboard}/key_log.csv", f"/home/test/{keyboard}")
            print("migrated the keyboard keystroke data")

        finally:
            client.close()

    # export the data to GPU server
    #from subprocess import run
    #run(['sshpass', '-p', 'test', 'scp', '-r', '-P', '5002', '-o', 'StrictHostKeyChecking=no', '-o', 'UserKnownHostsFile=/dev/null', '/Users/oscartesniere/Documents/GitHub/MAIS202-FinalProject/code/', 'test@oscartesniere.com:/home/test'])

export_data()



In [22]:
import os
try:
    os.chdir('/home/test')
    loaded_data = torch.load('keystroke_data.pt')
    # Extract the tensors and labels
    spectrogram_tensors = loaded_data['tensors']
    label_tensor = loaded_data['labels']
except:
    print("Could not change directory to /home/test")
    print("If you intended to run the code locally, you can skip this message")
    

train_dataset = KeystrokeDataset(spectrogram_tensors,label_tensor)

train_loader = DataLoader(train_dataset, batch_size=64, shuffle=True)


Could not change directory to /home/test
If you intended to run the code locally, you can skip this message


In [6]:
class KeystrokeCNN(nn.Module):
    def __init__(self,input_height=129, input_width=300, num_classes=26,input_channels=1):
        super(KeystrokeCNN, self).__init__()

        self.input_channels = input_channels
        
        # Convolutional layers
        self.conv1 = nn.Conv2d(input_channels, 32, kernel_size=3, padding=1)  # (1, H, W) -> (32, H, W)
        self.conv2 = nn.Conv2d(32, 64, kernel_size=3, padding=1) # (32, H, W) -> (64, H, W)
        self.conv3 = nn.Conv2d(64, 128, kernel_size=3, padding=1) # (64, H, W) -> (128, H, W)

        # Pooling layer
        self.pool = nn.MaxPool2d(2, 2)  # Reduces dimensions by half (H/2, W/2)

        # Calculate the final feature map size dynamically
        self._to_linear = self._get_conv_output_size(input_height, input_width)

        # Fully connected layers
        self.fc1 = nn.Linear(self._to_linear, 512)  
        self.fc2 = nn.Linear(512, num_classes)  # 26 output classes (A-Z)
        #list(string.ascii_lowercase) for generating the output classes

    def _get_conv_output_size(self, height, width):
        """Pass a dummy tensor to determine final feature map size after convolutions"""
        x = torch.zeros(1, self.input_channels, height, width)  # Batch size = 1, 1 channel, (H, W)
        x = self.pool(F.relu(self.conv1(x)))
        x = self.pool(F.relu(self.conv2(x)))
        x = self.pool(F.relu(self.conv3(x)))
        return x.numel()  # Flattened size
    
    def forward(self, x):
        
        print("Input shape:", x.shape)
        
        x = F.relu(self.conv1(x))
        x = F.max_pool2d(x, 2)
       # print("After conv1+pool:", x.shape)
        
        x = F.relu(self.conv2(x))
        x = F.max_pool2d(x, 2)
       # print("After conv2+pool:", x.shape)
        
        x = F.relu(self.conv3(x))
        x = F.max_pool2d(x, 2)
       # print("After conv3+pool:", x.shape)
        
        x = x.view(x.size(0), -1)
        #print("After flattening:", x.shape)
        
        x = F.relu(self.fc1(x))
        x = self.fc2(x)
        return x

# Initialize the model
model = KeystrokeCNN()




In [24]:
class KeystrokeCNNv2(nn.Module):
    def __init__(self, input_height=80, input_width=300, num_classes=26, input_channels=4):
        super(KeystrokeCNNv2, self).__init__()

        self.input_channels = input_channels
        
        # 3D Convolutional layers
        self.conv1 = nn.Conv3d(input_channels, 32, kernel_size=(3, 3, 3), padding=(1, 1, 1))
        self.conv2 = nn.Conv3d(32, 64, kernel_size=(3, 3, 3), padding=(1, 1, 1))
        self.conv3 = nn.Conv3d(64, 128, kernel_size=(3, 3, 3), padding=(1, 1, 1))

        # 3D Pooling layer - only pool spatial dimensions, not channels
        self.pool = nn.MaxPool3d(kernel_size=(1, 2, 2))

        # Calculate the final feature map size dynamically
        self._to_linear = self._get_conv_output_size(input_height, input_width)

        # Fully connected layers
        self.fc1 = nn.Linear(self._to_linear, 512)
        self.fc2 = nn.Linear(512, num_classes)

    def _get_conv_output_size(self, height, width):
        x = torch.zeros(1, 1, self.input_channels, height, width)
        x = self.pool(F.relu(self.conv1(x)))
        x = self.pool(F.relu(self.conv2(x)))
        x = self.pool(F.relu(self.conv3(x)))
        return x.numel()

    def forward(self, x):
        # Reshape input to 5D tensor
        x = x.unsqueeze(1)  # Add channel dimension for 3D conv
        
        # 3D convolutions with ReLU and pooling
        x = F.relu(self.conv1(x))
        x = self.pool(x)
        
        x = F.relu(self.conv2(x))
        x = self.pool(x)
        
        x = F.relu(self.conv3(x))
        x = self.pool(x)
        
        # Flatten
        x = x.view(x.size(0), -1)
        
        # Fully connected layers
        x = F.relu(self.fc1(x))
        x = self.fc2(x)
        return x

#model = KeystrokeCNNv2()

In [5]:
# Loss function and optimizer
lr = 0.001
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr)

#keyboard_names = ('dell', 'macbook', 'lenovo') # enter any new keyboard here for identification


In [7]:
# Function to train the model
from datetime import datetime

def train(model, train_loader, criterion, optimizer,lr, epochs,keyboard_name,BUFFER):
    
    model.train()
    losses = []
    
    for epoch in range(epochs):
        running_loss = 0.0
        epoch_losses = []
        
        for i, (inputs, labels) in enumerate(train_loader, 0):
            optimizer.zero_grad()
            
            try:
                outputs = model(inputs)
                loss = criterion(outputs, labels)
                loss.backward()
                optimizer.step()
                
                running_loss += loss.item()
                epoch_losses.append(loss.item())
                
                if i % 10 == 9:  # Print every 10 batches
                    print(f"[{epoch + 1}, {i + 1}] loss: {running_loss / 10:.3f}")
                    running_loss = 0.0
                    
            except Exception as e:
                print("Input tensors may not be of the right shape", e)
                raise e
        
        # Store average loss for this epoch
        avg_epoch_loss = sum(epoch_losses) / len(epoch_losses)
        losses.append(avg_epoch_loss)
        print(f"Epoch {epoch + 1} average loss: {avg_epoch_loss:.3f}")
        """ Save model after each epoch
        torch.save({
            'epoch': epoch,
            'model_state_dict': model.state_dict(),
            'optimizer_state_dict': optimizer.state_dict(),
            'loss': avg_epoch_loss,
        }, f"{save_path}_epoch_{epoch+1}.pt")
        """
    filename  = datetime.now().strftime("%Y%m%d_%H%M%S") + '_' + keyboard_name + '_keystroke_model.pt'
    # Save final model
    torch.save({
        'model_state_dict': model.state_dict(),
        'optimizer_state_dict': optimizer.state_dict(),
        'loss': losses[-1],
        'epochs': epochs, 
    }, filename)

    plt.plot(range(epochs), losses)
    plt.title(f'Training Loss for {epochs} epochs, {model} model, {criterion} loss function, and {lr} learning rate')
    plt.xlabel('Epoch')
    plt.ylabel('Error')
    plt.savefig(f'training_loss_{BUFFER}.png', dpi=300, bbox_inches='tight')
    #plt.show()
        
    return losses, filename

# Training call
#losses, filename = train(model, train_loader, criterion, optimizer, 30,"macbook")

#conv_layers = [layer for layer in model.modules() if isinstance(layer, nn.Conv2d)]

""""
for layer in conv_layers:
    # Get weights for this specific layer
    weights = layer.weight.data  # Shape: (out_channels, in_channels, kernel_size, kernel_size)
    
    # Calculate grid size based on number of output channels
    n_filters = layer.out_channels
    n_cols = 8  # Fixed number of columns
    n_rows = (n_filters + n_cols - 1) // n_cols  # Calculate rows needed
    
    # Create figure with appropriate size
    fig, axes = plt.subplots(n_rows, n_cols, figsize=(15, 2*n_rows))
    axes = axes.ravel()
    
    # Plot each filter
    for i in range(n_filters):
        # Get the first channel of the filter (since we want to visualize 2D)
        filter_weights = weights[i, 0].cpu().numpy()
        
        # Plot the filter
        axes[i].imshow(filter_weights, cmap='gray')
        axes[i].axis('off')
        axes[i].set_title(f'Filter {i+1}')
    
    # Hide empty subplots if any
    for i in range(n_filters, len(axes)):
        axes[i].axis('off')
    
    plt.suptitle(f'Filters of {layer.__class__.__name__} layer (in_channels={layer.in_channels}, out_channels={layer.out_channels})')
    plt.tight_layout()
    #plt.show()
"""

# Plot training loss



'"\nfor layer in conv_layers:\n    # Get weights for this specific layer\n    weights = layer.weight.data  # Shape: (out_channels, in_channels, kernel_size, kernel_size)\n\n    # Calculate grid size based on number of output channels\n    n_filters = layer.out_channels\n    n_cols = 8  # Fixed number of columns\n    n_rows = (n_filters + n_cols - 1) // n_cols  # Calculate rows needed\n\n    # Create figure with appropriate size\n    fig, axes = plt.subplots(n_rows, n_cols, figsize=(15, 2*n_rows))\n    axes = axes.ravel()\n\n    # Plot each filter\n    for i in range(n_filters):\n        # Get the first channel of the filter (since we want to visualize 2D)\n        filter_weights = weights[i, 0].cpu().numpy()\n\n        # Plot the filter\n        axes[i].imshow(filter_weights, cmap=\'gray\')\n        axes[i].axis(\'off\')\n        axes[i].set_title(f\'Filter {i+1}\')\n\n    # Hide empty subplots if any\n    for i in range(n_filters, len(axes)):\n        axes[i].axis(\'off\')\n\n    plt.

In [58]:
import torch.nn.functional as F
from sklearn.metrics import confusion_matrix
import seaborn as sns
from datetime import datetime
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score

epochs = 100

def load_model(filename):
    if "model" not in locals() or type(model) != KeystrokeCNN:
        print("Please load the model first")
        model = KeystrokeCNN()
        if not filename or filename is None:
            filename = input("Enter the filename of the saved model")
        checkpoint = torch.load(filename)
        model.load_state_dict(checkpoint['model_state_dict'])
        if 'epochs' in checkpoint:
            epochs = checkpoint['epochs']
        else: 
            epochs = 100
    
    return model, epochs
# Define character mapping (should match what you used in training)
letters = "abcdefghijklmnopqrstuvwxyz"
idx2char = {i: c for i, c in enumerate(letters)}

for name, param in model.named_parameters():
    print(f"{name}: {param.shape}")

def predict(model, spectrogram_tensor):
    model.eval()  # Set model to evaluation mode
    
    with torch.no_grad():  # No need to compute gradients
        output = model(spectrogram_tensor)  # Forward pass
        probabilities = F.softmax(output, dim=1)  # Convert logits to probabilities
        predicted_idx = torch.argmax(probabilities, dim=1).item()  # Get class index
        predicted_letter = idx2char[predicted_idx]  # Convert index to letter
        
    return predicted_letter


def crop_spectrogram(tensor, target_width=300):
    return tensor[:, :, :target_width] 


#NUMPY_DIR = "dell_test"
spectrogram_tensors, keys, max_width = load_spectrograms_from_directory("dell_test/numpy_arrays")
# torch.Size([1, 1, 129, *])


def evaluate_model(model, spectrogram_tensors, keys,buffer):
    y_hat = []
    y_true = []
    for i in range(len(spectrogram_tensors)):
        test_tensor = spectrogram_tensors[i].unsqueeze(0)
        predicted_letter = predict(model, test_tensor)
        y_hat.append(predicted_letter)
        y_true.append(keys[i])
        #print(f"Predicted letter: {predicted_letter}")
        #print("actual letter:", keys[i])
        letter_to_idx = {letter: idx for idx, letter in enumerate(letters)}
        y_true_idx = [letter.item() for letter in y_true]
        y_hat_idx = [letter_to_idx[letter] for letter in y_hat]

        # Calculate metrics
    accuracy = accuracy_score(y_true_idx, y_hat_idx)
    precision = precision_score(y_true_idx, y_hat_idx, average='weighted')
    recall = recall_score(y_true_idx, y_hat_idx, average='weighted')
    f1 = f1_score(y_true_idx, y_hat_idx, average='weighted')

    # keys is a list of idx keys
    
    cm= confusion_matrix(y_hat, [letters[key] for key in keys])
    plt.figure(figsize=(12, 8))
    classes = list(letters) 
    sns.heatmap(cm, 
                annot=True,  # Show numbers in each cell
                fmt='d',     # Format as integers
                cmap='Blues', # Color scheme
                xticklabels=classes,  # Labels on x-axis: ['a', 'b', 'c', ..., 'z']
                yticklabels=classes) 
    plt.title(f'Confusion Matrix for {10} epochs')
    plt.ylabel('True Label')
    plt.xlabel('Predicted Label')
    plt.tight_layout()

    plt.savefig(f'confusion_matrix_{buffer}.png', dpi=300, bbox_inches='tight')
    #plt.show()

    return {"accuracy": accuracy, "precision": precision, "recall": recall, "f1": f1}

#print(evaluate_model(model, spectrogram_tensors, keys))




conv1.weight: torch.Size([32, 1, 3, 3])
conv1.bias: torch.Size([32])
conv2.weight: torch.Size([64, 32, 3, 3])
conv2.bias: torch.Size([64])
conv3.weight: torch.Size([128, 64, 3, 3])
conv3.bias: torch.Size([128])
fc1.weight: torch.Size([512, 75776])
fc1.bias: torch.Size([512])
fc2.weight: torch.Size([26, 512])
fc2.bias: torch.Size([26])
Error: Could not extract key from filename: keystroke_67_Key.esc.npy. Make sure it is a valid key (aA-zZ)
loaded 66 spectrograms


Left for implementation : adding a live prediction 


In [19]:

import numpy as np
import sounddevice as sd
import scipy.signal as signal
import torch
from scipy.ndimage import zoom
import time
from queue import Queue
import threading
import librosa
from scipy.signal import find_peaks
import time

letters = "abcdefghijklmnopqrstuvwxyz"
idx2char = {i: c for i, c in enumerate(letters)}

class LiveKeystrokeDetector:
    def __init__(self, model, sample_rate=44100, window_size=0.2, threshold=0.9999999):
        self.model = model
        self.sample_rate = sample_rate
        self.window_size = window_size
        self.threshold = threshold
        self.window_samples = int(window_size * sample_rate)
        self.audio_buffer = np.zeros(self.window_samples)
        self.prediction_queue = Queue()
        self.is_recording = False
        self.debounce_time = time.time()
        
    def audio_callback(self, indata, frames, time_info, status):
        """Callback function for audio streaming"""
        if status:
            print(f"Audio callback status: {status}")
            
        # Update buffer with new audio data
        self.audio_buffer = np.roll(self.audio_buffer, -frames)
        self.audio_buffer[-frames:] = indata.flatten()
        

        mel_spect = librosa.feature.melspectrogram(
            y=self.audio_buffer,
            sr=self.sample_rate,
            n_mels=80,           # Reduced from 128 - still detailed enough
            n_fft=2048,          # Keep this - good balance
            hop_length=512,      # Changed from 1024 for better temporal resolution
            window='hann',       # Keep this - good choice
            power=2.0            # Keep this - standard choice
        )

        # Convert to log scale (dB)
        mel_spect_db = librosa.power_to_db(mel_spect, ref=np.max)

        # Normalize to 0-1 range
        mel_spect_norm = (mel_spect_db - mel_spect_db.min()) / (mel_spect_db.max() - mel_spect_db.min())

        # Convert to tensor
        spectrogram_tensor = torch.FloatTensor(mel_spect_norm)
        
        # Pad or truncate to 300 in the last dimension
        current_length = spectrogram_tensor.shape[-1]
        if current_length < 300:
            # Pad to 300
            padding = (0, 300 - current_length)
            spectrogram_tensor = F.pad(spectrogram_tensor, padding, mode='constant', value=0)
        elif current_length > 300:
            # Center crop to 300
            start = (current_length - 300) // 2
            spectrogram_tensor = spectrogram_tensor[..., start:start+300]
        
        # Get the energy over time
        energy = np.mean(mel_spect, axis=0)
        
        # Find peaks with minimum height and distance
        peaks, _ = find_peaks(energy, 
                            height=0.5,          # Minimum height
                            distance=20,         # Minimum samples between peaks
                            prominence=0.3)      # Minimum prominence of peaks
        
        if len(peaks) > 0 and time.time() - self.debounce_time > 1:
            print("Keystroke detected")
            # Make prediction
            with torch.no_grad():
                spectrogram_tensor = spectrogram_tensor.unsqueeze(0).unsqueeze(0)  # Add batch and channel dimensions (1, 1, 80, 300)
                output = model(spectrogram_tensor)  # Forward pass
                probabilities = F.softmax(output, dim=1)  # Convert logits to probabilities
                predictions = sorted([(idx2char[letter],prob.item()) for letter, prob in enumerate(probabilities[0])], key=lambda x: x[1], reverse=True)
                print(predictions[:5])
                predicted_idx = torch.argmax(probabilities, dim=1).item()  # Get class index
                predicted_letter = idx2char[predicted_idx]  # Convert index to letter
                self.prediction_queue.put(predicted_letter)

            print("------------------------------------------")
            # add a delay to prevent multiple detections for the same keystroke
            self.debounce_time = time.time()

    
    def start_recording(self):
        """Start recording and processing audio"""
        self.is_recording = True
        print("Starting live keystroke detection...")
        
        try:
            with sd.InputStream(samplerate=self.sample_rate, 
                              channels=1, 
                              callback=self.audio_callback):
                while self.is_recording:
                    # Process predictions from queue
                    try:
                        prediction = self.prediction_queue.get_nowait()
                        print(f"Predicted key: {prediction}")
                    except:
                        pass
                    time.sleep(0.01)  # Prevent high CPU usage
                    
        except KeyboardInterrupt:
            print("\nStopping recording...")
            self.is_recording = False
    
    def stop_recording(self):
        """Stop recording"""
        self.is_recording = False

    

def main():
    # Load your trained model
    try: 
        model = KeystrokeCNN()
        checkpoint = torch.load(filename)  # Replace with your model path
        model.load_state_dict(checkpoint['model_state_dict'])
        print("Model loaded successfully")
    except:
        try: 
            print("Error: Could not load model")
            print("trying to load model from file")
            filename = "20250324_222848_dell_keystroke_model.pt" #input("enter filename of saved model")
            model = KeystrokeCNN()
            checkpoint = torch.load(filename)  # Replace with your model path
            model.load_state_dict(checkpoint['model_state_dict'])
            print("Model loaded successfully")
        except FileNotFoundError:
            print("Error: Could not load model")
            print("Please check the model file name and try again")
            return
    model.eval()  # Set to evaluation mode
    
    # Create detector instance
    detector = LiveKeystrokeDetector(model)
    
    try:
        # Start recording
        detector.start_recording()
    except KeyboardInterrupt:
        print("\nStopping...")
        detector.stop_recording()

if __name__ == "__main__":
    #main()
    pass

OSError: PortAudio library not found

Extra : optimizer for buffer time with analaysis of accuracy / precision 

In [None]:
#run(["jupytext", "nbconvert", "--to", "py", "audio_sampling.ipynb"])

In [8]:
from audio_sampling import generate_spectrograms

import csv 

def tune_hyperparameters():
    """
    Tunes the main hyperparameters, notably 

    - frequency acquisition algorithm : fast Fourrier Transform or Mel spectrogram
    - BUFFER timing
    """
    BUFFER_TIME_MAX = 0.1
    BUFFER_TIME_MIN = 0.0001
    BUFFER_INCREMENT = 0.0001

    NUMPY_DIR = "dell/numpy_arrays"

    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    print("selected device for training  : ",device)


    test_spectrogram_tensors, test_keys, max_width = load_spectrograms_from_directory("dell_test/numpy_arrays") # now load the test set for evaluation

    print("test keys",test_keys)
    print("test tensors",len(test_spectrogram_tensors))

    accuracies = []
    for extraction_method in ("FFT","mel"): 
        for BUFFER in np.arange(BUFFER_TIME_MIN, BUFFER_TIME_MAX, BUFFER_INCREMENT):

            torch.cuda.empty_cache()

            average_keystroke_duration = generate_spectrograms(BUFFER,"dell",extraction_method)
            spectrogram_tensors, keys, max_width = load_spectrograms_from_directory(NUMPY_DIR)
            
            # Create label tensor for current dataset
            letters = "abcdefghijklmnopqrstuvwxyz"
            char_to_idx = {c: i for i, c in enumerate(letters)}
            label_tensor = torch.tensor([char_to_idx[k] for k in keys]).to(device)
            
            train_dataset = KeystrokeDataset(spectrogram_tensors, label_tensor)
            train_loader = DataLoader(train_dataset, batch_size=64, shuffle=True)
            
            # Initialize model for each iteration
            model = KeystrokeCNN(num_classes=len(letters)).to(device)
            lr = 0.001
            criterion = nn.CrossEntropyLoss()
            optimizer = optim.Adam(model.parameters(), lr)
            
            _, filename = train(model, train_loader, criterion, optimizer,lr, 10,"dell",BUFFER)

            generate_spectrograms(BUFFER,"dell_test",extraction_method) # resample the test file with the same buffer time as the training dataset

            test_spectrogram_tensors, test_keys, max_width = load_spectrograms_from_directory("dell_test/numpy_arrays") # now load the test set for evaluation

            test_keys_tensors = torch.tensor([char_to_idx[k] for k in test_keys]).to(device)

            results = evaluate_model(model, test_spectrogram_tensors, test_keys_tensors,BUFFER) # evaluate the model using test keys

            accuracies.append((results["accuracy"], BUFFER))
            
            print(f"Buffer: {BUFFER:.6f}, Accuracy: {results['accuracy']:.4f}")

            with open("backup_results.txt", "a") as f:
                reader = csv.writer(f)
                reader.writerow([BUFFER,results["accuracy"],results["precision"]])

        # Separate accuracies and buffer times for plotting
        accuracy_values = [acc for acc, _ in accuracies]
        buffer_times = [buf for _, buf in accuracies]

        with open("results.txt", "w") as f:
            for acc, buf in accuracies:
                f.write(f"Buffer: {buf:.6f}, Accuracy: {acc:.4f}\n")

        # Create the plot
        plt.figure(figsize=(10, 6))
        plt.plot(buffer_times, accuracy_values, 'b-', marker='o')
        plt.xlabel('Buffer Time (seconds)')
        plt.ylabel('Accuracy')
        plt.title('Model Accuracy vs Buffer Time')
        plt.grid(True)
        plt.savefig('accuracy_vs_buffer_time.png', dpi=300, bbox_inches='tight')
        plt.show()

print(tune_hyperparameters())

selected device for training  :  cpu
Error: Could not extract key from filename: keystroke_67_Key.esc.npy. Make sure it is a valid key (aA-zZ)
loaded 66 spectrograms
test keys ['t', 'j', 'g', 'd', 'v', 'k', 'h', 'g', 'e', 'h', 'b', 'c', 'r', 'e', 't', 'd', 'h', 'k', 'f', 'i', 'u', 'g', 'c', 'j', 'v', 'g', 'y', 'r', 'g', 'f', 'l', 'd', 'd', 'd', 'g', 't', 'r', 't', 'd', 'h', 'y', 'v', 'b', 'q', 'b', 'a', 's', 'j', 'u', 'o', 'f', 'e', 'a', 'g', 'h', 'i', 'f', 'y', 'e', 'g', 'g', 'd', 'f', 'g', 'o', 'd']
test tensors 66
Summary of the keystroke data : 
Total valid keystrokes: 1096
Total invalid keystrokes: 11
Keystroke times:
Average keystroke duration:  0.10812529379562047
Processing complete. Spectrograms and NumPy arrays saved.
Error: Could not extract key from filename: keystroke_412_+.npy. Make sure it is a valid key (aA-zZ)
Error: Could not extract key from filename: keystroke_429_;.npy. Make sure it is a valid key (aA-zZ)
Error: Could not extract key from filename: keystroke_205_Ke

KeyboardInterrupt: 