In [1]:
!pip install torch torchvision torchaudio pandas numpy scikit-learn pynput matplotlib seaborn




In [2]:
# Cell 1: Imports and Setup
import pandas as pd
import numpy as np
import torch
import torch.nn as nn
import matplotlib.pyplot as plt
from collections import deque, Counter
import warnings
warnings.filterwarnings('ignore')

print("✅ All packages imported successfully!")
print(f"PyTorch version: {torch.__version__}")
print(f"Device: {'GPU' if torch.cuda.is_available() else 'CPU'}")

✅ All packages imported successfully!
PyTorch version: 2.7.1+cpu
Device: CPU


In [3]:
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
import torch.nn.functional as F
from collections import deque, Counter
import time
import threading
import pickle
import os
from typing import List, Tuple, Dict, Optional
import warnings
warnings.filterwarnings('ignore')

class KeystrokeDataProcessor:
    """Process raw keystroke data into feature sequences"""
    
    def __init__(self, sequence_length: int = 10):
        self.sequence_length = sequence_length
        self.key_to_id = {}
        self.id_to_key = {}
        self.key_counter = 0
        
    def _get_key_id(self, key_code: int) -> int:
        """Convert key code to ID for embedding"""
        if key_code not in self.key_to_id:
            self.key_to_id[key_code] = self.key_counter
            self.id_to_key[self.key_counter] = key_code
            self.key_counter += 1
        return self.key_to_id[key_code]
    
    def _quantize_time(self, time_value: float, bins: int = 100) -> int:
        """Quantize timing values for embedding"""
        # Clip extreme values and quantize
        clipped = np.clip(time_value, 0, 500)  # Max 500ms
        return int(clipped * bins / 500)
    
    def process_raw_data(self, data: pd.DataFrame) -> List[Tuple[int, int, int]]:
        """
        Process raw keystroke data into sequences
        Input: DataFrame with columns [Timestamp, Event, Key_Code]
        Output: List of (key_id, hold_time_quantized, digraph_time_quantized)
        """
        sequences = []
        key_events = {}
        
        # Sort by timestamp
        data = data.sort_values('Timestamp')
        
        prev_key_down_time = None
        prev_key_code = None
        
        for _, row in data.iterrows():
            timestamp = row['Timestamp']
            event = row['Event']  # 1 = key down, 0 = key up
            key_code = row['Key_Code']
            
            if event == 1:  # Key down
                key_events[key_code] = timestamp
                
                # Calculate digraph flight time (time between consecutive key presses)
                digraph_time = 0
                if prev_key_down_time is not None:
                    digraph_time = timestamp - prev_key_down_time
                
                prev_key_down_time = timestamp
                prev_key_code = key_code
                
            elif event == 0 and key_code in key_events:  # Key up
                # Calculate hold time
                hold_time = timestamp - key_events[key_code]
                
                # Get digraph time (stored when key was pressed)
                digraph_time = 0
                if len(sequences) > 0:
                    # Use time difference between this key press and previous
                    digraph_time = key_events[key_code] - (sequences[-1][3] if len(sequences) > 0 else key_events[key_code])
                
                # Create feature tuple
                key_id = self._get_key_id(key_code)
                hold_time_q = self._quantize_time(hold_time / 1000000)  # Convert to ms
                digraph_time_q = self._quantize_time(digraph_time / 1000000)  # Convert to ms
                
                sequences.append((key_id, hold_time_q, digraph_time_q, key_events[key_code]))
                
                del key_events[key_code]
        
        # Remove the timestamp (last element) from tuples
        return [(seq[0], seq[1], seq[2]) for seq in sequences]
    
    def create_sequences(self, features: List[Tuple[int, int, int]]) -> List[List[Tuple[int, int, int]]]:
        """Create sliding window sequences"""
        if len(features) < self.sequence_length:
            return []
        
        sequences = []
        for i in range(len(features) - self.sequence_length + 1):
            sequences.append(features[i:i + self.sequence_length])
        
        return sequences

class KeystrokeDataset(Dataset):
    """PyTorch Dataset for keystroke sequences"""
    
    def __init__(self, sequences: List[List[Tuple[int, int, int]]], labels: List[int]):
        self.sequences = sequences
        self.labels = labels
    
    def __len__(self):
        return len(self.sequences)
    
    def __getitem__(self, idx):
        sequence = self.sequences[idx]
        label = self.labels[idx]
        
        # Convert to tensors
        keys = torch.tensor([s[0] for s in sequence], dtype=torch.long)
        hold_times = torch.tensor([s[1] for s in sequence], dtype=torch.long)
        digraph_times = torch.tensor([s[2] for s in sequence], dtype=torch.long)
        
        return keys, hold_times, digraph_times, torch.tensor(label, dtype=torch.long)

class TKCAModel(nn.Module):
    """TKCA Neural Network Model"""
    
    def __init__(self, num_keys: int, num_time_bins: int = 100, 
                 key_embed_dim: int = 16, time_embed_dim: int = 8,
                 hidden_dim: int = 64, num_layers: int = 2):
        super(TKCAModel, self).__init__()
        
        # Embedding layers
        self.key_embedding = nn.Embedding(num_keys, key_embed_dim)
        self.hold_time_embedding = nn.Embedding(num_time_bins, time_embed_dim)
        self.digraph_time_embedding = nn.Embedding(num_time_bins, time_embed_dim)
        
        # Input dimension for LSTM
        input_dim = key_embed_dim + 2 * time_embed_dim
        
        # Bi-LSTM
        self.lstm = nn.LSTM(input_dim, hidden_dim, num_layers, 
                           batch_first=True, bidirectional=True)
        
        # Attention mechanism
        self.attention = nn.Linear(hidden_dim * 2, 1)
        
        # Classification layers
        self.classifier = nn.Sequential(
            nn.Linear(hidden_dim * 2, 32),
            nn.ReLU(),
            nn.Dropout(0.3),
            nn.Linear(32, 2)  # Binary classification
        )
    
    def forward(self, keys, hold_times, digraph_times):
        batch_size, seq_len = keys.size()
        
        # Embeddings
        key_embeds = self.key_embedding(keys)
        hold_embeds = self.hold_time_embedding(hold_times)
        digraph_embeds = self.digraph_time_embedding(digraph_times)
        
        # Concatenate embeddings
        inputs = torch.cat([key_embeds, hold_embeds, digraph_embeds], dim=-1)
        
        # Bi-LSTM
        lstm_out, _ = self.lstm(inputs)
        
        # Attention mechanism
        attention_weights = F.softmax(self.attention(lstm_out), dim=1)
        attended = torch.sum(attention_weights * lstm_out, dim=1)
        
        # Classification
        output = self.classifier(attended)
        return output

class TKCAAuthenticator:
    """Main TKCA Continuous Authentication System"""
    
    def __init__(self, user_id: str, sequence_length: int = 10, 
                 majority_vote_window: int = 5):
        self.user_id = user_id
        self.sequence_length = sequence_length
        self.majority_vote_window = majority_vote_window
        
        self.processor = KeystrokeDataProcessor(sequence_length)
        self.model = None
        self.device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
        
        # Real-time monitoring
        self.keystroke_buffer = deque(maxlen=sequence_length * 2)
        self.prediction_buffer = deque(maxlen=majority_vote_window)
        self.is_monitoring = False
        self.monitoring_thread = None
        
        # Authentication callback
        self.auth_callback = None
    
    def train(self, user_data: pd.DataFrame, impostor_data: List[pd.DataFrame] = None):
        """Train the TKCA model"""
        print(f"Training TKCA model for user: {self.user_id}")
        
        # Process user data (positive samples)
        user_features = self.processor.process_raw_data(user_data)
        user_sequences = self.processor.create_sequences(user_features)
        user_labels = [0] * len(user_sequences)  # 0 = legitimate user
        
        all_sequences = user_sequences
        all_labels = user_labels
        
        # Process impostor data (negative samples)
        if impostor_data:
            for imp_data in impostor_data:
                imp_features = self.processor.process_raw_data(imp_data)
                imp_sequences = self.processor.create_sequences(imp_features)
                imp_labels = [1] * len(imp_sequences)  # 1 = impostor
                
                all_sequences.extend(imp_sequences)
                all_labels.extend(imp_labels)
        
        if len(all_sequences) == 0:
            raise ValueError("No valid sequences generated from data")
        
        print(f"Generated {len(all_sequences)} sequences ({len(user_sequences)} user, {len(all_sequences) - len(user_sequences)} impostor)")
        
        # Create dataset
        dataset = KeystrokeDataset(all_sequences, all_labels)
        dataloader = DataLoader(dataset, batch_size=32, shuffle=True)
        
        # Initialize model
        num_keys = self.processor.key_counter
        self.model = TKCAModel(num_keys=num_keys).to(self.device)
        
        # Training setup
        criterion = nn.CrossEntropyLoss()
        optimizer = optim.Adam(self.model.parameters(), lr=0.001)
        
        # Training loop
        self.model.train()
        num_epochs = 50
        
        for epoch in range(num_epochs):
            total_loss = 0
            correct = 0
            total = 0
            
            for keys, hold_times, digraph_times, labels in dataloader:
                keys = keys.to(self.device)
                hold_times = hold_times.to(self.device)
                digraph_times = digraph_times.to(self.device)
                labels = labels.to(self.device)
                
                optimizer.zero_grad()
                outputs = self.model(keys, hold_times, digraph_times)
                loss = criterion(outputs, labels)
                loss.backward()
                optimizer.step()
                
                total_loss += loss.item()
                _, predicted = torch.max(outputs.data, 1)
                total += labels.size(0)
                correct += (predicted == labels).sum().item()
            
            if (epoch + 1) % 10 == 0:
                accuracy = 100 * correct / total
                print(f'Epoch [{epoch+1}/{num_epochs}], Loss: {total_loss/len(dataloader):.4f}, Accuracy: {accuracy:.2f}%')
        
        print("Training completed!")
    
    def predict_sequence(self, sequence: List[Tuple[int, int, int]]) -> int:
        """Predict if a sequence is from legitimate user or impostor"""
        if self.model is None:
            raise ValueError("Model not trained yet")
        
        if len(sequence) != self.sequence_length:
            raise ValueError(f"Sequence must be of length {self.sequence_length}")
        
        self.model.eval()
        with torch.no_grad():
            # Convert sequence to tensors
            keys = torch.tensor([s[0] for s in sequence], dtype=torch.long).unsqueeze(0).to(self.device)
            hold_times = torch.tensor([s[1] for s in sequence], dtype=torch.long).unsqueeze(0).to(self.device)
            digraph_times = torch.tensor([s[2] for s in sequence], dtype=torch.long).unsqueeze(0).to(self.device)
            
            outputs = self.model(keys, hold_times, digraph_times)
            _, predicted = torch.max(outputs, 1)
            
            return predicted.item()
    
    def add_keystroke(self, timestamp: float, event: int, key_code: int):
        """Add a keystroke to the real-time buffer"""
        self.keystroke_buffer.append((timestamp, event, key_code))
        
        # Try to create a sequence from recent keystrokes
        if len(self.keystroke_buffer) >= self.sequence_length * 2:
            # Convert buffer to DataFrame for processing
            df = pd.DataFrame(list(self.keystroke_buffer), 
                            columns=['Timestamp', 'Event', 'Key_Code'])
            
            try:
                features = self.processor.process_raw_data(df)
                if len(features) >= self.sequence_length:
                    # Get the most recent sequence
                    recent_sequence = features[-self.sequence_length:]
                    
                    # Predict
                    prediction = self.predict_sequence(recent_sequence)
                    self.prediction_buffer.append(prediction)
                    
                    # Apply majority vote if buffer is full
                    if len(self.prediction_buffer) == self.majority_vote_window:
                        majority_vote = Counter(self.prediction_buffer).most_common(1)[0][0]
                        
                        # Callback with result
                        if self.auth_callback:
                            self.auth_callback(majority_vote == 0)  # True if legitimate user
                        
                        return majority_vote == 0
            except Exception as e:
                print(f"Error processing keystroke: {e}")
        
        return None
    
    def set_auth_callback(self, callback):
        """Set callback function for authentication results"""
        self.auth_callback = callback
    
    def start_monitoring(self):
        """Start continuous monitoring (placeholder - would integrate with actual keystroke capture)"""
        self.is_monitoring = True
        print(f"Started continuous authentication monitoring for user: {self.user_id}")
        print("Note: This is a demonstration. In practice, you'd integrate with system-level keystroke capture.")
    
    def stop_monitoring(self):
        """Stop continuous monitoring"""
        self.is_monitoring = False
        print("Stopped monitoring")
    
    def save_model(self, filepath: str):
        """Save the trained model and processor with version info"""
        if self.model is None:
            raise ValueError("No model to save")
        
        save_data = {
            'version': '1.0',  # Add version tracking
            'model_state_dict': self.model.state_dict(),
            'processor': self.processor,
            'user_id': self.user_id,
            'sequence_length': self.sequence_length,
            'majority_vote_window': self.majority_vote_window,
            'timestamp': time.time()  # Add save timestamp
        }
        
        torch.save(save_data, filepath, _use_new_zipfile_serialization=False)
        print(f"Model saved to {filepath}")


    
    def load_model(self, filepath: str):
        """Load a trained model and processor with error handling"""
        if not os.path.exists(filepath):
            raise FileNotFoundError(f"Model file not found: {filepath}")
        
        try:
            save_data = torch.load(filepath, map_location=self.device, weights_only=False)
            
            # Check if all required keys exist
            required_keys = ['processor', 'user_id', 'sequence_length', 'majority_vote_window', 'model_state_dict']
            missing_keys = [key for key in required_keys if key not in save_data]
            
            if missing_keys:
                raise KeyError(f"Missing keys in saved model: {missing_keys}")
            
            self.processor = save_data['processor']
            self.user_id = save_data['user_id']
            self.sequence_length = save_data['sequence_length']
            self.majority_vote_window = save_data['majority_vote_window']
            
            # Initialize model with correct parameters
            num_keys = self.processor.key_counter
            self.model = TKCAModel(num_keys=num_keys).to(self.device)
            self.model.load_state_dict(save_data['model_state_dict'])
            
            print(f"Model loaded from {filepath}")
            
        except KeyError as e:
            print(f"Model file format incompatible: {e}")
            print("The saved model was created with a different version of the code.")
            print("Please retrain the model or use a compatible model file.")
            raise
        except Exception as e:
            print(f"Error loading model: {e}")
            raise





# Demo and testing functions
def create_sample_data():
    # Load actual user data from CSV
    user_a_df = pd.read_csv("User_A_2025.csv")

    # Filter or clean if needed (optional)
    # For example: keep only necessary columns
    user_a_df = user_a_df[['Timestamp', 'Event', 'Key_Code']]

    # Simulate impostor data (if needed for testing)
    impostor_data = {
        'Timestamp': [1720000000000000 + i * 150000 for i in range(20)],
        'Event': [1, 0] * 10,
        'Key_Code': [72, 72, 69, 69, 76, 76, 76, 76, 79, 79, 32, 32, 87, 87, 79, 79, 82, 82, 76, 76]
    }
    impostor_df = pd.DataFrame(impostor_data)

    return user_a_df, impostor_df


def demo_tkca_system():
    """Demonstrate the TKCA system"""
    print("=== TKCA Continuous Authentication System Demo ===\n")
    
    # Create sample data
    user_data, impostor_data = create_sample_data()
    
    # Initialize authenticator
    auth = TKCAAuthenticator(user_id="user_A", sequence_length=8, majority_vote_window=3)
    
    # Define authentication callback
    def auth_result_callback(is_legitimate: bool):
        if is_legitimate:
            print("✅ Authentication: LEGITIMATE USER")
        else:
            print("❌ Authentication: IMPOSTOR DETECTED - ALERT!")
    
    auth.set_auth_callback(auth_result_callback)
    
    try:
        # Train the model
        print("1. Training TKCA model...")
        auth.train(user_data, [impostor_data])
        
        # Save model
        print("\n2. Saving model...")
        auth.save_model("tkca_model_user_A.pth")
        
        # Test with sample sequences
        print("\n3. Testing authentication...")
        
        # Simulate real-time keystroke input
        print("\nSimulating real-time keystroke authentication:")
        print("Adding keystrokes to buffer...")
        
        # Simulate some keystrokes (from user data)
        test_keystrokes = [
            (1720000000000000, 1, 84),  # T down
            (1720000000100000, 0, 84),  # T up
            (1720000000200000, 1, 72),  # H down
            (1720000000300000, 0, 72),  # H up
            (1720000000400000, 1, 69),  # E down
            (1720000000500000, 0, 69),  # E up
            (1720000000600000, 1, 32),  # Space down
            (1720000000700000, 0, 32),  # Space up
            (1720000000800000, 1, 84),  # T down
            (1720000000900000, 0, 84),  # T up
            (1720000001000000, 1, 72),  # H down
            (1720000001100000, 0, 72),  # H up
            (1720000001200000, 1, 69),  # E down
            (1720000001300000, 0, 69),  # E up
            (1720000001400000, 1, 32),  # Space down
            (1720000001500000, 0, 32),  # Space up
        ]
        
        for timestamp, event, key_code in test_keystrokes:
            result = auth.add_keystroke(timestamp, event, key_code)
            if result is not None:
                break
        
        print("\n4. System ready for continuous monitoring!")
        auth.start_monitoring()
        
        print("\n=== Demo completed successfully! ===")
        print("\nTo use in production:")
        print("1. Integrate with system-level keystroke capture")
        print("2. Collect more training data for better accuracy")
        print("3. Implement secure model storage and loading")
        print("4. Add user feedback mechanisms for model improvement")
        
    except Exception as e:
        print(f"Error in demo: {e}")
        import traceback
        traceback.print_exc()

if __name__ == "__main__":
    demo_tkca_system()

=== TKCA Continuous Authentication System Demo ===

1. Training TKCA model...
Training TKCA model for user: user_A
Generated 350 sequences (350 user, 0 impostor)
Epoch [10/50], Loss: 0.0594, Accuracy: 99.14%
Epoch [20/50], Loss: 0.0636, Accuracy: 99.14%
Epoch [30/50], Loss: 0.0049, Accuracy: 100.00%
Epoch [40/50], Loss: 0.0006, Accuracy: 100.00%
Epoch [50/50], Loss: 0.0008, Accuracy: 100.00%
Training completed!

2. Saving model...
Model saved to tkca_model_user_A.pth

3. Testing authentication...

Simulating real-time keystroke authentication:
Adding keystrokes to buffer...

4. System ready for continuous monitoring!
Started continuous authentication monitoring for user: user_A
Note: This is a demonstration. In practice, you'd integrate with system-level keystroke capture.

=== Demo completed successfully! ===

To use in production:
1. Integrate with system-level keystroke capture
2. Collect more training data for better accuracy
3. Implement secure model storage and loading
4. Add use

In [4]:
!pip install pynput




In [5]:
from pynput import keyboard
import time

class RealTimeKeystrokeCapture:
    """Capture real-time keystrokes and feed to TKCA system"""
    
    def __init__(self, tkca_authenticator):
        self.auth = tkca_authenticator
        self.listener = None
        self.start_time = time.time()
        
    def on_press(self, key):
        """Handle key press events"""
        try:
            # Get current timestamp in microseconds (matching the training data format)
            timestamp = int(time.time() * 1000000)
            
            # Get key code
            if hasattr(key, 'vk'):
                key_code = key.vk
            elif hasattr(key, 'char') and key.char:
                key_code = ord(key.char.upper())
            else:
                # Handle special keys
                key_code = hash(str(key)) % 1000  # Simple hash for special keys
            
            # Add keystroke (event=1 for key down)
            self.auth.add_keystroke(timestamp, 1, key_code)
            
        except Exception as e:
            print(f"Error capturing key press: {e}")
    
    def on_release(self, key):
        """Handle key release events"""
        try:
            timestamp = int(time.time() * 1000000)
            
            # Get key code (same logic as press)
            if hasattr(key, 'vk'):
                key_code = key.vk
            elif hasattr(key, 'char') and key.char:
                key_code = ord(key.char.upper())
            else:
                key_code = hash(str(key)) % 1000
            
            # Add keystroke (event=0 for key up)
            self.auth.add_keystroke(timestamp, 0, key_code)
            
            # Stop on ESC key
            if key == keyboard.Key.esc:
                print("\nStopping real-time capture...")
                return False
                
        except Exception as e:
            print(f"Error capturing key release: {e}")
    
    def start_capture(self):
        """Start capturing keystrokes"""
        print("Starting real-time keystroke capture...")
        print("Type normally - authentication will happen automatically")
        print("Press ESC to stop")
        
        with keyboard.Listener(
            on_press=self.on_press,
            on_release=self.on_release) as listener:
            listener.join()


In [6]:
# import os

# def demo_real_time_tkca():
#     """Demo TKCA with real-time typing"""
#     print("=== Real-Time TKCA Authentication Demo ===\n")
    
#     auth = TKCAAuthenticator(user_id="user_A", sequence_length=8, majority_vote_window=3)
    
#     def auth_result_callback(is_legitimate: bool):
#         timestamp = time.strftime("%H:%M:%S")
#         if is_legitimate:
#             print(f"[{timestamp}] ✅ LEGITIMATE USER - Access granted")
#         else:
#             print(f"[{timestamp}] ❌ IMPOSTOR DETECTED - Security alert!")
    
#     auth.set_auth_callback(auth_result_callback)
    
#     try:
#         model_path = "tkca_model_user_A.pth"
        
#         # Delete existing model file if it exists
#         if os.path.exists(model_path):
#             print("Removing incompatible model file...")
#             os.remove(model_path)
        
#         # Always train a new model
#         print("Training new model...")
#         user_data, impostor_data = create_sample_data()
#         auth.train(user_data, [impostor_data])
#         auth.save_model(model_path)
        
#         print("\n🎯 Model ready! Starting real-time authentication...")
        
#         # Start real-time capture here
#         # (Add your real-time capture code)
        
#     except Exception as e:
#         print(f"Error: {e}")
#         import traceback
#         traceback.print_exc()


In [7]:
def demo_real_time_tkca():
    """Demo TKCA with real-time typing"""
    print("=== Real-Time TKCA Authentication Demo ===\n")
    
    # Load existing model or train new one
    auth = TKCAAuthenticator(user_id="user_A", sequence_length=8, majority_vote_window=3)
    
    # Authentication callback with more detailed feedback
    def auth_result_callback(is_legitimate: bool):
        timestamp = time.strftime("%H:%M:%S")
        if is_legitimate:
            print(f"[{timestamp}] ✅ LEGITIMATE USER - Access granted")
        else:
            print(f"[{timestamp}] ❌ IMPOSTOR DETECTED - Security alert!")
    
    auth.set_auth_callback(auth_result_callback)
    
    try:
        # Try to load existing model
        if os.path.exists("tkca_model_user_A.pth"):
            print("Loading existing model...")
            auth.load_model("tkca_model_user_A.pth")
        else:
            print("Training new model...")
            user_data, impostor_data = create_sample_data()
            auth.train(user_data, [impostor_data])
            auth.save_model("tkca_model_user_A.pth")
        
        print("\n🎯 Model ready! Starting real-time authentication...")
        print("📝 Start typing to test the system")
        print("⚡ Authentication results will appear automatically")
        
        # Start real-time capture
        capture = RealTimeKeystrokeCapture(auth)
        capture.start_capture()
        
    except Exception as e:
        print(f"Error: {e}")
        import traceback
        traceback.print_exc()

# Add this to your main section
if __name__ == "__main__":
    # Choose which demo to run
    choice = input("Choose demo:\n1. Simulated data\n2. Real-time typing\nEnter choice (1/2): ")
    
    if choice == "2":
        demo_real_time_tkca()
    else:
        demo_tkca_system()


Choose demo:
1. Simulated data
2. Real-time typing
Enter choice (1/2):  2


=== Real-Time TKCA Authentication Demo ===

Loading existing model...
Model loaded from tkca_model_user_A.pth

🎯 Model ready! Starting real-time authentication...
📝 Start typing to test the system
⚡ Authentication results will appear automatically
Starting real-time keystroke capture...
Type normally - authentication will happen automatically
Press ESC to stop
Error processing keystroke: index out of range in self
Error processing keystroke: index out of range in self
Error processing keystroke: index out of range in self
Error processing keystroke: index out of range in self
Error processing keystroke: index out of range in self
Error processing keystroke: index out of range in self
Error processing keystroke: index out of range in self
Error processing keystroke: index out of range in self
Error processing keystroke: index out of range in self
Error processing keystroke: index out of range in self
Error processing keystroke: index out of range in self
Error processing keystroke: inde

In [8]:
def prepare_sample_data():
    """Convert your sample data to proper DataFrame format"""
    
    # Your sample data
    sample_data = [
        (1.71968E+16, 1, 84), (1.71968E+16, 0, 84),  # T
        (1.71968E+16, 1, 72), (1.71968E+16, 0, 72),  # H  
        (1.71968E+16, 1, 69), (1.71968E+16, 0, 69),  # E
        (1.71968E+16, 1, 32), (1.71968E+16, 0, 32),  # Space
        (1.71968E+16, 1, 84), (1.71968E+16, 0, 84),  # T
        (1.71968E+16, 1, 72), (1.71968E+16, 0, 72),  # H
        (1.71968E+16, 1, 69), (1.71968E+16, 0, 69),  # E
        (1.71968E+16, 1, 32), (1.71968E+16, 0, 32),  # Space
        (1.71968E+16, 1, 66), (1.71968E+16, 0, 66),  # B
        (1.71968E+16, 1, 69), (1.71968E+16, 0, 69),  # E
        (1.71968E+16, 1, 78), (1.71968E+16, 0, 78),  # N
        (1.71968E+16, 1, 65), (1.71968E+16, 0, 65),  # A
        (1.71968E+16, 1, 83), (1.71968E+16, 0, 83),  # S
        (1.71968E+16, 1, 79), (1.71968E+16, 0, 79),  # O
        (1.71968E+16, 1, 32), (1.71968E+16, 0, 32),  # Space
        (1.71968E+16, 1, 70), (1.71968E+16, 0, 70),  # F
        (1.71968E+16, 1, 85), # U (incomplete in your data)
    ]
    
    # Add realistic timestamps (the scientific notation shows all same timestamp)
    # We'll add incremental microsecond differences
    processed_data = []
    base_timestamp = int(1.71968E+16)
    
    for i, (timestamp, event, key_code) in enumerate(sample_data):
        # Add realistic timing between keystrokes (50-200ms intervals)
        realistic_timestamp = base_timestamp + (i * 100000)  # 100ms intervals
        processed_data.append((realistic_timestamp, event, key_code))
    
    # Convert to DataFrame
    df = pd.DataFrame(processed_data, columns=['Timestamp', 'Event', 'Key_Code'])
    return df


In [9]:
def demo_with_your_data():
    """Demo TKCA system with your specific sample data"""
    print("=== Testing TKCA with Your Sample Data ===\n")
    
    # Prepare your sample data
    your_data = prepare_sample_data()
    print(f"Loaded {len(your_data)} keystroke events")
    print("Sample data preview:")
    print(your_data.head(10))
    print()
    
    # Create some impostor data for training contrast
    impostor_data = {
        'Timestamp': [int(1.71968E+16) + i * 150000 for i in range(20)],
        'Event': [1, 0] * 10,
        'Key_Code': [72, 72, 69, 69, 76, 76, 76, 76, 79, 79, 32, 32, 87, 87, 79, 79, 82, 82, 76, 76]
    }
    impostor_df = pd.DataFrame(impostor_data)
    
    # Initialize authenticator
    auth = TKCAAuthenticator(user_id="your_user", sequence_length=6, majority_vote_window=3)
    
    # Authentication callback
    def auth_result_callback(is_legitimate: bool):
        timestamp = time.strftime("%H:%M:%S")
        if is_legitimate:
            print(f"[{timestamp}] ✅ LEGITIMATE USER - Your typing pattern recognized")
        else:
            print(f"[{timestamp}] ❌ IMPOSTOR DETECTED - Unusual typing pattern!")
    
    auth.set_auth_callback(auth_result_callback)
    
    try:
        print("1. Training model with your data...")
        auth.train(your_data, [impostor_df])
        
        print("\n2. Testing real-time authentication with your keystrokes...")
        
        # Test each keystroke from your data in real-time simulation
        print("Simulating real-time input of your sample data:")
        
        for _, row in your_data.iterrows():
            timestamp = int(row['Timestamp'])
            event = int(row['Event'])
            key_code = int(row['Key_Code'])
            
            # Add keystroke to system
            result = auth.add_keystroke(timestamp, event, key_code)
            
            # Show what key was pressed/released
            key_char = chr(key_code) if 32 <= key_code <= 126 else f"Key_{key_code}"
            action = "DOWN" if event == 1 else "UP"
            print(f"Key: {key_char} {action}")
            
            # Small delay to simulate real typing
            time.sleep(0.1)
        
        print("\n3. Testing completed!")
        print("\nYour sample data analysis:")
        analyze_your_data(your_data)
        
    except Exception as e:
        print(f"Error: {e}")
        import traceback
        traceback.print_exc()

def analyze_your_data(data):
    """Analyze the characteristics of your sample data"""
    print("\n=== Data Analysis ===")
    
    # Extract the text being typed
    text_chars = []
    for _, row in data.iterrows():
        if row['Event'] == 1:  # Key down events
            key_code = int(row['Key_Code'])
            if 32 <= key_code <= 126:  # Printable ASCII
                text_chars.append(chr(key_code))
    
    typed_text = ''.join(text_chars)
    print(f"Text typed: '{typed_text}'")
    print(f"Total keystrokes: {len(data)}")
    print(f"Unique keys: {data['Key_Code'].nunique()}")
    
    # Calculate basic timing statistics
    key_down_events = data[data['Event'] == 1]
    if len(key_down_events) > 1:
        timestamps = key_down_events['Timestamp'].values
        intervals = np.diff(timestamps) / 1000000  # Convert to seconds
        print(f"Average time between keystrokes: {np.mean(intervals):.3f} seconds")
        print(f"Typing speed: ~{60 / np.mean(intervals):.1f} keys per minute")


In [10]:
if __name__ == "__main__":
    choice = input("Choose test:\n1. Original demo\n2. Test with your sample data\nEnter choice (1/2): ")
    
    if choice == "2":
        demo_with_your_data()
    else:
        demo_tkca_system()


Choose test:
1. Original demo
2. Test with your sample data
Enter choice (1/2):  2


=== Testing TKCA with Your Sample Data ===

Loaded 33 keystroke events
Sample data preview:
           Timestamp  Event  Key_Code
0  17196800000000000      1        84
1  17196800000100000      0        84
2  17196800000200000      1        72
3  17196800000300000      0        72
4  17196800000400000      1        69
5  17196800000500000      0        69
6  17196800000600000      1        32
7  17196800000700000      0        32
8  17196800000800000      1        84
9  17196800000900000      0        84

1. Training model with your data...
Training TKCA model for user: your_user
Generated 16 sequences (16 user, 0 impostor)
Epoch [10/50], Loss: 0.6087, Accuracy: 68.75%
Epoch [20/50], Loss: 0.4219, Accuracy: 68.75%
Epoch [30/50], Loss: 0.1847, Accuracy: 100.00%
Epoch [40/50], Loss: 0.0623, Accuracy: 100.00%
Epoch [50/50], Loss: 0.0147, Accuracy: 100.00%
Training completed!

2. Testing real-time authentication with your keystrokes...
Simulating real-time input of your sample data:
Key: T

In [None]:
# # Cell 3: Create Your Sample Data
# def create_sample_dataset():
#     """Create realistic keystroke data"""
    
#     # User A data (your typing pattern)
#     user_keystrokes = []
#     base_time = 1720000000000000
    
#     # Simulate typing "THE QUICK BROWN FOX"
#     text = "THE QUICK BROWN FOX"
#     key_codes = {
#         'T': 84, 'H': 72, 'E': 69, ' ': 32, 'Q': 81, 'U': 85, 
#         'I': 73, 'C': 67, 'K': 75, 'B': 66, 'R': 82, 'O': 79, 
#         'W': 87, 'N': 78, 'F': 70, 'X': 88
#     }
    
#     current_time = base_time
#     for char in text:
#         if char in key_codes:
#             # Key down
#             user_keystrokes.append([current_time, 1, key_codes[char]])
#             current_time += np.random.randint(80000, 120000)  # Hold time 80-120ms
            
#             # Key up
#             user_keystrokes.append([current_time, 0, key_codes[char]])
#             current_time += np.random.randint(50000, 100000)  # Time to next key
    
#     user_df = pd.DataFrame(user_keystrokes, columns=['Timestamp', 'Event', 'Key_Code'])
    
#     # Impostor data (different typing pattern)
#     impostor_keystrokes = []
#     current_time = base_time + 1000000
    
#     for char in "HELLO WORLD TEST":
#         if char in key_codes:
#             # Different timing pattern
#             impostor_keystrokes.append([current_time, 1, key_codes[char]])
#             current_time += np.random.randint(150000, 200000)  # Slower typing
            
#             impostor_keystrokes.append([current_time, 0, key_codes[char]])
#             current_time += np.random.randint(100000, 150000)
    
#     impostor_df = pd.DataFrame(impostor_keystrokes, columns=['Timestamp', 'Event', 'Key_Code'])
    
#     return user_df, impostor_df

# # Create sample data
# user_data, impostor_data = create_sample_dataset()
# print(f"✅ User data created: {len(user_data)} events")
# print(f"✅ Impostor data created: {len(impostor_data)} events")

# # Display sample
# print("\nSample User Data:")
# print(user_data.head(10))

In [None]:
# # Cell 4: Train TKCA Model
# print("🚀 Starting TKCA Training...")

# # Initialize authenticator
# auth = TKCAAuthenticator(
#     user_id="user_A", 
#     sequence_length=8,  # Length of keystroke sequences
#     majority_vote_window=3  # Number of predictions for majority vote
# )

# # Train the model
# try:
#     auth.train(user_data, [impostor_data])
#     print("✅ Training completed successfully!")
    
#     # Save the model
#     auth.save_model("tkca_model_user_A.pth")
#     print("✅ Model saved!")
    
# except Exception as e:
#     print(f"❌ Training failed: {e}")

In [None]:
# # Cell 5: Test Authentication
# print("🧪 Testing TKCA Authentication System")
# print("=" * 50)

# # Test function
# def test_authentication():
#     results = []
    
#     # Test with legitimate user data
#     print("1. Testing with LEGITIMATE USER data...")
#     test_user_data, _ = create_sample_dataset()
    
#     # Process test data
#     test_features = auth.processor.process_raw_data(test_user_data)
#     if len(test_features) >= auth.sequence_length:
#         test_sequence = test_features[:auth.sequence_length]
#         prediction = auth.predict_sequence(test_sequence)
#         result = "LEGITIMATE" if prediction == 0 else "IMPOSTOR"
#         print(f"   Result: {result} ({'✅' if prediction == 0 else '❌'})")
#         results.append(('Legitimate', prediction == 0))
    
#     # Test with impostor data
#     print("2. Testing with IMPOSTOR data...")
#     _, test_impostor_data = create_sample_dataset()
    
#     test_imp_features = auth.processor.process_raw_data(test_impostor_data)
#     if len(test_imp_features) >= auth.sequence_length:
#         test_imp_sequence = test_imp_features[:auth.sequence_length]
#         prediction = auth.predict_sequence(test_imp_sequence)
#         result = "LEGITIMATE" if prediction == 0 else "IMPOSTOR"
#         print(f"   Result: {result} ({'❌' if prediction == 0 else '✅'})")
#         results.append(('Impostor', prediction == 1))
    
#     return results

# # Run tests
# test_results = test_authentication()
# print(f"\n📊 Test Summary:")
# for test_type, correct in test_results:
#     print(f"   {test_type}: {'PASSED' if correct else 'FAILED'}")

In [None]:
# # Cell 6: Simulate Real-time Authentication
# print("⚡ Real-time Authentication Simulation")
# print("=" * 50)

# # Set up callback for results
# def auth_callback(is_legitimate):
#     if is_legitimate:
#         print("🟢 AUTHENTICATION: LEGITIMATE USER - Access Granted")
#     else:
#         print("🔴 SECURITY ALERT: IMPOSTOR DETECTED - Access Denied!")

# auth.set_auth_callback(auth_callback)

# # Simulate real-time keystrokes
# print("Simulating real-time typing...")

# # Create test keystroke stream
# test_keystrokes = [
#     (1720000000000000, 1, 84),  # T down
#     (1720000000100000, 0, 84),  # T up
#     (1720000000200000, 1, 72),  # H down
#     (1720000000300000, 0, 72),  # H up
#     (1720000000400000, 1, 69),  # E down
#     (1720000000500000, 0, 69),  # E up
#     (1720000000600000, 1, 32),  # Space down
#     (1720000000700000, 0, 32),  # Space up
#     (1720000000800000, 1, 81),  # Q down
#     (1720000000900000, 0, 81),  # Q up
#     (1720000001000000, 1, 85),  # U down
#     (1720000001100000, 0, 85),  # U up
#     (1720000001200000, 1, 73),  # I down
#     (1720000001300000, 0, 73),  # I up
#     (1720000001400000, 1, 67),  # C down
#     (1720000001500000, 0, 67),  # C up
#     (1720000001600000, 1, 75),  # K down
#     (1720000001700000, 0, 75),  # K up
# ]

# print("Adding keystrokes to authentication buffer...")
# for i, (timestamp, event, key_code) in enumerate(test_keystrokes):
#     print(f"Keystroke {i+1}: Key {key_code}, Event {event}")
#     result = auth.add_keystroke(timestamp, event, key_code)
    
#     if result is not None:
#         print(f"Authentication decision made after {i+1} keystrokes")
#         break

# print("✅ Real-time simulation completed!")