In [None]:
import numpy as np
import pandas as pd
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
from joblib import dump
import random
from tqdm import tqdm

# Labels of the five simulated users: 'user1' through 'user5'.
user_ids = [f'user{index}' for index in range(1, 6)]

def generate_movement_sequence(user_id, sequence_length=20, rng=None):
    """Generate a synthetic mouse-movement random walk for one user profile.

    The walk starts from a random point with both coordinates drawn from
    [100, 500]. Every generated point becomes the base for the next step, so
    the result is a cumulative path rather than noise around a fixed centre.

    Per-step behaviour by ``user_id``:
        * ``'user1'``: Gaussian step, std 5 on both axes.
        * ``'user2'``: Gaussian step, std 20 on x and 25 on y.
        * ``'user3'``: step of length 15 in a uniformly random direction,
          plus Gaussian noise (std 3) on each axis.
        * ``'user4'``: Gaussian x step (std 10); y is the previous y plus
          0.7 times the new x coordinate plus Gaussian noise (std 4).
        * anything else (including ``'user5'``): Gaussian step, std 30 on x
          and 35 on y.

    Args:
        user_id: Profile label selecting the step distribution.
        sequence_length: Number of points to generate.
        rng: Optional object exposing ``randint``, ``gauss`` and ``uniform``
            (e.g. ``random.Random(seed)``). Defaults to the module-level
            ``random`` generator, which preserves the previous behaviour.

    Returns:
        List of ``(x, y)`` tuples with ``sequence_length`` entries.
    """
    # Fall back to the shared module-level generator so existing callers
    # (and random.seed(...)) keep working exactly as before.
    rng = random if rng is None else rng

    sequence = []
    base_x, base_y = rng.randint(100, 500), rng.randint(100, 500)

    for _ in range(sequence_length):
        if user_id == 'user1':
            x = base_x + rng.gauss(0, 5)
            y = base_y + rng.gauss(0, 5)
        elif user_id == 'user2':
            x = base_x + rng.gauss(0, 20)
            y = base_y + rng.gauss(0, 25)
        elif user_id == 'user3':
            angle = rng.uniform(0, 2 * np.pi)
            x = base_x + 15 * np.cos(angle) + rng.gauss(0, 3)
            y = base_y + 15 * np.sin(angle) + rng.gauss(0, 3)
        elif user_id == 'user4':
            x = base_x + rng.gauss(0, 10)
            # NOTE(review): y is offset by 0.7 * the absolute x coordinate,
            # not 0.7 * the x step, so y grows by a large amount every
            # step. Kept as-is; confirm this drift is intended.
            y = base_y + x * 0.7 + rng.gauss(0, 4)
        else:
            x = base_x + rng.gauss(0, 30)
            y = base_y + rng.gauss(0, 35)

        sequence.append((x, y))
        # The new point is the starting position for the next step.
        base_x, base_y = x, y

    return sequence

def extract_features(sequence):
    """Summarise a mouse-movement sequence as a flat dict of numeric features.

    Args:
        sequence: Iterable of points whose first two items are the x and y
            coordinates (e.g. ``(x, y)`` tuples).

    Returns:
        dict mapping feature name to a number, inserted in this fixed order:
        ``mean_dx``, ``mean_dy``, ``std_dx``, ``std_dy``, ``total_distance``,
        ``mean_speed``, ``max_speed``, ``speed_variation``,
        ``mean_acceleration``, ``angle_variation``, ``mean_angle_change``,
        ``mean_curvature``.

    Raises:
        ValueError: If the sequence has fewer than three points. Acceleration
            and turning angle are second-order differences, so shorter input
            would otherwise yield NaN features or an opaque numpy error.
    """
    points = list(sequence)
    if len(points) < 3:
        raise ValueError(
            f"extract_features needs at least 3 points, got {len(points)}"
        )

    x_coords = np.asarray([p[0] for p in points], dtype=float)
    y_coords = np.asarray([p[1] for p in points], dtype=float)

    # Per-step displacement between consecutive samples.
    dx = np.diff(x_coords)
    dy = np.diff(y_coords)

    # The points carry no timestamps, so the per-step distance is used
    # directly as the speed.
    distances = np.sqrt(dx**2 + dy**2)
    speeds = distances
    accelerations = np.diff(speeds)

    # Heading of every step, in (-pi, pi].
    angles = np.arctan2(dy, dx)

    # Wrap heading changes into [-pi, pi]. A raw difference across the
    # +/-pi boundary would show up as a spurious turn of almost 2*pi.
    raw_turns = np.diff(angles)
    angle_changes = np.arctan2(np.sin(raw_turns), np.cos(raw_turns))

    # Turn per unit distance travelled; the epsilon avoids division by zero
    # when the pointer did not move.
    curvature = angle_changes / (distances[1:] + 1e-6)

    # Key order matters: it becomes the column order of the training frame.
    return {
        'mean_dx': np.mean(dx),
        'mean_dy': np.mean(dy),
        'std_dx': np.std(dx),
        'std_dy': np.std(dy),
        'total_distance': np.sum(distances),
        'mean_speed': np.mean(speeds),
        'max_speed': np.max(speeds),
        'speed_variation': np.std(speeds),
        'mean_acceleration': np.mean(accelerations),
        'angle_variation': np.std(angles),
        'mean_angle_change': np.mean(np.abs(angle_changes)),
        'mean_curvature': np.mean(curvature),
    }


# Seed the standard-library generator used by generate_movement_sequence so
# the synthetic dataset (and therefore the reported accuracies) is the same
# on every Restart & Run All, in line with random_state=42 used for the
# train/test split and the model.
random.seed(42)

num_samples_per_user = 500  # sequences generated for each user
sequence_length = 20        # points per generated sequence

data = []    # one feature dict per generated sequence
labels = []  # user ID for the feature dict at the same index

print("Generating training data...")
for user_id in user_ids:
    for _ in tqdm(range(num_samples_per_user), desc=f"Generating {user_id}"):
        sequence = generate_movement_sequence(user_id, sequence_length)
        features = extract_features(sequence)
        data.append(features)
        labels.append(user_id)

# Build the frame once from the list of dicts, then attach the labels.
features_df = pd.DataFrame(data)
features_df['user_id'] = labels

Generating training data...


Generating user1: 100%|██████████| 500/500 [00:00<00:00, 9093.70it/s]
Generating user2: 100%|██████████| 500/500 [00:00<00:00, 9090.50it/s]
Generating user3: 100%|██████████| 500/500 [00:00<00:00, 7042.95it/s]
Generating user4: 100%|██████████| 500/500 [00:00<00:00, 8926.56it/s]
Generating user5: 100%|██████████| 500/500 [00:00<00:00, 9078.34it/s]


In [None]:

# Split the frame into the feature matrix and the target labels.
y = features_df['user_id']
X = features_df.drop(columns='user_id')

# Hold out 20% of the samples for evaluation; the seed keeps the split stable.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Random forest of 150 depth-limited trees with balanced class weights.
model = RandomForestClassifier(
    class_weight='balanced',
    random_state=42,
    max_depth=10,
    n_estimators=150,
)
model.fit(X_train, y_train)

# Report accuracy on the data the model saw and on the held-out split.
train_acc = model.score(X_train, y_train)
print(f"Training accuracy: {train_acc:.4f}")
test_acc = model.score(X_test, y_test)
print(f"Test accuracy: {test_acc:.4f}")

# Persist the model together with the column order it was trained on, so
# inference can rebuild the input frame with matching columns.
dump(model, 'mouse_sequence_model.joblib')
dump(list(X.columns), 'feature_names.joblib')

Training accuracy: 0.9995
Test accuracy: 0.9760


['feature_names.joblib']

In [None]:
def verify_user(coordinate_sequence, claimed_user_id):
    """
    Verify if the mouse movement sequence matches the claimed user ID.

    The classifier and the feature-name list are loaded from
    'mouse_sequence_model.joblib' and 'feature_names.joblib' in the current
    working directory on every call.

    Args:
        coordinate_sequence: List of (x,y) tuples representing mouse movements
        claimed_user_id: The user ID to verify against

    Returns:
        1 if the movement matches the claimed user, 0 otherwise
    """
    # The notebook's import cell only does `from joblib import dump`, so the
    # bare name `joblib` is undefined there and `joblib.load` raised
    # NameError on a fresh kernel. Import it where it is needed.
    import joblib

    # NOTE: joblib.load unpickles the file and can execute arbitrary code;
    # only load artifacts written by this notebook.
    model = joblib.load('mouse_sequence_model.joblib')
    feature_names = joblib.load('feature_names.joblib')

    features = extract_features(coordinate_sequence)

    # Select the columns in the order saved at training time.
    input_data = pd.DataFrame([features])[feature_names]

    predicted_user = model.predict(input_data)[0]

    return 1 if predicted_user == claimed_user_id else 0

In [None]:

# Hand-written path heading steadily down and to the right.
# NOTE(review): training sequences have 20 points (sequence_length = 20)
# while this sample has 8, so length-dependent features such as
# total_distance may differ from the training distribution.
sample_sequence = [
    (100, 200), (105, 203),
    (110, 210), (115, 215),
    (120, 220), (125, 225),
    (130, 230), (135, 235),
]

# Check the path against the identity being claimed.
claimed_user = 'user1'
result = verify_user(coordinate_sequence=sample_sequence, claimed_user_id=claimed_user)
print(f"Verification result: {result}")  # 1 on a match, 0 otherwise

Verification result: 1
