# In [1]:  (Jupyter notebook cell marker)
import pandas as pd
import numpy as np
from io import StringIO
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.neighbors import KNeighborsClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, classification_report
import tensorflow as tf
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, GRU, Dense, RepeatVector, TimeDistributed, Concatenate

# Load the keystroke data directly from the CSV file.
# pd.read_csv already returns a DataFrame, so it must not be wrapped in
# StringIO and parsed a second time. Malformed rows are skipped.
df = pd.read_csv(r"C:\Users\Admin\Desktop\Keystroke_analysis\KeystrokeLoggingApplication\src\Keystrokes.csv", on_bad_lines='skip')
print(df.head())  # Quick preview of the loaded rows

# Preprocess
# LabelEncoder assigns integer codes in sorted order of the class labels
# (so e.g. 'Genuine' -> 0, 'Imposter' -> 1 if those are the labels);
# inspect le.classes_ for the actual mapping.
le = LabelEncoder()
df['Target'] = le.fit_transform(df['Target'])

# Features: columns 1..31 by position.
# NOTE(review): assumes these are exactly the timing columns and that
# 'Target' is not among them - confirm against the CSV header.
X = df.iloc[:, 1:32].values
y = df['Target'].values

# Split data BEFORE scaling so that test-set statistics never influence
# the scaler. The same random_state yields the same row partition as before.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Standardize features: fit on the training split only, then apply the
# same transform everywhere.
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)
X = scaler.transform(X)  # Full set, scaled with train-only statistics

# 1. KNN baseline (k=5) trained directly on the feature matrix
knn = KNeighborsClassifier(n_neighbors=5).fit(X_train, y_train)
y_pred_knn = knn.predict(X_test)
knn_report = classification_report(y_test, y_pred_knn)
print("KNN Accuracy:", accuracy_score(y_test, y_pred_knn))
print(knn_report)

# 2. Logistic Regression baseline; max_iter is raised to 1000 iterations
lr = LogisticRegression(max_iter=1000).fit(X_train, y_train)
y_pred_lr = lr.predict(X_test)
lr_report = classification_report(y_test, y_pred_lr)
print("Logistic Regression Accuracy:", accuracy_score(y_test, y_pred_lr))
print(lr_report)

# 3. TimeNet-like Model (GRU Autoencoder for Embeddings + Classifier)
# The recurrent layers expect (samples, timesteps, features). Each row is
# treated as a univariate sequence, so a trailing axis of size 1 is appended
# to the 2-D (samples, timesteps) matrices.
X_train_resh = X_train[:, :, np.newaxis]
X_test_resh = X_test[:, :, np.newaxis]

# Define TimeNet Autoencoder
def build_timenet_autoencoder(input_shape, units=60, num_layers=3):
    """Build a stacked-GRU sequence autoencoder and its encoder sub-model.

    Args:
        input_shape: ``(timesteps, features)`` of a single input sequence.
        units: Number of hidden units in every GRU layer (default 60).
        num_layers: Number of stacked GRU layers in the encoder and in the
            decoder (default 3).

    Returns:
        A tuple ``(autoencoder, encoder)``. ``autoencoder`` maps a sequence
        to its reconstruction and is compiled with the Adam optimizer and
        MSE loss. ``encoder`` shares the encoder layers and maps a sequence
        to the output of the last encoder GRU (a vector of length ``units``).

    Raises:
        ValueError: If ``num_layers`` is smaller than 1.
    """
    if num_layers < 1:
        raise ValueError("num_layers must be at least 1")

    timesteps = input_shape[0]
    n_features = input_shape[-1]

    input_layer = Input(shape=input_shape)

    # Encoder: all but the last GRU emit full sequences; the last one emits
    # only its final output, which serves as the embedding.
    hidden = input_layer
    for _ in range(num_layers - 1):
        hidden = GRU(units, return_sequences=True)(hidden)
    encoded = GRU(units, return_sequences=False)(hidden)

    # Decoder: feed the embedding at every timestep and reconstruct the
    # sequence (in the original order - no reversal is applied).
    decoded = RepeatVector(timesteps)(encoded)
    for _ in range(num_layers):
        decoded = GRU(units, return_sequences=True)(decoded)

    # One output value per input feature at each timestep, so the
    # reconstruction has the same shape as the input.
    output_layer = TimeDistributed(Dense(n_features))(decoded)

    autoencoder = Model(inputs=input_layer, outputs=output_layer)
    autoencoder.compile(optimizer='adam', loss='mse')

    encoder = Model(inputs=input_layer, outputs=encoded)

    return autoencoder, encoder  # Return autoencoder and encoder

# Take (timesteps, features) from the reshaped training data instead of
# hard-coding it, so the model always matches the feature matrix.
input_shape = X_train_resh.shape[1:]
autoencoder, encoder = build_timenet_autoencoder(input_shape)

# Train the autoencoder (unsupervised) on the training split only.
# Fitting it on the test rows as well would let the embeddings adapt to the
# evaluation data and inflate the reported test metrics.
autoencoder.fit(X_train_resh, X_train_resh, epochs=50, batch_size=32, validation_split=0.2, verbose=1)

# Extract embeddings
embed_train = encoder.predict(X_train_resh)
embed_test = encoder.predict(X_test_resh)

# KNN (k=5) on the encoder embeddings
knn_time = KNeighborsClassifier(n_neighbors=5).fit(embed_train, y_train)
y_pred_time_knn = knn_time.predict(embed_test)
knn_time_report = classification_report(y_test, y_pred_time_knn)
print("TimeNet + KNN Accuracy:", accuracy_score(y_test, y_pred_time_knn))
print(knn_time_report)

# Logistic Regression on the encoder embeddings
lr_time = LogisticRegression(max_iter=1000).fit(embed_train, y_train)
y_pred_time_lr = lr_time.predict(embed_test)
lr_time_report = classification_report(y_test, y_pred_time_lr)
print("TimeNet + LR Accuracy:", accuracy_score(y_test, y_pred_time_lr))
print(lr_time_report)

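# Cell output: ModuleNotFoundError: No module named 'tensorflow'
# (TensorFlow must be installed in the active environment, e.g. `pip install tensorflow`, before this cell can run.)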