In [2]:
import pandas as pd
import torch
import torch.nn as nn
import torch.optim as optim
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from torch.utils.data import DataLoader, Dataset
import numpy as np

In [3]:
# Step 1: Load and Clean the Datasets
fixed_text_typing_df = pd.read_csv('FixedTextTypingDataset.csv', delimiter=';', error_bad_lines=False, engine='python')
free_text_typing_df = pd.read_csv('FreeTextTypingDataset.csv', delimiter=';', error_bad_lines=False, engine='python')
frequency_df = pd.read_csv('FrequencyDataset.csv', delimiter=';', error_bad_lines=False, engine='python')

# Drop any rows with NaN values that resulted from the parsing errors
fixed_text_typing_df.dropna(inplace=True)
free_text_typing_df.dropna(inplace=True)
frequency_df.dropna(inplace=True)



  fixed_text_typing_df = pd.read_csv('FixedTextTypingDataset.csv', delimiter=';', error_bad_lines=False, engine='python')


  free_text_typing_df = pd.read_csv('FreeTextTypingDataset.csv', delimiter=';', error_bad_lines=False, engine='python')


  frequency_df = pd.read_csv('FrequencyDataset.csv', delimiter=';', error_bad_lines=False, engine='python')


In [4]:
# Step 2: Feature Extraction

def extract_features(df):
    features = []
    
    # Convert relevant columns to numeric types
    df['keyDown'] = pd.to_numeric(df['keyDown'], errors='coerce')
    df['keyUp'] = pd.to_numeric(df['keyUp'], errors='coerce')
    df['D1D2'] = pd.to_numeric(df['D1D2'], errors='coerce')
        
    for _, row in df.iterrows():
        key_down_up_diff = row['keyUp'] - row['keyDown']
        inter_key_time = row['D1D2'] if 'D1D2' in row else np.nan
        features.append([key_down_up_diff, inter_key_time])
    
    return np.array(features)


# Example: Extracting features from the fixed text dataset
fixed_text_features = extract_features(fixed_text_typing_df)
fixed_text_labels = fixed_text_typing_df['emotionIndex'].values

# Encoding the labels
label_encoder = LabelEncoder()
fixed_text_labels_encoded = label_encoder.fit_transform(fixed_text_labels)

# Train-test split
X_train, X_test, y_train, y_test = train_test_split(fixed_text_features, fixed_text_labels_encoded, test_size=0.2, random_state=42)

In [5]:
X_train

array([[  nan,  119.],
       [  nan, 1058.],
       [  nan,  602.],
       ...,
       [  nan,  329.],
       [  nan,  296.],
       [  nan,   62.]])

In [10]:
# Step 3: Define PyTorch Dataset and DataLoader
class KeystrokeDataset(Dataset):
    def __init__(self, features, labels):
        self.features = torch.tensor(features, dtype=torch.float32)
        self.labels = torch.tensor(labels, dtype=torch.long)

    def __len__(self):
        return len(self.features)

    def __getitem__(self, idx):
        return self.features[idx], self.labels[idx]

train_dataset = KeystrokeDataset(X_train, y_train)
test_dataset = KeystrokeDataset(X_test, y_test)

train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=32, shuffle=False)

In [11]:
# Step 4: Define the Model
class EmotionClassifier(nn.Module):
    def __init__(self, input_size, hidden_size, num_classes):
        super(EmotionClassifier, self).__init__()
        self.lstm = nn.LSTM(input_size, hidden_size, batch_first=True)
        self.fc = nn.Linear(hidden_size, num_classes)

    def forward(self, x):
        h0 = torch.zeros(1, x.size(0), hidden_size).to(device)
        c0 = torch.zeros(1, x.size(0), hidden_size).to(device)
        out, _ = self.lstm(x, (h0, c0))
        out = self.fc(out[:, -1, :])
        return out

input_size = 2  # Number of features
hidden_size = 64
num_classes = len(label_encoder.classes_)
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

model = EmotionClassifier(input_size, hidden_size, num_classes).to(device)

In [12]:
# Step 5: Train the Model
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

num_epochs = 10

for epoch in range(num_epochs):
    model.train()
    for features, labels in train_loader:
        features, labels = features.to(device), labels.to(device)
        
        # Forward pass
        outputs = model(features.unsqueeze(1))
        loss = criterion(outputs, labels)
        
        # Backward pass and optimization
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

    print(f'Epoch [{epoch+1}/{num_epochs}], Loss: {loss.item():.4f}')

Epoch [1/10], Loss: nan
Epoch [2/10], Loss: nan
Epoch [3/10], Loss: nan
Epoch [4/10], Loss: nan
Epoch [5/10], Loss: nan
Epoch [6/10], Loss: nan
Epoch [7/10], Loss: nan
Epoch [8/10], Loss: nan
Epoch [9/10], Loss: nan
Epoch [10/10], Loss: nan


In [13]:
# Step 6: Evaluate the Model
model.eval()
with torch.no_grad():
    correct = 0
    total = 0
    for features, labels in test_loader:
        features, labels = features.to(device), labels.to(device)
        outputs = model(features.unsqueeze(1))
        _, predicted = torch.max(outputs.data, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

    print(f'Accuracy of the model on the test set: {100 * correct / total:.2f}%')


Accuracy of the model on the test set: 21.63%


In [15]:
# Step 7: Map model outputs to emotional states
def predict_emotion(model, features):
    model.eval()
    with torch.no_grad():
        features = torch.tensor(features, dtype=torch.float32).unsqueeze(0).unsqueeze(0).to(device)
        outputs = model(features)
        _, predicted = torch.max(outputs.data, 1)
        return label_encoder.inverse_transform(predicted.cpu().numpy())

In [16]:
# Example: Predicting emotion for a new sample
sample_feature = [120, 300]  # Example feature vector
predicted_emotion = predict_emotion(model, sample_feature)
print(f'Predicted Emotion: {predicted_emotion[0]}')

Predicted Emotion: A
