In [77]:
# This cell describes the logic behind the code
# and what we want it to achieve.

# The goal is to create an event classification function.
# Once an event has been detected by our previous code, this function is run
# and classifies the event's text file as a specific kind of eye movement.
# The model is trained beforehand and is then called by the function.

# All the eye movement data was downloaded from GitHub
# and sorted into folders by eye movement (up/down/left/right/static/etc.),
# so we can choose which types of movement to train on.

# The function takes in a text file and outputs a label.
# The classifier is a random forest.

In [78]:
# loading packages 

import pandas as pd
import numpy as np
import os

from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score

from sklearn.impute import SimpleImputer
from sklearn.preprocessing import StandardScaler

In [79]:
# reading in all the eye movement files
# storing them as a dataframe with two columns: label and data
# label is the eye movement type (ex: up)
# data is a list of the numbers that were within the file 

# whoever is running the code will need to change this 

main_folder = '/Users/sfoulsham/Desktop/data3888/classifier_data'

data = []

# iterating over each folder in the main folder

for folder_name in os.listdir(main_folder):
    folder_path = os.path.join(main_folder, folder_name)
    
    # if the item is a folder
    
    if os.path.isdir(folder_path):
        
        # iterate over each file in that folder 
        
        for file_name in os.listdir(folder_path):
            file_path = os.path.join(folder_path, file_name)
            
            # if the item is a file 
            
            if os.path.isfile(file_path):
                
                # store the file content as a list 
                
                try:
                    with open(file_path, 'r', encoding='utf-8') as file:
                        numbers = file.read().splitlines()
                except UnicodeDecodeError:
                    continue
                
                # creating a dictionary 
                # with the folder name as the label and file contents as the data 
                
                data.append({'Label': folder_name, 'Data': numbers})

# making into a dataframe 

df = pd.DataFrame(data)

# checking the output 

print(df.head())
print()
print(df['Label'])

   Label                                               Data
0  right  [2.5503625405089, 25.2599366509078, 25.4775252...
1  right  [0.493260039543543, 67.8229558014826, 68.36925...
2  right  [2.75057737274778, 112.900990674505, 113.81498...
3  right  [0.264083297185266, -13.2428408194973, -13.308...
4  right  [4.58043415138604, 103.296491526989, 104.11848...

0             right
1             right
2             right
3             right
4             right
          ...      
72    eyebrow_raise
73    eyebrow_raise
74    eyebrow_raise
75    eyebrow_raise
76    eyebrow_raise
Name: Label, Length: 77, dtype: object


In [80]:
# now creating our random forest model 

def train_random_forest(dataframe):

    X = pd.DataFrame(dataframe['Data'].values.tolist())
    y = dataframe['Label']
    
    # handling missing 
    imputer = SimpleImputer(strategy='mean')
    X = imputer.fit_transform(X)
    
    # splitting the data into training and testing sets 
    
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
    
    scaler = StandardScaler()
    X_train = scaler.fit_transform(X_train)
    X_test = scaler.transform(X_test)
    
    clf = RandomForestClassifier()
    
    clf.fit(X_train, y_train)
    
    y_pred = clf.predict(X_test)
    
    accuracy = accuracy_score(y_test, y_pred)
    print("Accuracy:", accuracy)
    
    return clf

trained_classifier = train_random_forest(df)

Accuracy: 0.8125


In [81]:
def classify_eye_movement(file_path, classifier):
    """Predict the eye-movement label for one recording stored in a text file.

    Parameters
    ----------
    file_path : str
        Path to a UTF-8 text file with one numeric sample per line.
    classifier : object
        Fitted model exposing ``predict``. If it has an ``n_features_in_``
        attribute (set by scikit-learn estimators when they are fitted), that
        width is used for the feature row; otherwise the legacy width of
        199960 is assumed.

    Returns
    -------
    The first element of ``classifier.predict`` for this recording, or
    ``None`` (after printing a message) when the file cannot be decoded as
    UTF-8 or a line is not a valid float.

    Notes
    -----
    The samples are passed to the classifier as-is (no scaling is applied
    here), so the classifier must have been fitted on features in the same
    raw units.
    """

    # reading in the file

    try:
        with open(file_path, 'r', encoding='utf-8') as file:
            lines = file.read().splitlines()
    except UnicodeDecodeError:
        print("Unable to read the file due to encoding issues.")
        return None

    # converting the lines to float values

    try:
        values = [float(line) for line in lines]
    except ValueError:
        print("Error converting string data to float values.")
        return None

    # the row must have exactly as many features as the classifier was trained on
    # ask the classifier instead of hard-coding the width, which changes
    # whenever the longest training recording changes

    n_features = getattr(classifier, 'n_features_in_', 199960)

    # single-row array, zero-filled: a short recording stays padded with zeros,
    # a long one is truncated to the first n_features samples

    X = np.zeros((1, n_features), dtype=float)
    n_used = min(len(values), n_features)
    X[0, :n_used] = values[:n_used]

    predicted_label = classifier.predict(X)[0]

    return predicted_label

# practicing: classify one held-out example file per movement type
# and print the predicted label for each, in this order

example_files = (
    '/Users/sfoulsham/Desktop/data3888/testing_examples/r1.txt',
    '/Users/sfoulsham/Desktop/data3888/testing_examples/l1.txt',
    '/Users/sfoulsham/Desktop/data3888/testing_examples/s1.txt',
    '/Users/sfoulsham/Desktop/data3888/testing_examples/er1.txt',
)

for example_path in example_files:
    print(classify_eye_movement(example_path, trained_classifier))

right
left
eyebrow_raise
eyebrow_raise
