In [3]:
#imports
#pip install numpy
import numpy as np
#pip install pandas
import pandas as pd
#pip install --upgrade tensorflow
import tensorflow as tf
from tensorflow import keras
#standard library
import os

In [4]:
#this function allows to read the csv files from the logs of the attentive cursor dataset
#path_logs : path to the logs of the dataset with the files containing the list of inputs for each user
#path_csv : path to the groundtruth.csv containing the attention score of the each user
#dfs : array containing, for each user, its attention score and the count of its inputs
def get_csv_data(path_logs, path_csv):
    data = pd.read_csv(path_csv, sep='\t')
    dfs = []
    for dirname, _, filenames in os.walk(path_logs):
        for filename in filenames:
            if filename.endswith('.csv'):
                file_path = os.path.join(dirname, filename).replace('\\', '/')
                df = pd.read_csv(file_path,sep=' ')
                
                row = data.loc[data['log_id'] == int(filename[:-4])] # https://stackoverflow.com/questions/17071871/how-do-i-select-rows-from-a-dataframe-based-on-column-values
                # print(row)
                arr = row.values.tolist()
                arr.append(df)
                dfs.append(arr)
    return dfs

In [5]:
#loading the data
dfs = get_csv_data('../the-attentive-cursor-dataset/logs', '../the-attentive-cursor-dataset/groundtruth.tsv')

In [6]:
#keeping the attention score and the amount of each input for each user
cleandf = []
for row in range(len(dfs)):
    cleandf.append([dfs[row][0][2],dfs[row][1]['event'].value_counts()])

In [7]:
#removing 'useless input types from the data'
tab = np.array(cleandf, dtype='object')
removed = ['mouseup','focus','load','beforeunload','unload','touchend'] #arbitrary list of input types to remove
for row in tab :
    for input in removed : 
        if input in row[1].keys() :
            row[1].pop(input)

#searching the data for every other input types
all_inputs = []
for row in tab :
    for input in row[1].keys() :
        if input not in all_inputs :
            all_inputs.append(input)
all_inputs.sort()
print(all_inputs)

['blur', 'click', 'contextmenu', 'copy', 'keydown', 'keyup', 'mousedown', 'mousemove', 'mouseover', 'resize', 'scroll', 'select', 'touchmove', 'touchstart']


In [8]:
#normaliations and the removal of extremes data are kept commented because they did not affect the accuracy
#of the model in any significant way but are kept in the code for future devs to know what has been tried on
#this model

#normalisation v1
"""
for input in all_inputs :
    tmp = []
    for row in tab :
        if input in row[1].keys() :
            tmp.append(row[1].get(input))
    mean = np.mean(tmp)
    for row in tab :
        if input in row[1].keys() :
            row[1][input] = row[1][input]/mean

print(tab)
"""
#normalisation v3
"""
for input in all_inputs :
    max = 0
    for row in tab :
        if input in row[1].keys() :
            if row[1].get(input) > max :
                max = row[1].get(input)
    for row in tab :
        if input in row[1].keys() :
            row[1][input] = row[1][input]/max
"""

'\nfor input in all_inputs :\n    max = 0\n    for row in tab :\n        if input in row[1].keys() :\n            if row[1].get(input) > max :\n                max = row[1].get(input)\n    for row in tab :\n        if input in row[1].keys() :\n            row[1][input] = row[1][input]/max\n'

In [9]:
tmp_all_x = [] #stock a python array version of the data shaped as the entry of the model
tmp_all_y = [] #stock a python array version of the data shaped as the results of the model (for training purpose)

for row in tab :
    tmp_all_x.append([])
    for input in all_inputs :
        if input in row[1].keys() :
            tmp_all_x[len(tmp_all_x)-1].append(row[1].get(input))
        else :
            tmp_all_x[len(tmp_all_x)-1].append(0)
    tmp_all_y.append(row[0])

all_x = np.array(tmp_all_x) #numpy array version of the shaped data
all_y = np.array(tmp_all_y)
print(all_x)

[[3 2 1 ... 0 0 0]
 [0 1 0 ... 0 0 0]
 [2 0 0 ... 0 0 0]
 ...
 [2 1 0 ... 0 0 0]
 [3 1 0 ... 0 0 0]
 [1 1 0 ... 0 0 0]]


In [10]:
#normalisation v2
"""
for i in range(len(all_inputs)) :
    print(i)
    tmp = []
    for row in all_x :
        tmp.append(row[i])
    mean = np.mean(tmp)
    for row in all_x :
        row[i] = row[i]/mean
    
print(all_x)
"""

#enlevage des données extrèmes (faire une normalisation avant)
"""
i = 0
bad_rows = []
for row in range(len(all_x)) :
    for element in all_x[row] :
        if element > 5 :
            i +=1
            bad_rows.append(row)
            continue

all_x = np.delete(all_x,bad_rows, axis=0)
print(all_x)
"""

'\ni = 0\nbad_rows = []\nfor row in range(len(all_x)) :\n    for element in all_x[row] :\n        if element > 5 :\n            i +=1\n            bad_rows.append(row)\n            continue\n\nall_x = np.delete(all_x,bad_rows, axis=0)\nprint(all_x)\n'

In [18]:

inputs = keras.Input(shape=(len(all_inputs),))
x = tf.keras.layers.Dense(1, activation=tf.nn.relu)(inputs)
outputs = tf.keras.layers.Dense(6, activation=tf.nn.softmax)(x)

model = keras.Model(inputs=inputs, outputs = outputs)

model.compile(
    loss=keras.losses.SparseCategoricalCrossentropy(from_logits=True),
    optimizer=keras.optimizers.RMSprop(),
    metrics=["accuracy"],
)

#list of inputs (uncoment to see)
print('all used input types : ')
print(all_inputs)
#all_inputs correspond to all the inputs type minus
print('all removed inputs : ')
print(removed)

TypeError: Keras symbolic inputs/outputs do not implement `__len__`. You may be trying to pass Keras symbolic inputs/outputs to a TF API that does not register dispatching, preventing Keras from automatically converting the API call to a lambda layer in the Functional Model. This error will also get raised if you try asserting a symbolic input/output directly.

In [12]:
print(all_x[1])

[0 1 0 0 0 0 1 9 3 0 7 0 0 0]


In [13]:
history = model.fit(all_x, all_y, epochs=20, validation_split=0.1)

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


In [14]:
import tensorflowjs as tfjs
tfjs.converters.save_keras_model(model, './model')

In [15]:
num_array = np.array([ 0, 17, 0, 0, 0, 0, 21, 324, 28, 0, 0, 0, 0, 0 ])
prediction = model.predict(num_array.reshape(1, -1))
print(prediction)

[[0.0000000e+00 3.4934153e-33 6.9748394e-25 0.0000000e+00 9.7245544e-01
  2.7544534e-02]]


In [35]:
#attempt at a linear reggression model
data = all_x

data_normalizer = keras.layers.Normalization(input_shape=[14,], axis=None)
data_normalizer.adapt(data)

persuasiveness_model = tf.keras.Sequential([
    data_normalizer,
    keras.layers.Dense(units=1)
])

persuasiveness_model.summary()

persuasiveness_model.compile(
    optimizer=tf.optimizers.Adam(learning_rate=0.1),
    loss='mean_absolute_error',
    metrics=['mae'])

Model: "sequential_8"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 normalization_8 (Normalizat  (None, 14)               3         
 ion)                                                            
                                                                 
 dense_14 (Dense)            (None, 1)                 15        
                                                                 
Total params: 18
Trainable params: 15
Non-trainable params: 3
_________________________________________________________________


In [37]:
history = persuasiveness_model.fit(all_x, all_y, epochs=100, validation_split=0.2)

Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100
Epoch 72/100
Epoch 73/100
Epoch 74/100
Epoch 75/100
Epoch 76/100
Epoch 77/100
Epoch 78

In [38]:
persuasiveness_model.predict(data[:10])



array([[3.7076702],
       [3.963278 ],
       [4.0995793],
       [4.1615567],
       [4.064081 ],
       [4.1151037],
       [4.0078044],
       [4.062291 ],
       [3.99364  ],
       [3.944041 ]], dtype=float32)