# This is an analysis function whose purpose is to read and arrange the LogFiles

## I. Importation of modules and functions from python scripts

In [None]:
# We import the module "os" whose purpose is to dialogue with the OS
import os
# We import numpy and we will refer to it as "np".
import numpy as np

# The function os.listdir returns a list with the names of the entries
# found inside the directory "log_files/L1"; print displays that list.
print(os.listdir("log_files/L1"))

## II. Setting the path to the logfile we want to analyse

Using the same function as before, we make a list of what is inside the student's directory. This is only an example, and it uses the path to the files on my computer; you will have to put your own path if you want it to work!

We set the *example_directory*, which is the directory that contains all the logfiles we want to analyse.

<span style="text-decoration: underline">Tip</span>: In order to find the path to the logfiles, I recommend unzipping all the files and putting them in three different directories (ex: L1, L2, L3). Once you open the jupyter notebook from the directory which contains the three L1/2/3 directories using a terminal, you can find the path to your documents easily. 

<span style="text-decoration: underline">Tip 2</span>: A path is easy to find: it is a succession of directories, each one located inside the previous one, until the last one is reached. In our case, we first have the general directory *log_files*, then *L1*, followed by a random student (*John Doe*), and finally the two nested *logFiles* directories.

In [None]:
# Directory that contains the logfiles of the example student. Replace
# this path with the one that matches the files on your own computer.
example_directory = "log_files/L1/John Doe_Number/logFiles/logFiles/"

We use the __os.listdir__ function to see what documents are contained in the *example_directory*.

In [None]:
# os.listdir returns a list with the names of the entries found in the
# directory given in parentheses. As this is the last expression of the
# cell, the notebook displays the returned list.
os.listdir(example_directory)

As we can only analyse AFC files with the following functions, we set the path *file_ex* and *file_ex2* as the example_directory + the path to an AFC file (2AFC/5AFC). We found these paths with the previous __os.listdir__  function, which made a list of the files contained in the *example_directory*.

In [None]:
# file_ex is the first example: the directory stored in
# "example_directory" followed by the name of a 2AFC_a_ logfile.
# You can analyse another file by picking a different name
# from the list displayed in the previous cell.
file_ex = f"{example_directory}2AFC_a_Mon_Feb_15_14_28_35_2021.log"
# file_ex2 is the second example, a 5AFC_2_ exercise;
# it can be changed in the same way.
file_ex2 = f"{example_directory}5AFC_2_Mon_Feb_15_14_40_26_2021.log"

## III. Opening and reading the files

We use the function __open__ to open the file and read it ("r"). Then, we retrieve the lines and close the file. 
The function __print__ in the middle is only here for aesthetic purposes, to make it easier to differentiate the output for *file_ex* from the output for *file_ex2*. `print("")` prints a blank line, while `print("______")` displays a horizontal line between the two outputs.

In [None]:
# We open file_ex for reading ("r") inside a "with" block and call
# the opened file "f". The "with" block closes the file automatically
# when it ends, even if an error happens while reading.
with open(file_ex, 'r') as f:
    # readlines returns every line of the file as a list of
    # strings; we assign that list to the variable "lines".
    lines = f.readlines()
# we print the path to the document
print(file_ex)
# we print a blank line
print("")
# we print the lines contained in the file
print(lines)
# we draw a horizontal line to separate the two exercises
print("_______________________________________________________________________________________________")
print("")
# we do the same for the second exercise
with open(file_ex2, 'r') as f:
    lines2 = f.readlines()
print(file_ex2)
print("")
print(lines2)

## IV. Cleaning the lines

### 1. Getting rid of the tabulations and line breaks

As you can see in the previous cell, there are "\t" and "\n" when we print the lines. "\t" stands for a tabulation (tab) between two items, and "\n" stands for a line break. These should not be visible, so we want to delete them.

In [None]:
# Every line we retrieved still ends with a line break ("\n").
# We go through the lines one by one and replace each line break
# by an empty string (""), which simply removes it. Writing
# "lines[:]" stores the cleaned lines back into the same list.
lines[:] = [raw_line.replace("\n", "") for raw_line in lines]
# We recall the path to the file
print(file_ex)
# we print a line that only contains a space, which
# shows up as a blank line
print(" ")
# and we display (=print) the cleaned lines of file_ex
print(lines)
print("_______________________________________________________________________________________________")
print("")
# We do the same for file_ex2
lines2[:] = [raw_line.replace("\n", "") for raw_line in lines2]
print(file_ex2)
print(" ")
print(lines2)


Here, we split the lines according to the tabulations "\t".

In [None]:
# We build the list "attributes" in a single step: for every line
# we retrieved, split cuts the line at each tabulation ("\t") and
# returns the list of its items; "attributes" gathers these lists,
# one per line, in the same order as the lines.
attributes = [line.split('\t') for line in lines]
# We do the same for the second exercise.
attributes2 = [line.split('\t') for line in lines2]


### 2. Combining the previous functions and creating a dictionary

In [None]:
# We define a new function called get_lines.
def get_lines(path):
    """Return the lines of the file located at *path*.

    Parameters:
        path: path to the file we want to read.

    Returns:
        A list of strings, one per line of the file, in the same
        order as in the file. The line break at the end of each
        line is kept.
    """
    # We open the file whose path was given to the function (and not
    # a fixed example file), and we read it (='r'). The "with" block
    # closes the document automatically, even if reading fails.
    with open(path, 'r') as f:
        # We return the lines that we have read.
        return f.readlines()

# Next, we create a dictionary. The function only
# takes one attribute, the path to the exercise.
def create_dico(path):
    """Arrange the content of the logfile at *path* in a dictionary.

    The lines of the file are used as follows: the first line is
    stored under the key "date", the second line gives the keys (the
    names of the columns), the last line is left out, and every line
    in between is a line of data split at the tabulations.

    Parameters:
        path: path to the logfile, passed on to get_lines.

    Returns:
        A dictionary that maps every key to a NumPy array holding the
        values of that column (floats for 'Response Time', 'NbErreurs'
        and 'Repetitions', strings for the other keys), plus the key
        "date" that maps to an array with the items of the first line.

    Raises:
        IndexError: if the file has fewer than two lines, or if a
            line of data has fewer items than there are keys.
        ValueError: if a value of a numeric column cannot be
            converted by float.
    """
    # We take the lines returned by get_lines, remove the line break
    # at the end of each of them, and split each line at the
    # tabulations: "attributes" holds one list of items per line.
    attributes = [
        raw_line.replace("\n", "").split('\t') for raw_line in get_lines(path)
    ]
    # We assign the first line (0) to the date
    date = attributes[0]
    # The keys to the second (1)
    keys = attributes[1]
    # The interesting data is in the middle, from the third line (2)
    # up to, but not including, the last line (-1), which holds the
    # statistics of the exercise. We only keep the lines that have
    # more or fewer than one item: a line with exactly one item is an
    # empty line. Building a new list, instead of deleting items
    # while looping over the indexes, makes sure that no line is
    # skipped and that no index falls outside the shortened list.
    data = [row for row in attributes[2:-1] if len(row) != 1]
    # The numeric data is:
    numeric = ('Response Time', 'NbErreurs', 'Repetitions')
    # We create a new empty dictionary
    dico = {}
    # We skim through all the lines in data
    for row in data:
        # For the position "i" and the "key" in the
        # group of keys (keys = attributes[1])
        for i, key in enumerate(keys):
            value = row[i]
            # if the key is in the list "numeric", the
            # value is converted to a number (=float)
            if key in numeric:
                value = float(value)
            # setdefault creates an empty list the first time we
            # encounter this key, and returns the list of the key
            # in every case; we add (=append) the value to it.
            dico.setdefault(key, []).append(value)
    dico["date"] = date
    # We make an np.array, a table, of all the values stored
    # under each key, and we return the resulting dictionary.
    return {key: np.array(values) for key, values in dico.items()}
    

We can now display a dictionary for *file_ex*.

For all the keys in the file (Sound File, Stimulus, Vowel, Response Time, NbErreurs, Repetitions, and date), we have the related values.

In [None]:
# As this call is the last expression of the cell, the notebook
# displays the dictionary returned by create_dico(file_ex).
create_dico(file_ex)

We can do the same for *file_ex2*.

In [None]:
# As this call is the last expression of the cell, the notebook
# displays the dictionary returned by create_dico(file_ex2).
create_dico(file_ex2)