# GJT Automation

Step 1. Find the location of SPOLI and BLAST GJT folders

In [1]:
% Run everything from the GJT analysis scripts directory so that relative
% file names used later resolve against it.
cd('/Volumes/data-1/projects/blast/matlab_scripts/gjt_analysis/');

% Folder holding the raw BLAST GJT csv files
blast_folder = '/Volumes/data-1/projects/blast/data/online_sl/blast_child';
% Folder holding the raw SPOLI GJT csv files
spoli_folder = '/Volumes/data-1/projects/spoli/raw_sl_data';

Step 2. List the contents of the SPOLI and BLAST folders and combine the file lists

In [None]:
% GJT result files are recognised by their name ending in "gj.csv"
gj_pattern = '*gj.csv';

% Directory listings (struct arrays) of the matching files in each study folder
spoli_files = dir(fullfile(spoli_folder, gj_pattern));
blast_files = dir(fullfile(blast_folder, gj_pattern));

% One combined listing: SPOLI entries first, then BLAST entries
gjt_files = [spoli_files; blast_files];

% Numeric contents of the raw-score -> standard-score conversion table
scoring = xlsread('gjt_standard_score_conversion_table.xlsx');

Step 3. Find the location of the spreadsheets with the subjects' ages, read them, and combine BLAST and SPOLI

In [None]:
% Locations of the two demographic spreadsheets (both live in the BLAST project tree)
spoli_age_path = '/Volumes/data-1/projects/blast/demographic_data/spoli_ages.xlsx';
blast_age_path = '/Volumes/data-1/projects/blast/demographic_data/blast_ages.xlsx';

% Load each spreadsheet as a table
spoli_ages = readtable(spoli_age_path);
blast_ages = readtable(blast_age_path);

% Stack the first two columns of each table (SPOLI rows first, then BLAST).
% NOTE(review): presumably column 1 is the participant id and column 2 the age;
% confirm against the spreadsheets.
age = [spoli_ages(:,1:2); blast_ages(:,1:2)];
% Number of rows in the combined table, minus one
age_length = height(age) - 1;

Step 4. Create empty lists for raw score, standard score, and participant IDs, hits, false alarms 

In [None]:
% How many GJT files were found across both studies
total_files = numel(gjt_files);

% Preallocated per-file columns: raw scores as numbers, standard scores as strings
raw_score_list = zeros(total_files,1);
standard_list = string(zeros(total_files,1));

% Accumulators that the scoring loop grows one row at a time:
% participant ids, hit rates, false-alarm rates and A' values
[ids, hits, false_alarms, a_prime] = deal([]);

Step 5. Extract relevant data from each GJT file and calculate the raw score

In [None]:
%Read the scoring conversion table
scoring=xlsread('gjt_standard_score_conversion_table.xlsx');

% Answer key for the 20 judgement trials, in presentation order.
% It contains ten 0 targets and ten 2 targets, hence the division by 10 below.
target_key = [0 2 2 0 2 0 0 2 0 0 2 2 0 0 0 2 0 2 2 2]';
trials_per_target = 10;
% Participant ids are the first 11 characters of the file name
id_length = 11;

%for loop goes through every file in the selected folders
for k=1 : total_files
    %Name of the file
    base_file_name = gjt_files(k).name;
    if length(base_file_name) < id_length
        error('gjt:badFileName', ...
            'File name "%s" is too short to contain a %d-character participant ID.', ...
            base_file_name, id_length);
    end
    %Find participant ID from beginning of file name
    this_id = base_file_name(1:id_length);

    %Read the file from the folder that matches the study prefix of the id.
    %An unknown prefix is an error: otherwise the table read for the previous
    %participant would silently be scored again under this id.
    if strncmp(this_id, 'spoli', 5)
        full_file = readtable(fullfile(spoli_folder, base_file_name));
    elseif strncmp(this_id, 'blast', 5)
        full_file = readtable(fullfile(blast_folder, base_file_name));
    else
        error('gjt:unknownStudy', ...
            'File "%s" does not start with "spoli" or "blast".', base_file_name);
    end
    ids = vertcat(ids, this_id);

    % Keep only the rows where the participant pressed button 0 or button 2
    % (0 signifies incorrect, 2 signifies correct)
    is_judgement = full_file.button_pressed==string('0') | full_file.button_pressed==string('2');
    responses = full_file(is_judgement,:);
    if height(responses) ~= numel(target_key)
        error('gjt:trialCount', ...
            'Expected %d judgement trials in "%s" but found %d.', ...
            numel(target_key), base_file_name, height(responses));
    end
    % Add a column with targets
    responses.target = target_key;

    % Hit rate: proportion of the ten target-0 trials answered with button 0.
    % False-alarm rate: proportion of the ten target-2 trials answered with button 0.
    % NOTE(review): the earlier wording called a hit 'saying "yes" to a grammatical
    % item' while also stating that 0 means incorrect -- confirm the intended coding.
    pressed_zero = responses.button_pressed==string('0');
    this_hit = sum(pressed_zero & responses.target==0)/trials_per_target;
    hits = vertcat (hits, this_hit);
    this_false_alarm = sum(pressed_zero & responses.target==2)/trials_per_target;
    false_alarms = vertcat(false_alarms, this_false_alarm);

    % Find A', with y = hit rate and x = false-alarm rate:
    %   y > x : A' = 0.5 + (y - x)(1 + y - x) / (4y(1 - x))
    %   y < x : A' = 0.5 - (x - y)(1 + x - y) / (4x(1 - y))   (mirrored form)
    %   y = x : A' = 0.5 (chance; the fraction would be 0/0 when both are 0 or 1)
    % The denominator is parenthesised as a whole: writing "/4*y*(1-x)" divides
    % by 4 and then MULTIPLIES by y*(1-x), which is not the formula above.
    if this_hit == this_false_alarm
        this_a_prime = 0.5;
    else
        rate_diff = this_hit - this_false_alarm;
        this_a_prime = 0.5 + sign(rate_diff) * (rate_diff^2 + abs(rate_diff)) ...
            / (4*max(this_hit, this_false_alarm) - 4*this_hit*this_false_alarm);
    end
    a_prime = vertcat(a_prime, this_a_prime);
end

% Round once, after all files are processed; this also leaves a_prime_round
% defined (as empty) when no files were found.
a_prime_round = round(a_prime,2);

Step 6. Combine the raw scores (A', stored in a_prime) and create a table with the participant id, hit rate, false alarm rate, and A'

In [None]:
% Column headers for the per-participant summary
score_headers = {'part_id', 'hit_rate', 'false_alarm_rate', 'a_prime'};

% Put ids, hit rates, false-alarm rates and rounded A' side by side.
% Because the first column is a string array, the numeric columns are
% converted to strings as well.
scores = [string(ids), hits, false_alarms, a_prime_round];

% Wrap the combined array in a table with named columns
titled_scores = array2table(scores, 'VariableNames', score_headers);

Step 7. Find the age that corresponds to each participant's raw score, and exclude participants who have no age entered from the standard score calculation

In [None]:
%%%convert age table to two arrays of id's and ages

%age_id is the first column of the age table (participant ids)
age_id=age(:,1);
%converts the table form of age_id into an array
age_id_array=table2array(age_id);

%ages is the second column of the age table (participant ages)
ages = age(:,2);
%converts table form of ages into an array
ages_array=table2array(ages);

%%%convert table (titled scores) to an array with just the aprime values
%takes the 4th column of titled scores (a_prime, the raw score)
aprime_row=titled_scores(:,4);
%converts column of raw scores to an array
aprime_array=table2array(aprime_row);

%number of participants. ids is a char matrix with one row per participant,
%so count its rows: length(ids) would return the larger of the row count and
%the id width.
ids_length = size(ids,1);

%converts ids list into a list of strings
ids_str=string(ids);

%takes the first COLUMN of the scoring table, which lists all the raw scores
rows_of_raw_score=scoring(:,1);

%ids_string_all_ids keeps every participant id;
%ids_str will exclude the ids that don't have ages entered
ids_string_all_ids=ids_str;

%missing_age stays at the placeholder string "0" when every participant has an age
missing_age = string(zeros(1));
%ids that were scored but do not appear in the age table
ids_without_age = setdiff(ids_str,age_id_array);
%let the user know if any age information is missing
if ~isempty(ids_without_age)
    disp('missing age information for ' + ids_without_age)
    missing_age = ids_without_age;
    %one 'no age' marker per participant without an age
    missing_age_standards = repmat(string('no age'), numel(missing_age), 1);
    %drop those participants from ids_str only; the hits, false_alarms and
    %a_prime columns are left untouched
    ids_str(ismember(ids_str, missing_age)) = [];
end

Step 8. Match the participant ID with their raw score & calculate standard score

In [None]:
%number of participants that do have age information
new_ids_length = length(ids_str);

%number of participants overall. Count the string ids rather than using
%length(ids): ids is a char matrix, so length(ids) is at least the id width
%and would index past the end when there are fewer participants than that.
n_participants = numel(ids_string_all_ids);

%tolerance for matching a rounded A' against the raw scores in the conversion
%table (avoids exact floating-point equality)
raw_score_tol = 1e-6;

%one standard score (or explanatory message) per participant, in the same
%order as ids / titled_scores, so it can be joined column-wise afterwards
standard_list = string(zeros(n_participants,1));

%for each participant: look up their age, find the row of the conversion
%table matching their raw score, and read the standard score from the column
%for their age band
for k=1:n_participants
    %part_id and ages are in different orders, so the age is looked up by id
    part_id = ids_string_all_ids(k);

    %participants flagged as having no age get a message instead of a score
    if ismember(part_id,missing_age)
        standard_list(k)=string('no age information!');
        continue
    end

    %first row in the age table that corresponds with the part_id
    %(taking only the first keeps part_age scalar if an id is listed twice)
    row_part_age = find(strcmp(part_id,age_id_array), 1);
    if isempty(row_part_age)
        standard_list(k)=string('no age information!');
        continue
    end
    part_age = ages_array(row_part_age);

    %raw score (A') of this participant. aprime_array is row-aligned with
    %ids_string_all_ids because both were built from ids in the same order.
    part_raw_score = str2double(aprime_array(k));

    %row of the conversion table whose raw score matches this participant's
    raw_score_row = find(abs(rows_of_raw_score - part_raw_score) < raw_score_tol, 1);

    %ages 6, 7, 8 and 9 map to columns 2, 3, 4 and 5 of the conversion table
    if part_age >= 6 && part_age < 10
        standard_column = floor(part_age) - 4;
        if isempty(raw_score_row)
            %the raw score is not listed in the conversion table
            standard = string('no matching raw score');
        else
            standard = scoring(raw_score_row, standard_column);
        end
    else
        %if the age is less than 6, or 10 and above
        standard = string('no age norm');
    end
    standard_list(k)=standard;
end


Step 9. Create table which includes participant ID, hit rate, false alarm rate, raw score, and standard score. Save table to data_summaries folder in NAS

In [None]:
% Column headers for the final summary
final_headers = {'part_id','hit_rate', 'false_alarm_rate', 'a_prime','standard_score'};
% Destination csv in the data summaries folder
summary_path = '/Volumes/data-1/projects/blast/data_summaries/blast_online_child/breakdown/gjt_score.csv';

% Place the standard scores next to the ids, hit rates, false-alarm rates and A'
scores_with_standard = [string(ids), hits, false_alarms, a_prime_round, standard_list];

% Convert the combined array into a table with named columns
% (left unsuppressed so the table is displayed)
scores_table = array2table(scores_with_standard, 'VariableNames', final_headers)

% Write the table to disk
writetable(scores_table, summary_path);