In [1]:
import speech_recognition as sr
import os
import wave
import scipy.io.wavfile as wav
import librosa
import numpy as np
import json
from vosk import Model, KaldiRecognizer, SetLogLevel
from openpyxl import Workbook, load_workbook
from openpyxl.styles import PatternFill
import matplotlib.pyplot as plt

In [2]:
class Word:
    ''' One recognized word, built from an entry of a vosk speech recognition JSON result '''

    def __init__(self, dict):
        '''
        Copy the fields of one JSON word entry onto the instance.

        Parameters:
          dict (dict) dictionary from JSON, containing:
            conf (float): degree of confidence, from 0 to 1
            end (float): time at which the word ends, in seconds
            start (float): time at which the word starts, in seconds
            word (str): recognized word
        '''
        # The parameter keeps its original name `dict` (it shadows the builtin
        # inside this method only); alias it so the body reads clearly.
        entry = dict
        self.conf, self.end = entry["conf"], entry["end"]
        self.start, self.word = entry["start"], entry["word"]

    def to_string(self):
        ''' Returns a one-line, human-readable description of this instance '''
        template = "{:20} from {:.3f} sec to {:.3f} sec, confidence is {:.2f}%"
        confidence_percent = self.conf*100
        return template.format(self.word, self.start, self.end, confidence_percent)


def is_identical_except_extra_s(str1, str2):
    ''' Returns True when one string is exactly the other followed by a single 's' '''
    length_gap = len(str1) - len(str2)
    if length_gap == 1:
        longer, shorter = str1, str2
    elif length_gap == -1:
        longer, shorter = str2, str1
    else:
        # Same length, or lengths differing by more than one character.
        return False
    return longer[-1] == 's' and longer[:-1] == shorter


def is_one_char_diff(str1, str2):
    ''' Returns True when both strings have the same length and differ at exactly one position '''
    if len(str1) != len(str2):
        return False

    # Count the positions at which the two equal-length strings disagree.
    mismatches = sum(1 for left, right in zip(str1, str2) if left != right)
    return mismatches == 1

# Note The threshold depends also on the input volume set on the computer
def _get_voice_onset(signal, threshold = 20, fs=24000, min_time=10):
    '''
    signal : numpy.ndarray
             signal in. Should be the envelope of the raw signal for accurate results
    threshold : int
                Amplitude threshold for voice onset.
                (Threshold = 200 with NYUAD MEG mic at 75% input volume seems to work well)
    fs : int
         Sampling frequency
    min_time : int (ms)
             Time in ms after the threshold is crossed used to calculate
              the median amplitude and decide if it was random burst of noise
              or speech onset.
    '''

    n_above_thresh = int(fs/min_time) # convert time above threshold to number of samples.

    indices_onset = np.where(signal >= threshold)[0] # All indices above threshold
    # Next, find the first index that where the MEDIAN stays above threshold for the next 10ms
    # Not using the MEAN because sensitive to a single extreme value
    # Note 24 points per millesconds (for fs=24000)
    # 10ms = 240 points
    for i in indices_onset:
        median_mintime = np.median(np.abs(signal[i:i+n_above_thresh])) # median value in the timewindow of length min_time
        if median_mintime >= threshold:
            idx_onset = i
            onset_time = idx_onset / float(fs) * 1000.0
            return int(onset_time)
    return np.nan # if no point exceeds the threshold.
                          # Return "None" instead of None in order to be able to append it to a list later on


# Maps each target word to the alternative recognized words that are accepted as a
# match for it when looking up word onsets.
# Each key must appear only ONCE: in a dict literal a repeated key silently replaces
# the earlier entry. "dress" and "bathtub" used to be listed twice, so their longer
# lists were discarded; the duplicate entries are merged below.
misidentified_words = {"bee":['b','be','B','beat','beach', 'beer', 'theme','insect','book','big','being','abuse','bay','inside'],
                       "plane":['airplane', 'planner', 'plain','aeroplane'],
                       "pear":['pair', 'pay', 'air','pier','paint','pie','impair','payer'],
                       "sandcastle":['sand', 'same', 'stone', 'castle', 'sun','santos','santa','council'],
                       "boot":['boo', 'shoot', 'bu','boot-up','chew','shoe'],
                       "hanger":['hangar', 'closing', 'hair','hanging','anger','hang'],
                       "bathtub":['bath', 'sink','top','tub'],
                       "ship":['boat', 'sailboat', 'sea', 'sheet','syllable'],
                       "train":['trainer', 'chain', 'training'],
                       "bear":['fare','beer','baron','barren'],
                       "harp":['heart', 'violin', 'hop','arm','higher'],
                       "trumpet":['trumpeter', 'trouble', 'horn', 'trump','drum','trunk','trucks'],
                       "vase":['face', 'bs','boss','ace'],
                       "cannon":['canon', 'canada', 'candidates', 'can', 'kind','kid','candy','wheelbarrow'],
                       "crib":['bed', 'cradle', 'curb', 'caribbean', 'court', 'crapper','crew','cred','craver'],
                       "leg":['lake', 'lang', 'foot', 'logan', 'alike', 'line', 'law','knee','black','bag','dog','deck'],
                       "broom":['brim', 'burn', 'broomstick', 'room', 'blue', 'britain', 'barroom', 'brush', 'bird', 'barone','sweeper','broome', 'brew','bream','boon','brown','mop','mob'],
                       "school":['schoolhouse', 'building', 'ghoul'],
                       "whale":['wheel', 'will', 'wales', 'way', 'weed','win','oil'],
                       "desk":['table', 'drawers', 'deaths','dresser','counter','dust','cabinet'],
                       "table":['desk', 'tape','dust'],
                       "comb":['cone', 'comments', 'cold', 'clone', 'combing', 'calm', 'klum', 'cool','coombe','macomb','helm','cup'],
                       "rose":['rows', 'flower', 'flour','rules'],
                       "camel":['camera', 'candle', 'chemo', 'camo', 'qamar', 'common', 'comment', 'campus', 'capitol','campbell','kim'],
                       "pants":['jeans','genes', 'ponce', 'trousers', 'pets','pounds','pans'],
                       "dress":['address','dresses','dresden','trust','dresser','rest'],
                       "strawberry":['shrubbery', 'strawberries', 'struggle',"stronger",'shopper'],
                       "arrow":['era', 'barrel', 'air', 'aura','iru','dirt'],
                       "dominoes":['dominance', 'domino', 'dominic', 'dice', 'dominant', 'dominators','dominates','remote','remotes','domain'],
                       "horse":['course', 'force', 'forces', 'forest', 'voice', 'hold','hordes','whole','zebra','whores'],
                       "hammer":['humor', 'counter', 'hamra',"hammer's", 'hama', 'summer','helmet',"haven't",'hip','hand','cameras'],
                       "bed":['dead', 'photos','bit','thud'],
                       "nail":['now', 'knee', 'kneeling', 'meal', 'news','name','kneel','screw','pit','pin','navel','kneeler'],
                       "sandal":['slipper', 'shoe','flip-flop', 'slip', 'sand','slide'],
                       "hippo":['hipaa', 'hippel',"hippopotamus", 'hit', 'hip','rhino','ebook','have'],
                       "card":['car','carter','current', 'cod','court'],
                       "coat":['trenchcoat','code','jacket', 'outfit', 'dress', 'court', 'robe', 'co','coaches','jacketed','towel','chocolate','clothing'],
                       "hair":['wig','here', 'hat', 'hara','girl','week','way','heron','herons'],
                       "bottle":['bottom', 'glass','box','bundle'],
                       "seal":['ceo', 'ceiling','otter','sea','c','psu','thiel','shoe'],
                       "rake":['reek', 'rig', 'b', 'break', 'rick', 'right', 'raid', 'broom','regean','rigor','raker','breaker'],
                       "knife":['knee', 'night','life','next'],
                       "giraffe":['draft', 'draw'],
                       "mittens":['mountains', 'mins', "men's", 'man', 'muppet', 'gloves', 'mentions', 'minions','midland','maintenance','medicine','misses'],
                       "watch":['watcher', 'watching', 'wash', 'water', 'botch', 'washington','clock','wallet'],
                       "necklace":['net', 'neck', 'bracelet', 'nicholas', 'heart', 'ductless','nicolas'],
                       "tie":['time', 'tai', 'tire', 'top','high','type','ty','suit'],
                       "zebra":['sea', 'zipper', 'disease','sipa','cetera','zip','zero','seabra'],
                       "fish":['field','fisher','fishes','foot','face'],
                       "briefcase":['brief', 'suitcase', 'base', 'purse','bag'],
                       "pool":['swimming'],
                       "elephant":['yellowfin', 'other', 'elegant', 'lesson','alpha','element','elsa'],
                       "beach":['beaches'],
                       "doctor":['dock', 'physician','da','man','mind','mine','teacher','scientist','scientists','police','dude','doctorate'],
                       "saw":['slaw', 'sa', 'size', 'store', 'stock', 'sore','sauce','song','soul','sword','chainsaw','assault','salt'],
                       "bus":['buses', 'van', 'best', 'buffs','boss','bassoon','bustle'],
                       "newspaper":['paper','papers', 'new', 'knees'],
                       "mug":['cup', 'mugen','cook','monk'],
                       "cup":['mug'],
                       "bird":['bored', 'food', 'third', 'burden', 'beard','berta','bud','hummingbird'],
                       "honey":['honeycomb', 'honeymoon', "honey's", 'hoodie','funny','hunting','funding'],
                       "fire":['forest', 'campfires', 'fireplace','flyers', 'furs'],
                       "skirt":['dress','skate', 'schedule','skirting','scott','ska','dresser','skin','scourge','bluffs','screen','script'],
                       "crown":['cron', 'cronin', 'chrome', 'ground', 'count', 'current','tiara','crone','crap','crime'],
                       "spoon":['spooner', 'spin', 'screen','spooning','food','student'],
                       "girl":['grow','crew'],
                       "nurse":['maid', 'news', 'nasa', 'waitress', 'nursery', 'girl','somebody','nerds'],
                       "stapler":['staple', 'stupid','stupor','steeper'],
                       "hospital":['osp', 'close', 'hostel', 'house','hole','husband','hostile'],
                       "glass":['cup','goss'],
                       "dog":['puppy', 'dogara', 'door','dark'],
                       "purse":['bag', 'person', 'puss','satchel','briefcase'],
                       "tunnel":['mine', 'railway', 'railroad', 'chain', 'train', 'cave'],
                       "church":['school', 'chapel', 'churches','charge','trick'],
                       "shovel":['shop', 'chabot', 'shepard', 'shuttle', 'spade', 'shoveling','shivers','shows','shelter','shove'],
                       "lobster":['crab', 'crop', 'lab','luxury','love'],
                       "stethoscope":['set', 'step','stuff','desert','telescope'],
                       "star":['start', 'stall','store'],
                       "door":['doran'],
                       "pill":['pelicans', 'pool', 'pools', 'pins','medicine', 'pillows','vitamins'],
                       "yarn":['yuan', 'jan', 'ball','cotton','wool'],
                       "road":['broadened', 'path', 'highway','world'],
                       "balloon":['boon', 'blue', 'buildings', 'baloo', 'bullet','bonus','bulletin','upolu','ballooning'],
                       "vest":['bust', 'fast', 'first','festive','shirt','bus','waistcoat','face'],
                       "heart":['hart', 'harder','hot'],
                       "throne":['chairs', 'chin', 'road', 'surround', 'ground', 'throwing', 'wrong','throat','round','crown','phone','thorough','thrown','cheer','certain'],
                       "baby":['babies', 'bb', 'child','bay','base','paper'],
                       "ring":['ringer', 'wedding', 'rooting', 'diamond','bring','rain'],
                       "crab":['crop', 'cat', 'kravis', 'lobster', 'crime','crumb'],
                       "water":['drop', 'raindrop', 'job', 'droplet','trump','draw','word','candle','waterdrop','truck'],
                       "dart":['dar', 'darker', 'dont', 'air','dot','door','needle'],
                       "acorn":['corner', 'equal','acres','nut','equipment','ache'],
                       "tree":['cheat', 'treat'],
                       "lamp":['link', 'lab', 'length', 'land','lapping', 'lip','eleven','laugh','level','light'],
                       "truck":['check', 'trucker', 'car', 'pickup','choking','trip'],
                       "grass":['gas', 'girls','glasses','growth'],
                       "keyboard":['key', 'computer','keeper'],
                       "scarf":['sky', 'scar','score','scoff','scarves','skier'],
                       "headphones":['headset'],
                       "lightning":['thunder', 'enlightening', 'bolt','lining'],
                       "wallet":['microwave','water','wallach','window'],
                       "radio":['alarm', 'thermometer','tape','video'],
                       "sink":['sinker', 'think','seek'],
                       "shirt":['suit', 'jacket', 'coat', 'code','shop','shot','blouse','shoot','sweater','blast','t-shirt'],
                       "key":['kids','case','tea','king','keep'],
                       "fork":['fourth', 'falcon', 'four', 'fucking','forecasts','fluke','port','food','fog','fuck','park','floor','falls'],
                       "suit":['coat', 'code','tuxedo','jacket','zoo','co','costumes','asu'],
                       "stick":['branch', 'twig', 'steak', 'branches','tickets'],
                       "mop":['room', 'broom','mouth','monk'],
                       "boy":['child','kid'],
                       "pillow":['pillar', 'pennant'],
                       "pan":['past', 'saucepan', 'pod', 'pop', 'ten','pain','pin','panels'],
                       "leaf":['least','leave','lie'],
                       "nest":['nesta', 'eggs', 'nurse', 'dust','mist'],
                       "tape":['tip', 'taper','team','team','tube'],
                       "hat":['hart', 'house','how','half','fedora','half'],
                       "cave":['kate','key','tunnel'],
                       "cat":['couch','catch'],
                       "nose":['news'],
                       "angel":['agent', 'age','intro','angeles','ange'],
                       "chair":['cheer'],
                       "lizard":['dessert', 'alligator', 'salamander', 'iguana', 'guana','delivery','gecko','blizzard'],
                       "book":['bible','buck','button'],
                       "ice":['nice','eyes'],
                       "scissors":['surface', 'system','center','listeners','sisters','desert'],
                       "helicopter":['helicop'],
                       "basket":['boss','best','back','bed'],
                       "frog":['front','toad','folk','frogger','fraud','frock','prague','phone','fog'],
                       "sheep":['shape','cheaper'],
                       "gun":['guts','good','revolver'],
                       "clock":["o'clock"],
                       "sun":['so'],
                       "anchor":['acres', 'anca','anger','anger','ink','anchorage'],
                       "computer":['desktop','competes','monitor','tv','keyboard','q'],
                       "car":['call','code'],
                       "house":['home','barn'],
                       "foot":['feet','ankle'],
                       "money":['cash'],
                       "goat":['ram','gas','go','good','sheep','gutter','gulf']}
# Recognized words that are never accepted through the one-character-difference match.
banned_words = {"the","is","and","in"}
# KEY FOR HIGHLIGHTING
# Yellow: the gap between speech onset and A onset is >300 ms or <25 ms (this includes speech onset falling after A onset)
# Red: the speech, A, B and C onsets (columns 9-12) are not in chronological order

In [None]:
# Batch-process every sub-directory of dir_path: for each numbered .wav file, detect the
# speech onset, transcribe the audio with Vosk, look up the onsets of the A/B/C words and
# write everything to the next worksheet row. The result is saved as <directory>_Results.xlsx.

# specify the directory containing the .wav files
dir_path = r"C:\Users\James Belanger\Documents\Relatedness_and_Frequency\Cache"
model_path = r"C:\Users\James Belanger\.cache\Vosk\vosk-model-en-us-0.22"
directories = [name for name in os.listdir(dir_path) if os.path.isdir(os.path.join(dir_path, name))]
model = Model(model_path)
# worksheet columns holding, in order: speech onset, A onset, B onset, C onset
col_num=[9,10,11,12]
# [column of the target word, column that receives its onset] for A, B and C
columns_for_ABC = [[3,10],[4,11],[5,12]]
yellow_fill = PatternFill(start_color='FFFF00', end_color='FFFF00', fill_type='solid')
red_fill = PatternFill(start_color='FF0000', end_color='FF0000', fill_type='solid')
# amplitude threshold applied to the librosa time series in the silence search
threshold = .001
# silence-search window in samples (21000 samples = 875 ms if the files are 24000 Hz,
# as the original note states -- TODO confirm for new recordings)
win = 21000


def _word_matches(candidate, recognized):
    ''' True if `recognized` equals `candidate`, differs only by a trailing 's', or differs
    by one character while not being one of the banned_words '''
    if candidate == recognized:
        return True
    if is_identical_except_extra_s(candidate, recognized):
        return True
    return is_one_char_diff(candidate, recognized) and recognized not in banned_words


def _find_word_onset_ms(target, recognized_words):
    ''' Start time in ms of the first recognized word matching `target` or, failing that,
    one of its misidentified_words alternatives (tried in list order); None if no match '''
    if not isinstance(target, str):
        # empty or non-text cell: nothing to look for (the matchers would raise on None)
        return None
    candidates = [target] + misidentified_words.get(target, [])
    for candidate in candidates:
        for recognized in recognized_words:
            if _word_matches(candidate, recognized.word):
                return int(recognized.start*1000)
    return None


for directory in directories:
    results_path = os.path.join(dir_path, directory, directory + "_Results.xlsx")
    # Skip directories that were already processed. (The previous test searched for the
    # workbook name inside the list of .wav files, so it could never match.)
    if os.path.exists(results_path):
        print("skipping", directory, ": results already exist, delete", results_path, "to reprocess")
        continue

    # get the list of .wav files in the directory, sorted by numerical value
    file_list = sorted([f for f in os.listdir(os.path.join(dir_path, directory)) if f.endswith(".wav")], key=lambda x: int(x.split(".")[0]))

    # load the pre-existing workbook and select the active worksheet
    workbook = load_workbook(filename=os.path.join(dir_path, directory, directory+".xlsx"))
    worksheet = workbook.active

    # starting row for all measurements
    next_row = 3
    for file_name in file_list:
        # specify the path to the .wav file
        wav_file = os.path.join(dir_path, directory, file_name)

        # use librosa to load the .wav file and get the time series and sample rate
        y, sample_rate = librosa.load(wav_file, sr=None)

        # detect the speech onset on the raw samples, using the file's own sampling rate
        fs, signal = wav.read(wav_file)
        onset_ms = _get_voice_onset(signal, fs=fs)
        if onset_ms is None or np.isnan(onset_ms):
            # no onset found: leave the cell empty and search for silences from the start
            onset_ms = None
            first_index = 0
        else:
            first_index = int(fs * onset_ms / 1000.0)

        # recognize speech using the vosk model; collect the list of JSON dictionaries
        results = []
        with wave.open(wav_file, "rb") as wf:
            rec = KaldiRecognizer(model, wf.getframerate())
            rec.SetWords(True)
            while True:
                data = wf.readframes(4000)
                if len(data) == 0:
                    break
                if rec.AcceptWaveform(data):
                    results.append(json.loads(rec.Result()))
            results.append(json.loads(rec.FinalResult()))

        # convert list of JSON dictionaries to list of 'Word' objects
        list_of_words = []
        for sentence in results:
            # sometimes recognition returns only {'text': ''}, with no 'result' entry
            for obj in sentence.get('result', []):
                list_of_words.append(Word(obj))

        # WHITE SPACE: starting at the speech onset, record [begin, end] in ms, where
        # begin is the start of the first window whose maximum is below the threshold and
        # end is the end of the next window whose maximum exceeds it.
        # NOTE(review): this uses the signed maximum, so negative peaks are not considered,
        # and it recomputes a full window maximum at every sample, which is slow.
        null_spaces= []
        ii = first_index
        next_column = 13
        while ii < len(y)-win:
            yi_max=np.amax(y[ii:ii+win])
            if yi_max < threshold:
                begin = ii
                while ii < len(y)-win:
                    yi_max=np.amax(y[ii:ii+win])
                    if yi_max > threshold:
                        end= ii+win
                        null_spaces.append([int((begin/sample_rate)*1000),int((end/sample_rate)*1000)])
                        break
                    ii+=1
            ii+=1
        # silent stretches go to column 13 onwards, two cells (begin, end) per stretch
        for null_values in null_spaces:
            for null_value in null_values:
                worksheet.cell(row=next_row, column=next_column).value = null_value
                next_column += 1

        # add the onset time in ms to the Excel worksheet
        worksheet.cell(row=next_row, column=9).value = onset_ms

        # transcription, with a recognized "in" written as "and"
        response = " ".join("and" if w.word == "in" else w.word for w in list_of_words)
        worksheet.cell(row=next_row, column=7).value = response

        target = worksheet.cell(row=next_row, column=6).value

        if target == response:
            # exact match with the expected response: mark it correct
            worksheet.cell(row=next_row, column=8).value = 1
            worksheet.cell(row=next_row, column=7).value = "c"
        else:
            worksheet.cell(row=next_row, column=8).value = None

        # exactly one entry per target word (A, B, C), None when the word was not found
        onset = []
        for word_column, onset_column in columns_for_ABC:
            target_word = worksheet.cell(row=next_row, column=word_column).value
            word_onset = _find_word_onset_ms(target_word, list_of_words)
            worksheet.cell(row=next_row, column=onset_column).value = word_onset
            onset.append(word_onset)

        # highlighting (see the key above the cell): read each onset cell once
        row_values = {col: worksheet.cell(row=next_row, column=col).value for col in col_num}
        speech_value = 0 if row_values[9] is None else row_values[9]
        a_value = row_values[10]
        if (0 if a_value is None else a_value) - speech_value > 300:
            worksheet.cell(row=next_row, column=9).fill = yellow_fill
        if (10000 if a_value is None else a_value) - speech_value < 25:
            worksheet.cell(row=next_row, column=9).fill = yellow_fill
        for col_i, col_1 in enumerate(col_num):
            for col_2 in col_num[col_i+1:]:
                earlier = 0 if row_values[col_1] is None else row_values[col_1]
                later = 10000 if row_values[col_2] is None else row_values[col_2]
                if earlier > later:
                    worksheet.cell(row=next_row, column=col_1).fill = red_fill
                    worksheet.cell(row=next_row, column=col_2).fill = red_fill
        next_row += 1
        del rec
        print("file complete", directory, file_name, ":", response, "Speech onset:", onset_ms,"A onset:", onset[0],"B onset:",onset[1],"C onset:",onset[2])
    # save the workbook to a file
    workbook.save(results_path)
print("All done!")
del model

In [3]:
# specify the directory containing the .wav files
dir_path = r"C:\Users\James Belanger\Documents\Relatedness_and_Frequency\Cache"
model_path = r"C:\Users\James Belanger\.cache\Vosk\vosk-model-en-us-0.22"
directories = [name for name in os.listdir(dir_path) if os.path.isdir(os.path.join(dir_path, name))]
model = Model(model_path)
col_num=[9,10,11,12]
yellow_fill = PatternFill(start_color='FFFF00', end_color='FFFF00', fill_type='solid')
red_fill = PatternFill(start_color='FF0000', end_color='FF0000', fill_type='solid')
for directory in directories:
    
    # get the list of .wav files in the directory, sorted by numerical value
    file_list = sorted([f for f in os.listdir(os.path.join(dir_path, directory)) if f.endswith(".wav")], key=lambda x: int(x.split(".")[0]))
    if not(directory + "_results.xlsx" in file_list):
        
        # create a recognizer object
        r = sr.Recognizer()

        # load the pre-existing workbook and select the active worksheet
        workbook = load_workbook(filename=os.path.join(dir_path, directory, directory+".xlsx"))
        worksheet = workbook.active

        # starting row for all measurements
        next_row = 3
        # traverse the directory and find all the .wav files
        for file_name in file_list:
            if file_name.endswith(".wav"):
                # specify the path to the .wav file
                wav_file = os.path.join(dir_path, directory, file_name)

                # use librosa to load the .wav file and get the time series and sample rate
                y, sample_rate = librosa.load(wav_file, sr=None)
                
                # use the recognizer to open and read the .wav file
                with sr.AudioFile(wav_file) as source:
                    audio = r.record(source)
                 # use Vosk API to transcribe the audio text and find onset times of ABC
                    try:
                        # Set threshold
                        threshold = .002

                        # Find indices where absolute value is greater than threshold
                        indices = np.where(np.abs(y) > threshold)[0]

                        # Get the index of the first element in the indices array
                        first_index = indices[0]

                        # Calculate onset time in seconds
                        onset_time = first_index / sample_rate

                        # convert the onset time from seconds to milliseconds
                        onset_ms = int(onset_time * 1000)
                    except sr.UnknownValueError:
                        print("ERROR", file_name)
                    except sr.RequestError as e:
                        print("ERROR", file_name, "; {0}".format(e))
                wf = wave.open(wav_file, "rb")
                rec = KaldiRecognizer(model, wf.getframerate())
                rec.SetWords(True)
                # get the list of JSON dictionaries
                results = []
                # recognize speech using vosk model
                while True:
                    data = wf.readframes(4000)
                    if len(data) == 0:
                        break
                    if rec.AcceptWaveform(data):
                        part_result = json.loads(rec.Result())
                        results.append(part_result)
                part_result = json.loads(rec.FinalResult())
                results.append(part_result)

                # convert list of JSON dictionaries to list of 'Word' objects
                list_of_words = []
                for sentence in results:
                    if len(sentence) == 1:
                        # sometimes there are bugs in recognition 
                        # and it returns an empty dictionary
                        # {'text': ''}
                        continue
                    for obj in sentence['result']:
                        w = Word(obj)  # create custom Word object
                        list_of_words.append(w)  # and add it to list

                wf.close()  # close audiofile
                
                #WHITE SPACE
                win = 21000 #samples per 500 ms, all files are 24000Hz(samples/sec) (24000x(xsec), win is set at .9 something sec)
                null_spaces= []
                ii = first_index
                next_column = 13
                while ii < len(y)-win:
                    yi_max=np.amax(y[ii:ii+win])
                    if yi_max < threshold:
                            begin = ii
                            while ii < len(y)-win:
                                yi_max=np.amax(y[ii:ii+win])
                                if yi_max > threshold:
                                    end= ii+win
                                    null_spaces.append([int((begin/sample_rate)*1000),int((end/sample_rate)*1000)])
                                    break
                                ii+=1
                    ii+=1
                if null_spaces:
                    for null_values in null_spaces:
                        for null_value in null_values:
                            worksheet.cell(row=next_row, column=next_column).value = null_value
                            next_column += 1
                            
                            
                # add the onset time in ms to the Excel worksheet
                worksheet.cell(row=next_row, column=9).value = onset_ms 
                response = ""
                for word in list_of_words:
                    if word.word == "in":
                        response = response + "and "
                    else:
                        response = response + word.word + " "
                response = response[:-1]
                worksheet.cell(row=next_row, column=7).value = response

                target = worksheet.cell(row=next_row, column=6).value

                if target == response:
                    worksheet.cell(row=next_row, column=8).value = 1
                    worksheet.cell(row=next_row, column=7).value = "c"
                else:
                    worksheet.cell(row=next_row, column=8).value = None


                onset = []
                columns_for_ABC = [[3,10],[4,11],[5,12]]
                ABC_words = []

                for columns in columns_for_ABC:
                    ABC_words.append(worksheet.cell(row=next_row, column=columns[0]).value)
                    target_words = [ABC_words[-1]]
                    if target_words[0] in misidentified_words.keys():
                        target_words = target_words + misidentified_words[target_words[0]]
                    for alternates in target_words:
                        word = 0
                        while word < len(list_of_words):
                            if alternates == list_of_words[word].word:
                                worksheet.cell(row=next_row, column=columns[1]).value = int(list_of_words[word].start*1000)
                                onset.append(int(list_of_words[word].start*1000))
                                break
                            elif is_identical_except_extra_s(alternates, list_of_words[word].word):
                                worksheet.cell(row=next_row, column=columns[1]).value = int(list_of_words[word].start*1000)
                                onset.append(int(list_of_words[word].start*1000))
                                break
                            elif is_one_char_diff(alternates, list_of_words[word].word) and not (list_of_words[word].word in banned_words):
                                worksheet.cell(row=next_row, column=columns[1]).value = int(list_of_words[word].start*1000)
                                onset.append(int(list_of_words[word].start*1000))
                                break
                            word+=1
                        if word >= len(list_of_words):
                            worksheet.cell(row=next_row, column=columns[1]).value = None
                            onset.append(None)
                        else:
                            break
                if (0 if worksheet.cell(row=next_row, column=10).value is None else worksheet.cell(row=next_row, column=10).value
                   ) - (0 if worksheet.cell(row=next_row, column=9).value is None else worksheet.cell(row=next_row, column=9).value) > 300:
                    worksheet.cell(row=next_row, column=9).fill = yellow_fill
                if (10000 if worksheet.cell(row=next_row, column=10).value is None else worksheet.cell(row=next_row, column=10).value
                   ) - (0 if worksheet.cell(row=next_row, column=9).value is None else worksheet.cell(row=next_row, column=9).value) < 25:
                    worksheet.cell(row=next_row, column=9).fill = yellow_fill
                for col_i, col_1 in enumerate(col_num):
                    for col_2 in col_num[col_i+1:]:
                        if (0 if worksheet.cell(row=next_row, column=col_1).value is None else worksheet.cell(row=next_row, column=col_1).value) > (10000 if worksheet.cell(row=next_row, column=col_2).value is None else worksheet.cell(row=next_row, column=col_2).value):
                            worksheet.cell(row=next_row, column=col_1).fill = red_fill
                            worksheet.cell(row=next_row, column=col_2).fill = red_fill
                next_row += 1
                del(wf, rec) 
            print("file complete", directory, file_name, ":", response, "Speech onset:", onset_ms,"A onset:", onset[0],"B onset:",onset[1],"C onset:",onset[2])
        del(r)
        # save the workbook to a file
        workbook.save(os.path.join(dir_path, directory, directory + "_Results.xlsx"))
print("All done!")
del model

file complete OA23_Freq 83.wav : the coat is above the money and the lizard Speech onset: 1385 A onset: 2280 B onset: 3900 C onset: 5460
file complete OA23_Freq 85.wav : the radio is above the nurse and the remote Speech onset: 743 A onset: 1470 B onset: 3480 C onset: None
file complete OA23_Freq 86.wav : the house is below the watch and the nail Speech onset: 975 A onset: 1800 B onset: 3660 C onset: 4500
file complete OA23_Freq 89.wav : the gun is below the desk and Speech onset: 0 A onset: 1620 B onset: 3150 C onset: None
file complete OA23_Freq 90.wav : the fire is below the tape and the necklace Speech onset: 895 A onset: 1500 B onset: 3480 C onset: 5280
file complete OA23_Freq 93.wav : the shirt and the dog arab Speech onset: 0 A onset: 1230 B onset: 2340 C onset: None
file complete OA23_Freq 94.wav : the book and the hair are above Speech onset: 0 A onset: 1230 B onset: 2100 C onset: None
file complete OA23_Freq 97.wav : what is that the tickets and the fish are above or below th

file complete OA23_Rel 87.wav : the tea is below the flower and the cat Speech onset: 876 A onset: None B onset: None C onset: None
file complete OA23_Rel 88.wav : the table is below the doctor and the camel Speech onset: 896 A onset: 1290 B onset: 3240 C onset: 4080
file complete OA23_Rel 92.wav : the leg and the ring are above the broom Speech onset: 1937 A onset: 2700 B onset: 3630 C onset: 5190
file complete OA23_Rel 96.wav : the suit and the clock are below the zebra Speech onset: 0 A onset: 2070 B onset: 3600 C onset: 4740
file complete OA23_Rel 105.wav : the church is above the dart and the headphones Speech onset: 1353 A onset: 1920 B onset: 3360 C onset: 4260
file complete OA23_Rel 106.wav : the bed is below the frog and the rake Speech onset: 1428 A onset: 1980 B onset: 3270 C onset: 4080
file complete OA23_Rel 112.wav : the sun and the broom are above the house Speech onset: 1185 A onset: 1650 B onset: 2400 C onset: 3930
file complete OA23_Rel 115.wav : the hospital and the 

file complete OA24_Rel 8.wav : the girl is above the boy and the money Speech onset: 1497 A onset: 1590 B onset: 2640 C onset: 3300
file complete OA24_Rel 10.wav : the doctor is above the hospital and the newspaper Speech onset: 1019 A onset: 1110 B onset: 2160 C onset: 2970
file complete OA24_Rel 13.wav : the door and the window are below the keyboard Speech onset: 1289 A onset: 1380 B onset: 2039 C onset: 3000
file complete OA24_Rel 16.wav : the shirt and pants are about the key Speech onset: 1366 A onset: 1410 B onset: 1920 C onset: 2790
file complete OA24_Rel 21.wav : the bed is below the pillow and the truck Speech onset: 1323 A onset: 1380 B onset: 2520 C onset: 3120
file complete OA24_Rel 30.wav : the baby is above the crib and the same castle Speech onset: 1027 A onset: 1080 B onset: 2460 C onset: None
file complete OA24_Rel 33.wav : the gun and the canon or below the tie Speech onset: 1457 A onset: 1530 B onset: None C onset: 1950
file complete OA24_Rel 34.wav : the honey and 

file complete OA29_Freq 121.wav : the vest is below the cat and the strawberry Speech onset: 1874 A onset: 1980 B onset: 3270 C onset: 4230
file complete OA29_Freq 122.wav : the scissors are below the tree and the baby Speech onset: 1955 A onset: 2040 B onset: 3180 C onset: 3750
file complete OA29_Freq 126.wav : the shovel is above the card and the whale Speech onset: 1736 A onset: 1830 B onset: 3150 C onset: 4320
file complete OA29_Freq 127.wav : the fork is above the hat and the pills Speech onset: 1772 A onset: 1650 B onset: 2730 C onset: 3510
file complete OA29_Freq 130.wav : the map is above the heart and acorn Speech onset: 1389 A onset: 1530 B onset: 2490 C onset: 3065
file complete OA29_Freq 132.wav : the skirt and the plane or below the cave Speech onset: 2745 A onset: 2580 B onset: 3390 C onset: 4590
file complete OA29_Freq 133.wav : the era when the train are below the desk Speech onset: 1840 A onset: None B onset: 1980 C onset: 2670
file complete OA29_Freq 134.wav : the lam

file complete OA29_Rel 160.wav : the whale and the acorn are above the clock Speech onset: 2416 A onset: 2490 B onset: 3180 C onset: 4500
file complete YA102_Freq 83.wav : the coat and the money above the lizard Speech onset: 1195 A onset: 1320 B onset: 1770 C onset: 2400
file complete YA102_Freq 85.wav : the radio and the nurse are about the dominoes Speech onset: 554 A onset: 2490 B onset: 3420 C onset: 4140
file complete YA102_Freq 86.wav : the house and the wallet are hosting the wachovia now Speech onset: 1146 A onset: 1440 B onset: None C onset: None
file complete YA102_Freq 89.wav : the gun and the dusk or beneath the honey Speech onset: 658 A onset: 1170 B onset: 1770 C onset: 2700
file complete YA102_Freq 90.wav : the fire and the tape or beneath the necklace Speech onset: 1135 A onset: 1230 B onset: 1740 C onset: 2459
file complete YA102_Freq 93.wav : the shirt is above the dog and the dust Speech onset: 1365 A onset: 2130 B onset: 3510 C onset: None
file complete YA102_Freq 

file complete YA102_Rel 80.wav : the scarf has beneath the mittens and the horse Speech onset: 1034 A onset: 2040 B onset: 3480 C onset: 4080
file complete YA102_Rel 81.wav : the beach and the tire above Speech onset: 2102 A onset: 3120 B onset: None C onset: 3960
file complete YA102_Rel 87.wav : the key and the rows are beneath the cat Speech onset: 1155 A onset: 1260 B onset: None C onset: 1920
file complete YA102_Rel 88.wav : the table and the dude are being the camel Speech onset: 1321 A onset: 1440 B onset: None C onset: None
file complete YA102_Rel 92.wav : the leg is above the ring and the broom Speech onset: 1048 A onset: 1230 B onset: 2820 C onset: 3451
file complete YA102_Rel 96.wav : the coat is beneath the clock and the zebra Speech onset: 1337 A onset: None B onset: 1470 C onset: 2460
file complete YA102_Rel 105.wav : the church and the dark or above the headphones Speech onset: 836 A onset: 1170 B onset: 1860 C onset: 2820
file complete YA102_Rel 106.wav : the bed and the

file complete YA86_Freq 158.wav : the k tunnel and the necklace are below the b Speech onset: 1591 A onset: 3480 B onset: 4890 C onset: None
file complete YA86_Rel 2.wav : the dog is above the cat and the tie Speech onset: 1672 A onset: 1620 B onset: 2610 C onset: 3840
file complete YA86_Rel 8.wav : the girl is below the kid and the money Speech onset: 2042 A onset: 2220 B onset: None C onset: None
file complete YA86_Rel 10.wav : the doctor is below the hospital and the newspaper Speech onset: 1200 A onset: 1320 B onset: 2580 C onset: 3630
file complete YA86_Rel 13.wav : the door and the window are above the keyboard Speech onset: 1826 A onset: 1950 B onset: 2820 C onset: 6090
file complete YA86_Rel 16.wav : the shirt and the pants are below the key Speech onset: 2386 A onset: 2490 B onset: 3360 C onset: 4740
file complete YA86_Rel 21.wav : the bed is above the pillow and the truck Speech onset: 1470 A onset: 1560 B onset: 2760 C onset: 3570
file complete YA86_Rel 30.wav : the baby is 

file complete YA87_Freq 121.wav : the best is below the cat and the strawberry Speech onset: 1636 A onset: 2009 B onset: 3060 C onset: 3840
file complete YA87_Freq 122.wav : the scissors are below the tree and the baby Speech onset: 1150 A onset: 1230 B onset: 2160 C onset: 2760
file complete YA87_Freq 126.wav : shovel is above the card and the wheel Speech onset: 875 A onset: 1120 B onset: 2310 C onset: None
file complete YA87_Freq 127.wav : the fork is about the hat and the pills Speech onset: 684 A onset: 990 B onset: 1920 C onset: 2370
file complete YA87_Freq 130.wav : the mop is above the heart and the equipment Speech onset: 1794 A onset: 2009 B onset: 3060 C onset: None
file complete YA87_Freq 132.wav : the skirt and the plane are below the cave Speech onset: 1104 A onset: 1200 B onset: 1830 C onset: 2730
file complete YA87_Freq 133.wav : the arrow and the trainer beloved the desk Speech onset: 1125 A onset: 1920 B onset: None C onset: 2430
file complete YA87_Freq 134.wav : the 

file complete YA98_Rel 2.wav : the dog is above the cat and a tie Speech onset: 1437 A onset: 1560 B onset: 2820 C onset: 3510
file complete YA98_Rel 8.wav : the girl is below the boy and the money Speech onset: 1037 A onset: 1110 B onset: 2160 C onset: 2760
file complete YA98_Rel 10.wav : the doctor is below the hospital and the newspaper Speech onset: 1397 A onset: 1500 B onset: 2640 C onset: 3450
file complete YA98_Rel 13.wav : the door and the window are about the keyboard Speech onset: 837 A onset: 960 B onset: 1530 C onset: 2580
file complete YA98_Rel 16.wav : the shirt and the pants are below the key Speech onset: 1907 A onset: 1980 B onset: 2730 C onset: 3780
file complete YA98_Rel 21.wav : the bed is above the pillow and the truck Speech onset: 991 A onset: 1050 B onset: 2040 C onset: 2640
file complete YA98_Rel 30.wav : the baby is below the crib and the castle Speech onset: 992 A onset: 1110 B onset: 2460 C onset: None
file complete YA98_Rel 33.wav : the gun and the cannon a

In [None]:
import os
# Helper for deleting files whose names end with a given suffix, followed by example usage
def delete_files_with_suffix(folder_path, suffix):
    '''
    Delete every file directly inside a folder whose name ends with a suffix.

    Only the immediate entries of the folder are examined; sub-folders are not
    descended into. An entry that is not a regular file (for example a
    directory whose name happens to end with the suffix) is skipped, because
    os.remove cannot delete a directory and would raise, aborting the whole
    clean-up part-way through.

    Parameters:
      folder_path (str): path of the folder to scan
      suffix (str): filename ending to match, e.g. "(1).wav"; compared with
        str.endswith, so the match is case-sensitive

    Raises:
      OSError: if folder_path cannot be listed or a matching file cannot be removed
    '''
    for filename in os.listdir(folder_path):
        if not filename.endswith(suffix):
            continue
        file_path = os.path.join(folder_path, filename)
        # A matching name may belong to a directory; leave those untouched.
        if not os.path.isfile(file_path):
            continue
        os.remove(file_path)
        print(f"Deleted file: {file_path}")
# Root folder to clean and the filename ending that marks a file for deletion.
folder_path = r"C:\Users\James Belanger\Downloads\drive-download-20230702T014632Z-001"
suffix = "(1).wav"

# First pass: the root folder itself.
delete_files_with_suffix(folder_path, suffix)

# Second pass: each immediate sub-folder of the root (one level deep only).
directories = list(filter(
    lambda entry: os.path.isdir(os.path.join(folder_path, entry)),
    os.listdir(folder_path),
))
for directory in directories:
    sub_folder = os.path.join(folder_path, directory)
    delete_files_with_suffix(sub_folder, suffix)
   


In [None]:
import os
# Helper for deleting files whose names end with a given suffix, followed by example usage
def delete_files_with_suffix(folder_path, suffix):
    '''
    Remove the files directly inside a folder whose names end with a suffix.

    The scan covers only the folder's own entries and does not recurse.
    Entries that match the suffix but are not regular files (such as a
    sub-folder named "old_Results.xlsx") are ignored: os.remove raises on a
    directory, which would otherwise stop the remaining deletions.

    Parameters:
      folder_path (str): path of the folder to scan
      suffix (str): filename ending to match, e.g. "_Results.xlsx"; matched
        with str.endswith, so the comparison is case-sensitive

    Raises:
      OSError: if folder_path cannot be listed or a matching file cannot be removed
    '''
    for filename in os.listdir(folder_path):
        file_path = os.path.join(folder_path, filename)
        # Require both a suffix match and a regular file before deleting.
        if filename.endswith(suffix) and os.path.isfile(file_path):
            os.remove(file_path)
            print(f"Deleted file: {file_path}")
# Cache folder to clean and the filename ending of the generated result workbooks.
folder_path = r"C:\Users\James Belanger\Documents\Relatedness_and_Frequency\Cache"
suffix = "_Results.xlsx"

# Clear matching files that sit in the cache folder itself.
delete_files_with_suffix(folder_path, suffix)

# Then clear them from every immediate sub-folder (no deeper recursion).
directories = list(filter(
    lambda entry: os.path.isdir(os.path.join(folder_path, entry)),
    os.listdir(folder_path),
))
for directory in directories:
    sub_folder = os.path.join(folder_path, directory)
    delete_files_with_suffix(sub_folder, suffix)
