In [None]:
import datetime
import os
import sys
import zlib

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import requests
import seaborn as sns

# Directory that is scanned for recordings.
RECORDING_PATH = os.path.expanduser("~/.talon/recordings")

# `__file__` is not defined when this code runs inside a notebook kernel, so
# fall back to the current working directory instead of raising NameError.
try:
    SCRIPT_PATH = os.path.dirname(os.path.realpath(__file__))
except NameError:
    SCRIPT_PATH = os.getcwd()

DATA_DIR = os.path.join(SCRIPT_PATH, "../data")
HOSTNAME = os.uname()[1]

# DATA_DIR is already rooted at SCRIPT_PATH, so it is not joined a second time.
OUTPUT_PATH = os.path.join(DATA_DIR, "talon-conversion")

# The built-in hash() of a str is salted per interpreter process, so it would
# yield a different id on every run. CRC32 of the hostname is stable.
USER_ID = zlib.crc32(HOSTNAME.encode("utf-8")) % 10000

# validForTraining requires strictly more than this many recordings per command.
MIN_NUM_SAMPLES = 4

# Base URL of the server queried for the list of commands.
REMOTE_SERVER = "http://localhost:5000"

In [None]:

def plot_data_collection(data):
    """Scatter-plot ``data`` with a linear trend line and save it as a PNG.

    Args:
        data: Mapping whose keys are plotted as x values and whose values are
            plotted as y values. The trend line is fitted with ``np.polyfit``,
            so both presumably need to be numeric -- verify against the caller.

    Side effects:
        Writes ``../../doc/assets/talon_collection.png`` relative to
        ``SCRIPT_PATH``.
    """
    x_values = list(data.keys())
    y_values = list(data.values())

    # Use a dedicated figure so repeated calls do not draw on top of whatever
    # pyplot's implicit "current figure" already contains.
    fig, ax = plt.subplots()
    ax.plot(x_values, y_values, 'o', color='black')

    # A straight-line fit needs at least two points: np.polyfit raises on
    # empty input and the fit is underdetermined for a single point.
    if len(x_values) >= 2:
        coefficients = np.polyfit(x_values, y_values, 1)
        trend = np.poly1d(coefficients)
        ax.plot(x_values, trend(x_values), "r--")

    # The figure is intentionally left open after saving so an interactive or
    # inline backend can still render it.
    fig.savefig(os.path.join(SCRIPT_PATH, "../../doc/assets/talon_collection.png"))


def parseCmd(filename):
    """Return the part of ``filename`` that precedes its first ``-``.

    The whole name is returned unchanged when it contains no ``-``.
    """
    command, _separator, _remainder = filename.partition("-")
    return command

def validForTraining(filename, cmd, timeout=10.0) -> bool:
    """Decide whether a recording should be used as a training sample.

    A recording qualifies when all of the following hold:

    * ``filename`` ends in ``.flac``;
    * ``cmd`` is a single word (contains no space);
    * ``cmd`` is in the command list returned by ``REMOTE_SERVER + "/commands"``
      (the ``detail`` field of the JSON reply);
    * more than ``MIN_NUM_SAMPLES`` entries in ``RECORDING_PATH`` parse to the
      same command.

    Args:
        filename: File name of the recording.
        cmd: Command extracted from ``filename``.
        timeout: Seconds to wait for the server before giving up.

    Returns:
        True if every condition above holds, otherwise False.

    Note:
        Calls ``sys.exit(1)`` when the command list cannot be retrieved.
    """
    try:
        # The request itself belongs inside the try block: a server that is
        # not running fails in requests.get, not later in .json().
        response = requests.get(REMOTE_SERVER + "/commands", timeout=timeout)
        # An HTTP error reply could still be valid JSON with a "detail" field;
        # reject it rather than mistake the error text for the command list.
        response.raise_for_status()
        training_commands = response.json()['detail']
    except Exception:
        print(" Error getting commands from the server. Is the server running?")
        sys.exit(1)

    words_in_command = len(cmd.split(" "))
    if not filename.endswith(".flac") or words_in_command != 1:
        return False
    if cmd not in training_commands:
        return False

    # NOTE(review): the strict ">" means MIN_NUM_SAMPLES + 1 recordings are
    # needed -- confirm this is intended rather than ">=".
    num_recordings = sum(
        1 for name in os.listdir(RECORDING_PATH) if parseCmd(name) == cmd
    )
    return num_recordings > MIN_NUM_SAMPLES

In [None]:
def parse():
    # create training directory if it doesn't exist 
    if not os.path.exists(OUTPUT_PATH):
        os.makedirs(OUTPUT_PATH)

    timesSaid = {}

    for filename in os.listdir(RECORDING_PATH):

        cmd = parseCmd(filename)

        if validForTraining(filename, cmd):

            timesSaid[cmd] = timesSaid.get(cmd, -1) + 1
        
            file_creation_date = datetime.datetime.fromtimestamp(os.path.getctime(os.path.join(RECORDING_PATH, filename)))

            output_name = f'{USER_ID}_nohash_{timesSaid[cmd]}.wav'
            