In [1]:
import json

In [2]:
# Relative location of the dataset file read by the cells below
# (each line of the file is parsed as one JSON object).
path = '../Data/DoodleSample'

### Extract input/output sequences 

In [34]:
def _pitches_by_start_time(notes):
    """Return the pitches of ``notes`` as a tuple, ordered by note start time."""
    # Notes without a 'startTime' key are treated as starting at 0.0.
    # sorted() is stable, so notes with equal start times keep their file order.
    ordered_notes = sorted(notes, key=lambda note: note.get('startTime', 0.0))
    return tuple(note['pitch'] for note in ordered_notes)


def _unique_sessions(parsed_data):
    """Return one user's (input, output) pitch-tuple pairs, first pair per distinct input."""
    seen_inputs = set()  # O(1) duplicate lookup instead of rescanning the result list
    user_list = []

    for session, input_sequence in enumerate(parsed_data['input_sequence']):
        # The output sequence at the same index belongs to this input
        output_sequence = parsed_data['output_sequence'][session]

        input_pitch_tuple = _pitches_by_start_time(input_sequence['notes'])
        output_pitch_tuple = _pitches_by_start_time(output_sequence['notes'])

        # Keep only the first session seen for each distinct input
        if input_pitch_tuple not in seen_inputs:
            seen_inputs.add(input_pitch_tuple)
            user_list.append((input_pitch_tuple, output_pitch_tuple))

    return user_list


def all_user_sessions(path, num_users):
    """
    Extracts pitch sequences from user sessions for the input and the output generated by Coconet.

    The file is read line by line; every non-blank line is parsed as one JSON object
    holding a user's 'input_sequence' and 'output_sequence' lists. For each session the
    notes are ordered by 'startTime' (0.0 if absent) and reduced to a tuple of pitches.
    Sessions whose input pitch tuple already occurred for the same user are dropped.

    Args:
        path (str): The path to the file containing one JSON object per line.
        num_users (int): The maximum number of users (non-blank lines) to process.
            If the file holds fewer users, all of them are returned.

    Returns:
        list: One list per user, each containing (input, output) pairs of pitch tuples
        with unique inputs, in the order they appear in the file.

    Raises:
        json.JSONDecodeError: If a non-blank line is not valid JSON.
        KeyError: If an expected key ('input_sequence', 'output_sequence', 'notes',
            'pitch') is missing.
        IndexError: If a user has fewer output sequences than input sequences.
    """
    all_sessions = []

    # JSON text is UTF-8 by specification; do not depend on the platform default encoding
    with open(path, 'r', encoding='utf-8') as file:
        for raw_line in file:
            if len(all_sessions) >= num_users:
                break

            line = raw_line.strip()
            if not line:
                # Blank (e.g. trailing) lines carry no user data
                continue

            # Parse the user's line as JSON data
            parsed_data = json.loads(line)
            all_sessions.append(_unique_sessions(parsed_data))

    return all_sessions

In [41]:
# Extract the de-duplicated (input, output) pitch sequences for the first 3 users in the file
data = all_user_sessions(path, 3)

In [42]:
def print_input_output(data):
    """
    Prints every (input, output) pair in ``data``, grouped by user.

    Args:
        data (list): One list per user; each element is indexable, with the input
            sequence at position 0 and the output sequence at position 1.
    """
    for user_sessions in data:
        # Marks the start of each user's group of sessions
        print("New User")
        for pair in user_sessions:
            labelled = (("Input: ", pair[0]), ("Output: ", pair[1]))
            for label, sequence in labelled:
                print(label, sequence)
            # Blank line between consecutive sessions
            print()

In [43]:
# Show every extracted input/output pair, grouped per user
print_input_output(data)

New User
Input:  (71, 69, 74, 74, 72, 71, 67, 71, 69)
Output:  (71, 67, 62, 43, 69, 74, 69, 55, 54, 72, 67, 64, 52, 71, 66, 62, 50, 67, 67, 67, 52, 71, 55, 50, 64, 67, 69, 64, 57, 48, 66, 71, 67, 62, 47, 69, 64, 49, 45, 57, 50, 62)

Input:  (71, 69, 74, 74, 72, 71, 67, 71, 69, 72, 76, 69, 74, 67, 72)
Output:  (71, 67, 62, 55, 69, 74, 69, 53, 72, 67, 64, 52, 71, 65, 62, 67, 67, 60, 50, 71, 64, 48, 69, 69, 60, 53, 72, 65, 45, 76, 64, 59, 47, 69, 65, 60, 45, 74, 62, 47, 57, 67, 64, 55, 48, 72, 65, 50, 57)

New User
Input:  (72, 67, 71, 69)
Output:  (72, 64, 60, 57, 59, 67, 60, 66, 57, 71, 67, 62, 55, 69, 69, 54, 71, 67, 64, 52, 60, 69, 66, 62, 50, 71, 67, 43, 69, 66, 49, 50)

Input:  (72, 72, 67, 71, 69, 71, 74, 77, 76)
Output:  (72, 64, 55, 52, 57, 67, 59, 55, 62, 60, 54, 71, 67, 62, 55, 69, 66, 50, 71, 67, 62, 55, 57, 74, 59, 57, 77, 59, 76, 60, 60)

New User
Input:  (67, 71, 71, 71, 65, 65, 67, 71, 71, 65, 71, 67)
Output:  (67, 63, 58, 51, 71, 65, 53, 67, 55, 63, 56, 53, 65, 58, 46, 48

## JSON file sample

In [33]:
# Peek at the raw data: read only the first line of the file and print the
# note dictionaries of its first output sequence.
with open(path, 'r') as file:
    line = file.readline().strip()

    # Parse the user's line as JSON data
    parsed_data = json.loads(line)

    # Printed unprocessed to show which keys each note dictionary carries
    print(parsed_data['output_sequence'][0]['notes'])

[{'endTime': 0.5, 'pitch': 71, 'quantizedEndStep': '2', 'velocity': 100}, {'endTime': 1.0, 'pitch': 69, 'quantizedEndStep': '4', 'quantizedStartStep': '2', 'startTime': 0.5, 'velocity': 100}, {'endTime': 2.0, 'pitch': 74, 'quantizedEndStep': '8', 'quantizedStartStep': '4', 'startTime': 1.0, 'velocity': 100}, {'endTime': 2.5, 'pitch': 72, 'quantizedEndStep': '10', 'quantizedStartStep': '8', 'startTime': 2.0, 'velocity': 100}, {'endTime': 3.0, 'pitch': 71, 'quantizedEndStep': '12', 'quantizedStartStep': '10', 'startTime': 2.5, 'velocity': 100}, {'endTime': 3.5, 'pitch': 67, 'quantizedEndStep': '14', 'quantizedStartStep': '12', 'startTime': 3.0, 'velocity': 100}, {'endTime': 4.0, 'pitch': 71, 'quantizedEndStep': '16', 'quantizedStartStep': '14', 'startTime': 3.5, 'velocity': 100}, {'endTime': 5.0, 'pitch': 69, 'quantizedEndStep': '20', 'quantizedStartStep': '16', 'startTime': 4.0, 'velocity': 100}, {'endTime': 6.0, 'pitch': 71, 'quantizedEndStep': '24', 'quantizedStartStep': '20', 'startT