In [1]:
import json

In [2]:
# Location of the sample data file, relative to this notebook's directory.
path = "../Data/DoodleSample"

### Extract input/output sequences 

In [21]:
def _unique_pitch_tuples(sessions):
    """Return each session's pitches, ordered by note start time, as unique tuples.

    Duplicate pitch sequences are dropped; the first occurrence keeps its position.
    """
    unique_tuples = []
    seen = set()  # constant-time membership test; the list preserves first-seen order

    for session in sessions:
        # Notes without a 'startTime' entry are treated as starting at 0.0.
        # sorted() is stable, so notes with equal start times keep their file order.
        ordered_notes = sorted(session['notes'], key=lambda note: note.get('startTime', 0.0))
        pitch_tuple = tuple(note['pitch'] for note in ordered_notes)

        if pitch_tuple not in seen:
            seen.add(pitch_tuple)
            unique_tuples.append(pitch_tuple)

    return unique_tuples


def all_user_sessions(path, key, num_users):
    """
    Extract the unique pitch sequences of each user's sessions.

    The file at `path` is read line by line; every non-blank line is parsed as one
    JSON object holding a single user's data. For each user, the sessions stored
    under `key + '_sequence'` are converted to tuples of pitch values (notes ordered
    by start time), and duplicate tuples within that user are removed.

    Args:
        path (str): Path to the data file, one JSON object per line.
        key (str): Prefix selecting the sequences to read, e.g. 'input' reads
            'input_sequence' (or 'output' for the generated sequences).
        num_users (int): Maximum number of users (non-blank lines) to process.
            If the file holds fewer users, all available users are returned.

    Returns:
        list: One list per user, each containing that user's unique pitch tuples
        in order of first appearance.

    Raises:
        OSError: If the file cannot be opened.
        json.JSONDecodeError: If a non-blank line is not valid JSON.
        KeyError: If a user record lacks `key + '_sequence'`, or a session or note
            lacks 'notes' / 'pitch'.
    """
    sequence_key = key + '_sequence'
    all_sessions = []

    # JSON text is UTF-8 by specification; do not depend on the platform's locale.
    with open(path, 'r', encoding='utf-8') as file:
        for line in file:
            if len(all_sessions) >= num_users:
                break

            line = line.strip()
            if not line:
                # Blank lines (e.g. a trailing newline) carry no user record.
                continue

            parsed_data = json.loads(line)
            all_sessions.append(_unique_pitch_tuples(parsed_data[sequence_key]))

    return all_sessions


In [22]:
# Unique input pitch sequences for up to three users, one list per user.
s = all_user_sessions(path, key='input', num_users=3)
s

[[(71, 69, 74, 74, 72, 71, 67, 71, 69),
  (71, 69, 74, 74, 72, 71, 67, 71, 69, 72, 76, 69, 74, 67, 72)],
 [(72, 67, 71, 69), (72, 72, 67, 71, 69, 71, 74, 77, 76)],
 [(67, 71, 71, 71, 65, 65, 67, 71, 71, 65, 71, 67),
  (62, 71, 71, 71, 65, 71, 65, 67, 62, 71, 71, 62, 69, 67)]]

In [15]:
# Peek at the first record of the data file to inspect its raw structure.
# Only the read needs the open file handle; parsing happens after it is closed.
with open(path, 'r') as file:
    line = file.readline().strip()

# The first line holds one JSON object.
parsed_data = json.loads(line)

print(parsed_data)

{'backend': ['l', 'l', 'l'], 'composition_time': [81541, 128110, 13443], 'country': ['es', 'es', 'es'], 'feedback': ['2', '0', '2'], 'input_sequence': [{'notes': [{'endTime': 0.5, 'pitch': 71, 'pitchName': 'B', 'quantizedEndStep': '2', 'velocity': 100}, {'endTime': 1.0, 'pitch': 69, 'pitchName': 'A', 'quantizedEndStep': '4', 'quantizedStartStep': '2', 'startTime': 0.5, 'velocity': 100}, {'endTime': 1.5, 'pitch': 74, 'pitchName': 'D', 'quantizedEndStep': '6', 'quantizedStartStep': '4', 'startTime': 1.0, 'velocity': 100}, {'endTime': 2.0, 'pitch': 74, 'pitchName': 'D', 'quantizedEndStep': '8', 'quantizedStartStep': '6', 'startTime': 1.5, 'velocity': 100}, {'endTime': 2.5, 'pitch': 72, 'pitchName': 'C', 'quantizedEndStep': '10', 'quantizedStartStep': '8', 'startTime': 2.0, 'velocity': 100}, {'endTime': 3.0, 'pitch': 71, 'pitchName': 'B', 'quantizedEndStep': '12', 'quantizedStartStep': '10', 'startTime': 2.5, 'velocity': 100}, {'endTime': 3.5, 'pitch': 67, 'pitchName': 'G', 'quantizedEndSt