In [1]:
import json

In [2]:
# Data file consumed by all_user_sessions, which reads it one line at a time
# and parses each line as a JSON record.
path = "../Data/DoodleSample"

### Extract input/output sequences 

In [3]:
def sort_key(note):
    """Return a note's quantized start step as an int, for use as a sort key.

    Args:
        note: Mapping describing one note; its 'quantizedStartStep' entry,
            when present, must be convertible with int().

    Returns:
        int: The start step, or 0 when the note has no 'quantizedStartStep'.
    """
    start_step = note.get('quantizedStartStep', 0)
    return int(start_step)

In [13]:
def _quantized_step(note, key):
    """Return the step stored under ``key`` in ``note`` as an int (0 when the key is absent)."""
    return int(note.get(key, 0))


def _expand_to_steps(notes):
    """Repeat each note's pitch once per quantized step it spans (end step minus start step)."""
    expanded = []
    for note in notes:
        duration = (_quantized_step(note, 'quantizedEndStep')
                    - _quantized_step(note, 'quantizedStartStep'))
        # A non-positive duration contributes nothing, as list repetition by <= 0 is empty.
        expanded.extend([note['pitch']] * duration)
    return expanded


def all_user_sessions(path, num_users, total_steps=32):
    """
    Extracts pitch sequences from user sessions for the input and the output generated by Coconet.

    The file is read one line per user; each line is parsed as a JSON object with
    parallel 'input_sequence' and 'output_sequence' lists, whose entries hold a
    'notes' list. For every session the notes are sorted by quantized start step
    (0 when absent). A session is kept only when its last input note ends exactly
    at ``total_steps``. The input is then expanded so each pitch appears once per
    quantized step the note lasts, and sessions whose expanded input repeats an
    earlier one of the same user are dropped.

    Args:
        path (str): Path to the file containing one JSON record per line.
        num_users (int): Maximum number of lines (users) to read. Reading stops
            early, without error, when the file has fewer lines.
        total_steps (int): Quantized end step the last input note must have for
            the session to be kept. Defaults to 32.

    Returns:
        list: One list per user read. Each user list holds (input, output) pairs,
        where input is a list of pitches repeated per step and output is a tuple
        of pitches ordered by start step.

    Raises:
        json.JSONDecodeError: If a line that was read is blank or not valid JSON.
        KeyError: If a record, session or note lacks an expected key.
        IndexError: If 'output_sequence' is shorter than 'input_sequence'.
    """
    all_sessions = []

    with open(path, 'r') as file:
        for _ in range(num_users):
            raw_line = file.readline()
            if not raw_line:
                # readline() returns '' only at end of file: no more users to read.
                break

            # Parse the user's line as JSON data
            parsed_data = json.loads(raw_line.strip())

            user_list = []
            # Expanded inputs already stored for this user, kept as hashable tuples
            # so the duplicate check compares like with like.
            seen_inputs = set()

            for session_index, input_session in enumerate(parsed_data['input_sequence']):
                input_notes = input_session['notes']
                output_notes = parsed_data['output_sequence'][session_index]['notes']

                # Sort the input and output notes by quantized start step (0 if not present)
                sorted_input = sorted(
                    input_notes, key=lambda note: _quantized_step(note, 'quantizedStartStep'))
                sorted_output = sorted(
                    output_notes, key=lambda note: _quantized_step(note, 'quantizedStartStep'))

                # A session without input notes has no last note to inspect; skip it.
                if not sorted_input:
                    continue

                # Keep only sessions whose last input note ends at the expected step.
                if _quantized_step(sorted_input[-1], 'quantizedEndStep') != total_steps:
                    continue

                input_pitch_with_beat = _expand_to_steps(sorted_input)

                # Skip the session if this user already has the same expanded input.
                dedup_key = tuple(input_pitch_with_beat)
                if dedup_key in seen_inputs:
                    continue
                seen_inputs.add(dedup_key)

                output_pitch_tuple = tuple(note['pitch'] for note in sorted_output)
                user_list.append((input_pitch_with_beat, output_pitch_tuple))

            all_sessions.append(user_list)

    return all_sessions

In [14]:
# Process the first six lines (users) of the data file.
data = all_user_sessions(path, num_users=6)

In [15]:
def print_input_output(data):
    """Print the input and output sequence of every session, grouped by user.

    Args:
        data: Iterable of users. Each user is an iterable of sessions, and each
            session is indexable with the input at position 0 and the output
            at position 1.
    """
    for user_sessions in data:
        print("New User")
        for pair in user_sessions:
            # Fetch both halves before printing anything for this session.
            shown_input, shown_output = pair[0], pair[1]
            print("Input: ", shown_input)
            # The doubled newline leaves a blank line after each session.
            print("Output: ", shown_output, end="\n\n")

In [16]:
# Print every (input, output) pair held in `data`, one "New User" header per user.
print_input_output(data)

New User
Input:  [71, 71, 69, 69, 74, 74, 74, 74, 72, 72, 71, 71, 67, 67, 71, 71, 69, 69, 69, 69, 72, 72, 76, 76, 69, 69, 74, 74, 67, 67, 72, 72]
Output:  (71, 67, 62, 55, 69, 74, 69, 53, 72, 67, 64, 52, 71, 65, 62, 67, 67, 60, 50, 71, 64, 48, 69, 69, 60, 53, 72, 65, 45, 76, 64, 59, 47, 69, 65, 60, 45, 74, 62, 47, 57, 67, 64, 55, 48, 72, 65, 50, 57)

New User
Input:  [72, 72, 72, 72, 67, 67, 67, 67, 71, 71, 71, 71, 69, 69, 69, 69, 71, 71, 71, 71, 74, 74, 74, 74, 77, 77, 77, 77, 76, 76, 76, 76]
Output:  (72, 64, 55, 52, 57, 67, 59, 55, 62, 60, 54, 71, 67, 62, 55, 69, 66, 50, 71, 67, 62, 55, 57, 74, 59, 57, 77, 59, 76, 60, 60)

New User
Input:  [67, 67, 71, 71, 71, 71, 71, 71, 65, 65, 65, 65, 65, 65, 65, 65, 67, 67, 67, 67, 71, 71, 71, 71, 71, 71, 65, 65, 71, 71, 67, 67]
Output:  (67, 63, 58, 51, 71, 65, 53, 67, 55, 63, 56, 53, 65, 58, 46, 48, 65, 50, 67, 63, 58, 51, 71, 65, 60, 56, 58, 58, 65, 62, 71, 64, 60, 60, 67, 63)

Input:  [62, 62, 71, 71, 71, 71, 71, 71, 65, 65, 71, 71, 65, 65, 

## JSON file sample

In [None]:
# Inspect the notes of the first input session of one sample record.
sample_line_number = 5  # 1-based line of the data file to inspect

with open(path, 'r') as file:
    # Each iteration overwrites `line`, so only the last line read is kept.
    for _ in range(sample_line_number):
        line = file.readline().strip()

# Parse the sampled line as JSON data
parsed_data = json.loads(line)
notes = parsed_data['input_sequence'][0]['notes']

# Notes ordered by start time (0.0 when a note has no 'startTime')
sorted_data = sorted(notes, key=lambda x: x.get('startTime', 0.0))
print(sorted_data)

# Quantized start steps as integers (0 when absent), in ascending order
quantized_start_steps = sorted(int(item.get('quantizedStartStep', 0)) for item in sorted_data)
print(quantized_start_steps)

# Print each note's pitch with its quantized start step. The value is read from
# 'quantizedStartStep' so it matches the printed label, defaulting to 0 when the
# key is absent, as in the list above.
for note in notes:
    note_pitch = note['pitch']
    quantized_start_step = note.get('quantizedStartStep', 0)

    print(f'Note: {note_pitch}, Quantized Start Step: {quantized_start_step}')