In [3]:
from typing import List
import itertools

def chain_dialog_history(
    sys_utterances: List[str],
    usr_utterances: List[str],
    sys_token: str = "",
    usr_token: str = "",
    sep_token: str = "",
) -> List[tuple]:
    """
    Combine system and user utterances turn by turn into a dialog history.

    Each turn is emitted as one 5-tuple
    ``(sys_token, sys_utterance, sep_token, usr_token, usr_utterance)``.
    Tokens left at their default appear as empty strings in the tuple.

    Args:
        sys_utterances: List of all system utterances (one turn per list entry)
        usr_utterances: List of all user utterances (one turn per list entry)
        sys_token: Token placed before each system utterance, e.g. "[SYS]" -> [SYS] sys turn 1 ... [SYS] sys turn 2 ...
        usr_token: Token placed before each user utterance, e.g. "[USR]" -> ... [USR] usr turn 1 ... [USR] usr turn 2 ...
        sep_token: Separator token placed between the system and the user utterance of a turn, e.g. "[SEP]" -> sys turn 1 [SEP] usr turn 1

    Returns:
        List with one tuple per turn, in turn order:
        (sys_token, sys_utterance, sep_token, usr_token, usr_utterance)

    Raises:
        ValueError: If the two utterance lists do not have the same length.
    """
    # Validate explicitly instead of using `assert`: asserts are stripped under
    # `python -O`, and zip() would then silently drop the unmatched turns.
    if len(sys_utterances) != len(usr_utterances):
        raise ValueError(
            "sys_utterances and usr_utterances must have the same number of turns "
            f"(got {len(sys_utterances)} and {len(usr_utterances)})"
        )
    return [
        (sys_token, sys_utterance, sep_token, usr_token, usr_utterance)
        for sys_utterance, usr_utterance in zip(sys_utterances, usr_utterances)
    ]
    


In [4]:
# Demo: print the combined history for every combination of the optional tokens.
sys_hist = ["system 1", "system 2"]
usr_hist = ["usr 1", "usr 2"]

# One keyword-argument set per printed line; order determines output order.
token_configs = [
    {"sys_token": "[SYS]"},
    {"usr_token": "[USR]"},
    {"sep_token": "[SEP]"},
    {"sys_token": "[SYS]", "usr_token": "[USR]"},
    {"sys_token": "[SYS]", "sep_token": "[SEP]"},
    {"usr_token": "[USR]", "sep_token": "[SEP]"},
    {"sys_token": "[SYS]", "usr_token": "[USR]", "sep_token": "[SEP]"},
]
for token_kwargs in token_configs:
    print(chain_dialog_history(sys_hist, usr_hist, **token_kwargs))

[('[SYS]', 'system 1', '', '', 'usr 1'), ('[SYS]', 'system 2', '', '', 'usr 2')]
[('', 'system 1', '', '[USR]', 'usr 1'), ('', 'system 2', '', '[USR]', 'usr 2')]
[('', 'system 1', '[SEP]', '', 'usr 1'), ('', 'system 2', '[SEP]', '', 'usr 2')]
[('[SYS]', 'system 1', '', '[USR]', 'usr 1'), ('[SYS]', 'system 2', '', '[USR]', 'usr 2')]
[('[SYS]', 'system 1', '[SEP]', '', 'usr 1'), ('[SYS]', 'system 2', '[SEP]', '', 'usr 2')]
[('', 'system 1', '[SEP]', '[USR]', 'usr 1'), ('', 'system 2', '[SEP]', '[USR]', 'usr 2')]
[('[SYS]', 'system 1', '[SEP]', '[USR]', 'usr 1'), ('[SYS]', 'system 2', '[SEP]', '[USR]', 'usr 2')]
