In [None]:
def generate_multiple_sequences(num, seq_len, query_distribution, *, seed=None):
    """
        Generates a list of query sequences with query IDs sampled (with replacement)
        from a given frequency distribution.

        Input:
            num: The number of sequences to generate. If num <= 0 an empty list is
                returned and the random state is left untouched.
            seq_len: length of each sequence to be generated.
            query_distribution: a non-empty iterable of tuples of the form:
                [(query_id, query_freq), ...]
                The frequencies need not sum to 1; they are normalized here.
            seed: optional, keyword-only. Passed to np.random.seed() once per call so
                that the output can be reproduced. The default (None) reseeds from
                fresh entropy, so repeated calls give different sequences.
        Output:
            a list of query sequences of the form 'sequence_number.query_number,query_id' e.g.,
            [['0.0,89123', '0.1,23837', '0.2,5438', '0.3,5438', '0.4,5438'],
                 ['1.0,5438', '1.1,23837', '1.2,5438', '1.3,5438', '1.4,5438']]
        Raises:
            ValueError: if query_distribution is empty, or if its frequencies do not
                sum to a positive number.
        Note:
            Reseeds NumPy's global random state (np.random.seed) as a side effect
            whenever num > 0.
    """
    if num <= 0:
        # Nothing to generate; skip validation and seeding entirely.
        return []

    pairs = list(query_distribution)
    if not pairs:
        raise ValueError("query_distribution must contain at least one (query_id, query_freq) pair")

    # Normalize the frequencies to form a probability distribution. This does not
    # depend on the sequence being generated, so it is done once, up front.
    query_ids, frequencies = zip(*pairs)
    weights = np.asarray(frequencies, dtype=float)
    total = weights.sum()
    if not total > 0:  # also rejects a NaN total
        raise ValueError("query frequencies must sum to a positive number")
    probabilities = weights / total

    # Seed once per call rather than once per sequence.
    np.random.seed(seed)

    sequences = []
    for seq_no in range(num):
        sampled = np.random.choice(query_ids, size=seq_len, replace=True, p=probabilities)
        sequences.append([f"{seq_no}.{pos},{query_id}" for pos, query_id in enumerate(sampled)])

    return sequences

In [None]:
'''
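example usage: generate 5 sequences of 2000 queries each, where query_distribution
is the [(query_id, query_freq), ...] list (e.g. loaded from query-distributions.tsv)
generate_multiple_sequences(5, 2000, query_distribution)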
'''