In [1]:
# Title: Information Theory - Ex. 1.3
# Author: Vedat Sinan Ural
# Contact: vsural@gmail.com

# Code source(with lots of modifications): http://stackoverflow.com/q/11373192

import numpy as np
from numpy.random import random_sample


def weighted_values(values, probabilities, size):
    """
    Draw a random sequence from a discrete distribution and classify it.

    Every element of the sequence is drawn independently; values[i] is
    chosen with probability probabilities[i].

    It returns an array of the values that occurred in the sequence
    (unique_elements, in sorted order), their numbers of occurrences
    (counts), and a flag called typical.

    typical = 0 for a typical sequence and 1 for an atypical one.
    A sequence is typical when it is non-empty and at most one of its
    symbols differs from the most probable value.

    values: Values of your random variables (array or list)
    probabilities: Probability distribution (non-negative, sums to 1)
    size: Size of your output(i.e. Length of the message)

    Raises ValueError when values and probabilities differ in length or
    when probabilities is not a valid distribution.
    """

    # Accept plain lists as well as arrays; fancy indexing below needs arrays.
    values = np.asarray(values)
    probabilities = np.asarray(probabilities, dtype=float)

    if probabilities.ndim != 1 or len(values) != len(probabilities):
        raise ValueError(
            "values and probabilities must be 1-D and of equal length")
    if np.any(probabilities < 0) or not np.isclose(probabilities.sum(), 1.0):
        raise ValueError(
            "probabilities must be non-negative and sum to 1")

    # Create the sequence according to the values and probabilities.
    # Only the interior cumulative edges are used as thresholds: rounding
    # can leave the final cumulative sum slightly below 1, and a sample at
    # or above it would otherwise map to an index one past the end.
    thresholds = np.cumsum(probabilities)[:-1]
    indices = np.searchsorted(thresholds, random_sample(size), side="right")
    sequence = values[indices]

    # Finding the element counts
    unique_elements, counts = np.unique(sequence, return_counts=True)

    # Check if the sequence is typical or not by counting the symbols that
    # differ from the most probable one.  Working on the drawn indices keeps
    # the result independent of how np.unique happens to sort the symbols.
    dominant_index = np.argmax(probabilities)
    deviations = indices.size - np.count_nonzero(indices == dominant_index)

    if indices.size > 0 and deviations <= 1:
        typical = 0
    else:
        typical = 1  # Atypical case

    return unique_elements, counts, typical


In [3]:
# Determine the size of the output, values and probabilities:
N = 16
values = np.array(["A", "B", "C", "D"])
probabilities = np.array([0.985, 0.005, 0.005, 0.005])

# No random seed is set, so the drawn sequence can differ between runs.
unique_els, counts, typical = weighted_values(values, probabilities, N)

# %-formatting prints the same "<symbols> <counts>" line whether print is
# the Python 2 statement or the Python 3 function.
print("%s %s" % (unique_els, counts))

# weighted_values reports 0 for a typical sequence and 1 for an atypical one.
if typical == 0:
    print("Typical")
else:
    print("Atypical")

['A'] [16]
Typical
