In [1]:
import numpy as np

In [2]:
class SentenceGenerator(object):
    """Markov chain creator for simulating bad English.

    The chain has one state per unique whitespace-separated token in the
    training file, plus an artificial start state (index 0) and an
    artificial stop state (last index).

    Attributes:
        states (list): state labels; ``states[0]`` is ``'$tart'``,
            ``states[-1]`` is ``'$top'``, and the training words sit in
            between in order of first appearance.
        transition (np.ndarray): column-stochastic transition matrix;
            ``transition[i, j]`` is the probability of moving from state
            ``j`` to state ``i``.

    Example:
        >>> yoda = SentenceGenerator("Yoda.txt")
        >>> print(yoda.babble())
        The dark side of loss is a path as one with you.
    """

    def __init__(self, filename):
        """Read the specified file and build a transition matrix from its
        contents. The file is expected to have one complete sentence per
        line; blank lines are ignored.

        Parameters:
            filename (str): path of the training text file.

        Raises:
            ValueError: if the file contains no words at all, since no
                transition matrix can be built from it.
        """
        with open(filename) as f:
            sentences = [line.split() for line in f]
        # Blank lines split to an empty list and carry no transitions.
        sentences = [words for words in sentences if words]
        if not sentences:
            raise ValueError(
                "no sentences found in {!r}".format(filename))

        # A dict keeps first-appearance order (Python 3.7+), so the state
        # numbering is reproducible from run to run, unlike a set.
        vocabulary = dict.fromkeys(
            word for words in sentences for word in words)
        states = ['$tart'] + list(vocabulary) + ['$top']

        # Constant-time word -> state lookup. The sentinels are addressed
        # by position, so a literal '$tart' or '$top' token in the text is
        # treated as an ordinary word.
        start, stop = 0, len(states) - 1
        index_of = {word: k for k, word in enumerate(vocabulary, start=1)}

        counts = np.zeros((len(states), len(states)))
        for words in sentences:
            previous = start
            for word in words:
                current = index_of[word]
                counts[current, previous] += 1
                previous = current
            counts[stop, previous] += 1  # last word leads to the stop state

        # The stop state is absorbing, which also keeps its column sum
        # nonzero for the normalization below.
        counts[stop, stop] = 1
        # Every column now has at least one count, so this cannot divide
        # by zero: each word is followed by another word or by stop.
        counts /= counts.sum(axis=0)

        self.transition = counts
        self.states = states

    def babble(self):
        """Generate one random sentence by walking the Markov chain.

        Starting from the start state, repeatedly draw the next state from
        the current state's column of the transition matrix until the stop
        state is reached.

        Returns:
            str: the visited words joined by single spaces (the start and
            stop markers are not included).
        """
        stop = len(self.states) - 1
        state = 0
        words = []
        while True:
            # A single multinomial trial yields a one-hot vector; the
            # position of the 1 is the next state.
            draw = np.random.multinomial(1, self.transition[:, state])
            state = int(np.argmax(draw))
            if state == stop:
                break
            words.append(self.states[state])
        return ' '.join(words)



In [15]:
obj = SentenceGenerator("yoda.txt")
# Print ten generated sentences, each prefixed with its sample number.
for i in range(10):
    sentence = obj.babble()
    print(i, sentence)

0 Who he begins?
1 Careful you the horizon.
2 I watched.
3 How can I have.
4 Nothing more knowledge and that is.
5 If a special session of the dark side of the Jedi must be when sensing the star and his father.
6 To see the sway of greed, that misread could have turned to see.
7 Yes, too old you the force.
8 How can we should be.
9 See through the future is.
