    Ben Christensen
    Math 321
    11/2/17

Use markov chains to predict weather and create a sentence generator. We prime the sentence generator with text from Taylor Swift song lyrics for one and quotes from Yoda in the star wars movies for the other.

In [1]:
import numpy as np
from scipy import linalg as la
import pdb

In [2]:
def random_chain(n):
    """Create and return a transition matrix for a random Markov chain with
    'n' states. This should be stored as an nxn NumPy array.
    """
    Markov = np.random.rand(n,n)

    return Markov / np.sum(Markov, axis=0)


# Problem 2
def forecast(days):
    """Forecast tomorrow's weather given that today is hot."""
    transition = np.array([[0.7, 0.6], [0.3, 0.4]])

    i = 0
    predictions = []
    today = 0
    while i < days:
        if today == 0:
            today = np.random.binomial(1, transition[1, 0])
        elif today == 1:
            today = np.random.binomial(1, transition[1, 1])
        predictions.append(today)
        i += 1
    return predictions



# Problem 3
def four_state_forecast(days):
    """Run a simulation for the weather over the specified number of days,
    with mild as the starting state, using the four-state Markov chain.
    Return a list containing the day-by-day results, not including the
    starting day.

    Examples:
        >>> four_state_forecast(3)
        [0, 1, 3]
        >>> four_state_forecast(5)
        [2, 1, 2, 1, 1]
    """
    i = 0
    predictions = []
    today = 1
    transitionMatrix = np.array([ [.5, .3, .1, 0],
                                    [.3, .3, .3, .3],
                                    [.2, .3, .4, .5],
                                    [0, .1, .2, .2]])
    while i < days:
        today = np.random.multinomial(1, transitionMatrix[:, today]).nonzero()[0][0]
        predictions.append(today)
        i += 1
    return predictions

#Every Markov chain has at least one steady state distribution

# Problem 4
def steady_state(A, tol=1e-12, N=40):
    """Compute the steady state of the transition matrix A.

    Inputs:
        A ((n,n) ndarray): A column-stochastic transition matrix.
        tol (float): The convergence tolerance.
        N (int): The maximum number of iterations to compute.

    Raises:
        ValueError: if the iteration does not converge within N steps.

    Returns:
        x ((n,) ndarray): The steady state distribution vector of A.
    """
    n = np.shape(A)[0]
    x0 = np.random.rand(n)
    x0 = x0 / np.sum(x0)
    k = 0
    within_tol = False
    #pdb.set_trace()
    while k < N and within_tol != True:
        x1 = A @ x0
        if la.norm(x0 - x1) < tol:
            within_tol = True
        x0 = x1
        k += 1
    if within_tol != True:
        raise ValueError("A^k does not converge")
    return x0


# Problems 5 and 6
class SentenceGenerator(object):
    """Markov chain creator for simulating bad English.

    Attributes:
        (what attributes do you need to keep track of?)

    Example:
        >>> yoda = SentenceGenerator("Yoda.txt")
        >>> print(yoda.babble())
        The dark side of loss is a path as one with you.
    """
    def __init__(self, filename):
        """Read the specified file and build a transition matrix from its
        contents. You may assume that the file has one complete sentence
        written on each line.
        """
        with open(filename, 'r') as myFile:
            lines = myFile.readlines()
        uniques = set()
        for line in lines:
            line = line.split()
            for word in line:
                uniques.add(word)
        n = len(uniques) + 2
        Matrix = np.zeros((n, n))

        states = ["$tart"]
        myDict = {"$tart": 0}
        for line in lines:
            line = line.split()
            for i in range(len(line)):
                if line[i] not in states:
                    #Add new word to list of states if it isn't already there.
                    states.append(line[i])
                    #Create a dictionary entry from word to its index in the list of states
                    myDict[line[i]] = states.index(line[i])
                if i != 0:
                    #For all but the first word in the line, increment the
                    #transition matrix by 1 where the previous word in the line
                    #points to the current word in the line.
                    Matrix[myDict[line[i]], myDict[line[i-1]]] += 1
            Matrix[myDict[line[0]] , 0] += 1
            Matrix[-1, myDict[line[-1]]] += 1
        Matrix[-1, -1] = 1
        Matrix = Matrix / np.sum(Matrix, axis=0)

        states.append("$top")
        myDict["$top"] = states.index("$top")
        self.states = states
        self.dictionary = myDict
        self.transition = Matrix

    def babble(self):
        """Begin at the start sate and use the strategy from
        four_state_forecast() to transition through the Markov chain.
        Keep track of the path through the chain and the corresponding words.
        When the stop state is reached, stop transitioning and terminate the
        sentence. Return the resulting sentence as a single string.
        """
        i = 0
        sentence = ""
        state = "$tart"
        #self.transition
        while state != "$top":
            state = self.states[np.random.multinomial(1, self.transition[:, self.dictionary[state]]).nonzero()[0][0]]
            if state != "$top":
                sentence += " " + state
        return sentence.strip()



In [3]:
A = random_chain(5)
print(np.allclose(A @ steady_state(A), steady_state(A)))
print(np.linalg.matrix_power(A, 20), steady_state(A))

print(np.allclose(np.linalg.matrix_power(A, 20)[:, 0], steady_state(A)))

tM2 = np.array([ [.7, .6],
                [.3, .4]])
tM4 = np.array([ [.5, .3, .1, 0],
                [.3, .3, .3, .3],
                [.2, .3, .4, .5],
                [0, .1, .2, .2]])

ss1 = steady_state(tM2)
ss2 = steady_state(tM4)
print(ss1)
print(ss2)

for state in [0, 1]:
    print(len([i for i, j in enumerate(forecast(2000)) if j == state]) / 2000)
for state in [0, 1, 2, 3]:
    print(len([i for i, j in enumerate(four_state_forecast(2000)) if j == state]) / 2000)



True
[[ 0.21161069  0.21161069  0.21161069  0.21161069  0.21161069]
 [ 0.10680947  0.10680947  0.10680947  0.10680947  0.10680947]
 [ 0.21744493  0.21744493  0.21744493  0.21744493  0.21744493]
 [ 0.22626368  0.22626368  0.22626368  0.22626368  0.22626368]
 [ 0.23787123  0.23787123  0.23787123  0.23787123  0.23787123]] [ 0.21161069  0.10680947  0.21744493  0.22626368  0.23787123]
True
[ 0.66666667  0.33333333]
[ 0.24655172  0.3         0.33275862  0.12068966]
0.6705
0.3145
0.2395
0.2945
0.3305
0.107


In [9]:
filename1 = "/Users/benchristensen/Desktop/ACME Python Labs/Volume2-Student-Materials/MarkovChains/tswift1989.txt"
filename2 = "/Users/benchristensen/Desktop/ACME Python Labs/Volume2-Student-Materials/MarkovChains/yoda.txt"
swift_generator = SentenceGenerator(filename1)
yoda_generator = SentenceGenerator(filename2)
print("Create 5 sentences that sound like Taylor Swift")
for i in range(5):
    print(i+1, ". ", swift_generator.babble(), sep='')
print("\n")
print("Create 5 sentences that sound like Yoda.")
for i in range(5):
    print(i+1, ". ", yoda_generator.babble(), sep='')



Create 5 sentences that sound like Taylor Swift
1. love's a polaroid of thirst
2. just run and fake
3. this
4. you would
5. it's a thing that i'll write your heart i'll put it off i shake shake shake shake it all you can shake it off


Create 5 sentences that sound like Yoda.
1. Mourn them we must.
2. Trained as good you it dominate your opinion is.
3. More battalions to lose.
4. A Jedi craves not these things.
5. No interest in him not.
