# **GENERATING TEXT WITH MARKOV CHAINS**

In [41]:
# Transition table for a two-state weather chain: each state maps to the
# list of states that may follow it. Repeated entries act as weights when a
# successor is picked uniformly from the list ('sun' stays 'sun' in 9 of
# its 10 entries; 'rain' has one entry for each outcome).
weather_chain = {
    'sun': ['sun'] * 9 + ['rain'],
    'rain': ['sun', 'rain'],
}

In [42]:
import random

# Start from a state drawn uniformly from the chain's keys, then extend the
# sequence ten times; every new state is sampled from the successors of the
# most recent one.
weather = [random.choice(list(weather_chain))]
for i in range(10):
    weather.append(random.choice(weather_chain[weather[-1]]))

In [44]:
['rain', 'rain', 'rain', 'sun', 'sun', 'sun', 'sun', 'sun', 'rain','rain','sun']

['rain',
 'rain',
 'rain',
 'sun',
 'sun',
 'sun',
 'sun',
 'sun',
 'rain',
 'rain',
 'sun']

In [45]:
def build_model(source, state_size):
    '''
    Build a Markov chain lookup table from a text corpus.

    source: the corpus as a single string; it is split on whitespace.
    state_size: how many consecutive words make up one state.

    Returns a dict that maps each state (its words joined by single spaces)
    to the list of words seen right after it, in corpus order and with
    repeats kept. A corpus with no more than <state_size> words yields an
    empty dict.
    '''
    tokens = source.split()
    chain = {}
    for end in range(state_size, len(tokens)):
        # The state is the window of <state_size> words just before `end`.
        state = ' '.join(tokens[end - state_size:end])
        chain.setdefault(state, []).append(tokens[end])

    return chain

In [46]:
{
  "An apple":[
    "is"
  ],
  "apple is":[
    "very"
  ],
  "is very":[
    "good.",
    "bad."
  ],
  "very good.":[
    "An"
  ],
  "good. An":[
    "orange"
  ],
  "An orange":[
    "is"
  ],
  "orange is":[
    "very"
  ]
}

{'An apple': ['is'],
 'apple is': ['very'],
 'is very': ['good.', 'bad.'],
 'very good.': ['An'],
 'good. An': ['orange'],
 'An orange': ['is'],
 'orange is': ['very']}

In [47]:
def generate_text(model, state_size, min_length):
    '''
    Consume a Markov chain model (make sure to specify the <state_size> used)
    to generate text that is more than <min_length> words long.

    model: dict mapping a state (<state_size> words joined by single spaces)
        to the list of words that may follow it.
    state_size: number of words per state; it must match the value the
        model was built with.
    min_length: generation may stop only once the word count exceeds this.

    Returns the generated words joined by single spaces. Generation stops at
    the first word ending in '.' once the length requirement is met, so the
    loop does not terminate if no such word can ever be drawn.

    Raises IndexError if no state in the model starts with an uppercase
    character, because there is then nothing to open a sentence with.
    '''
    # Sentence openers: states whose first character is uppercase. Computed
    # once since the model is not modified while generating. s[:1] (rather
    # than s[0]) keeps an empty-string state from raising on indexing.
    starters = [s for s in model if s[:1].isupper()]
    if not starters:
        # IndexError matches what random.choice raises for an empty sequence.
        raise IndexError('model has no state starting with an uppercase letter')

    def get_new_starter():
        # split() returns a fresh list on every call, so extending the text
        # never mutates anything shared.
        return random.choice(starters).split(' ')

    text = get_new_starter()

    i = state_size
    while True:
        key = ' '.join(text[i-state_size:i])
        if key not in model:
            # Dead end: restart with a fresh opener. The opener appends a
            # whole state, so the cursor must jump to the new end of the
            # text; advancing it by one would leave it behind and make later
            # lookups straddle the restart boundary.
            text += get_new_starter()
            i = len(text)
            continue

        next_word = random.choice(model[key])
        text.append(next_word)
        i += 1
        # Stop only at a sentence end once enough words were produced.
        if i > min_length and text[-1][-1] == '.':
            break
    return ' '.join(text)

In [48]:
'An apple is very bad.'
'An orange is very bad.'
'An orange is very good.'
'An apple is very good.'

'An apple is very good.'