In [1]:
%matplotlib inline
import nltk
import os
import re
import random

In [7]:
# Toy discrete distribution: outcome -> probability.
words = {
    "cat": 0.5,
    "dog": 0.2,
    "lemur": 0.3,
}

In [27]:
def discrete_prob(d):
    """Draw one key from ``d`` with probability proportional to its weight.

    Parameters
    ----------
    d : dict
        Maps each outcome to a non-negative weight. The weights do not need
        to sum to 1 (raw counts work as well): the random draw is scaled by
        their total.

    Returns
    -------
    object
        One of the keys of ``d``.

    Raises
    ------
    ValueError
        If ``d`` is empty or its weights do not add up to a positive number.
    """
    total = sum(d.values())
    if not d or total <= 0:
        raise ValueError("discrete_prob needs at least one outcome with positive weight")

    # Scale the uniform draw to the total weight so that un-normalised
    # weights are sampled correctly instead of falling off the end.
    r = random.random() * total
    cumulative = 0
    fallback = None
    for k, weight in d.items():
        if weight > 0:
            fallback = k
        cumulative += weight
        if r < cumulative:
            return k
    # Floating-point rounding can leave r >= cumulative after the last key;
    # return the last outcome that had positive weight rather than None.
    return fallback


In [26]:
# Draw one sample from the `words` distribution and print it.
# NOTE(review): the saved output below shows several draws interleaved with
# numbers that this code does not print, so it appears to come from an earlier
# version of this cell / discrete_prob - re-run to refresh it.
for i in range(1):
    print(discrete_prob(words))

0.9341057473498428
cat
0.5
dog
0.7
lemur
1.0
lemur


In [35]:
# Hand-written first-order chain: current state -> {next state: probability}.
mc = {
    "cat": {"cat": 0.9, "dog": 0.1},
    "dog": {"cat": 0.1, "dog": 0.9},
}

## Generating text from a Markov chain

In [97]:
def generate(mc, sep, length=500):
    """Generate a sequence by walking a Markov chain from a random start state.

    Parameters
    ----------
    mc : dict
        Transition table mapping a state to ``{next_token: weight}``. States
        may be tuples of tokens (one entry per step of context) or single
        non-tuple keys, which are treated as a context of length one.
    sep : str
        Separator placed between the generated tokens.
    length : int, optional
        Maximum number of tokens to generate (default 500).

    Returns
    -------
    str
        The generated tokens joined by ``sep``. The start state itself is not
        part of the output. The result is shorter than ``length`` if the walk
        reaches a state that has no entry in ``mc``.

    Raises
    ------
    IndexError
        If ``mc`` is empty (there is no state to start from).
    """
    start = random.choice(list(mc))
    # Plain (non-tuple) keys must not be split: list("cat") would turn the
    # state into its characters and the lookup below would fail.
    keyed_by_tuple = isinstance(start, tuple)
    state = start if keyed_by_tuple else (start,)
    order = len(state)

    seq = []
    for _ in range(length):
        key = state if keyed_by_tuple else state[0]
        if key not in mc:
            # Dead end: this context was never followed by anything.
            break
        token = discrete_prob(mc[key])
        seq.append(token)
        # Slide the context window, keeping it at a fixed length. The explicit
        # order-0 case is needed because x[-0:] is the whole tuple.
        state = (state + (token,))[-order:] if order else ()
    return sep.join(seq)

In [103]:
# Generate from the hand-written chain `mc`. Its keys are re-wrapped as
# 1-tuples because generate() looks states up by tuple. (The learned chain
# `mc2` is only created further down, so it cannot be used here on a fresh
# kernel.)
mc_tupled = {(state,): transitions for state, transitions in mc.items()}
seq = generate(mc_tupled, " ")

In [104]:
# Display the generated sequence.
seq

'cat cat cat cat cat dog dog dog dog dog dog dog dog dog dog dog dog cat cat cat cat cat cat cat cat cat cat cat cat cat cat cat cat cat cat cat cat cat cat cat cat dog dog dog dog dog dog dog dog dog dog dog dog dog dog dog dog dog dog cat cat cat dog dog dog dog dog dog dog dog dog dog dog dog dog dog dog dog dog dog cat cat cat cat cat cat cat cat cat cat cat cat cat cat cat dog dog dog dog dog dog cat cat cat cat cat cat cat cat cat cat cat dog dog dog dog dog dog dog dog cat cat cat cat cat dog dog dog dog dog dog dog dog cat cat cat dog dog dog dog dog dog dog dog dog dog dog dog cat cat cat cat cat cat cat cat cat cat cat cat cat cat cat cat cat cat cat cat dog dog dog dog dog dog dog dog dog dog dog dog dog dog cat cat cat cat cat cat cat cat cat cat dog dog dog dog cat cat cat cat cat cat cat cat cat cat cat cat cat cat cat cat cat cat cat dog dog dog dog dog dog dog dog dog dog dog dog dog dog cat cat cat cat cat cat cat cat cat cat cat cat cat cat cat cat cat cat cat cat cat

In [62]:
# Split the generated string on whitespace into a list of tokens.
seq_list = seq.split()


## Learning the probabilities for a Markov chain

In [88]:
def learn_markov(seq_list, order, pad="_"):
    """Estimate the transition probabilities of an order-``order`` Markov chain.

    Parameters
    ----------
    seq_list : iterable
        The training sequence of hashable tokens (e.g. words or characters).
    order : int
        Number of preceding tokens that make up a state. Must be >= 0;
        ``order=0`` yields a single state ``()`` holding the overall token
        frequencies.
    pad : object, optional
        Placeholder used to fill the context before the first tokens have
        been seen (default ``"_"``). Pick a value that does not occur in the
        data if ``"_"`` is a real token.

    Returns
    -------
    dict
        Maps each observed state (a tuple of ``order`` tokens) to a dict
        ``{next_token: probability}`` whose values sum to 1.

    Raises
    ------
    ValueError
        If ``order`` is negative.
    """
    if order < 0:
        raise ValueError("order must be non-negative")

    # First pass: count how often each token follows each context.
    counts = {}
    state = (pad,) * order
    for token in seq_list:
        followers = counts.setdefault(state, {})
        followers[token] = followers.get(token, 0) + 1
        # Slide the window. order == 0 needs its own branch because
        # x[-0:] returns the whole tuple and the context would start growing.
        state = (state + (token,))[-order:] if order else ()

    # Second pass: turn the counts of each state into relative frequencies.
    for followers in counts.values():
        total = sum(followers.values())
        for token in followers:
            followers[token] /= total
    return counts

In [109]:
# Learn a chain from the token list, using the 4 preceding tokens as the state.
mc2 = learn_markov(seq_list, 4)

In [110]:
# Inspect the learned transition table.
mc2

{('_', '_', '_', '_'): {'cat': 1.0},
 ('_', '_', '_', 'cat'): {'cat': 1.0},
 ('_', '_', 'cat', 'cat'): {'cat': 1.0},
 ('_', 'cat', 'cat', 'cat'): {'cat': 1.0},
 ('cat', 'cat', 'cat', 'cat'): {'cat': 0.8809523809523809,
  'dog': 0.11904761904761904},
 ('cat', 'cat', 'cat', 'dog'): {'dog': 1.0},
 ('cat', 'cat', 'dog', 'dog'): {'dog': 0.8571428571428571,
  'cat': 0.14285714285714285},
 ('cat', 'dog', 'dog', 'dog'): {'dog': 0.95, 'cat': 0.05},
 ('dog', 'dog', 'dog', 'dog'): {'dog': 0.9104477611940298,
  'cat': 0.08955223880597014},
 ('dog', 'dog', 'dog', 'cat'): {'cat': 0.9473684210526315,
  'dog': 0.05263157894736842},
 ('dog', 'dog', 'cat', 'cat'): {'cat': 1.0},
 ('dog', 'cat', 'cat', 'cat'): {'cat': 0.95, 'dog': 0.05},
 ('cat', 'dog', 'dog', 'cat'): {'cat': 0.6666666666666666,
  'dog': 0.3333333333333333},
 ('dog', 'dog', 'cat', 'dog'): {'dog': 1.0},
 ('dog', 'cat', 'dog', 'dog'): {'dog': 1.0}}

In [111]:
# Generate a new space-separated sequence from the learned chain and display it.
seq = generate(mc2, " ")
seq

'cat cat cat cat cat cat cat cat cat cat cat cat dog dog dog dog dog dog dog dog dog dog dog dog dog cat cat cat cat cat cat cat dog dog cat cat cat cat cat cat dog dog cat cat cat cat cat cat dog dog dog dog dog dog dog dog dog cat cat cat cat cat cat cat cat cat dog dog cat cat cat cat cat dog dog cat cat cat cat cat cat cat cat cat cat cat cat dog dog dog dog dog dog dog dog cat cat cat cat cat cat cat cat cat cat dog dog cat cat cat cat cat dog dog dog dog dog dog dog dog dog dog dog dog dog dog dog dog dog cat cat cat cat cat cat dog dog dog dog dog dog dog dog dog dog dog dog dog dog dog dog dog dog dog dog dog dog dog dog dog dog dog dog dog dog dog dog dog dog dog dog dog cat cat cat cat cat cat cat cat cat cat cat cat cat cat cat dog dog dog dog dog cat cat cat cat cat cat cat cat dog dog dog dog dog dog dog dog dog dog dog dog dog dog dog dog dog dog dog dog dog cat cat cat cat cat cat dog dog dog cat cat cat cat dog dog dog dog dog dog dog dog dog dog dog dog dog dog dog dog

## Loading up the sequence data

In [116]:
# Build one flat list of character tokens from every .txt file in WheelOfTime/.
documents = []
# os.listdir() returns names in arbitrary order; sort them so the corpus is
# assembled the same way on every run.
for f in sorted(os.listdir("WheelOfTime")):
    if f.endswith(".txt"):
        # `with` closes the file handle as soon as the text has been read.
        with open(os.path.join("WheelOfTime", f), encoding="utf-8") as fopen:
            text = fopen.read().lower().replace("\n", " ")
        # Extending a list with a str adds one item per character, i.e. this
        # is a character-level corpus. For word-level tokens use
        # nltk.word_tokenize(text) here instead.
        documents.extend(text)

In [117]:
# Total number of tokens loaded.
len(documents)

24060959

In [135]:
# Peek at 20 consecutive tokens to check what the corpus looks like.
documents[5000:5020]

['.',
 ' ',
 ' ',
 't',
 'h',
 'i',
 's',
 ' ',
 'h',
 'i',
 'g',
 'h',
 ',',
 ' ',
 'a',
 'n',
 ' ',
 'a',
 'l',
 'm']

In [132]:
# Learn a chain over the corpus tokens, using the 5 preceding tokens as the state.
mc_wot = learn_markov(documents, 5)

In [127]:
# States of a learned chain are tuples of exactly `order` tokens, so a
# three-character context has to be looked up in an order-3 model; the order-5
# `mc_wot` has no 3-tuple keys. Note that this is a second full pass over the
# corpus.
mc_wot_3 = learn_markov(documents, 3)
mc_wot_3[("q","u","a")]

{'l': 0.1999074502545118,
 'r': 0.5404905136510875,
 't': 0.08468301712170291,
 's': 0.012031466913465988,
 'w': 0.007866728366496992,
 'i': 0.03701989819527996,
 'b': 0.029153169828782972,
 'v': 0.005552984729291994,
 'y': 0.00971772327626099,
 'n': 0.014807959278111986,
 'k': 0.016658954187875982,
 'd': 0.04118463674224896,
 'g': 0.0009254974548819991}

In [133]:
# Generate text from the corpus chain and display it. The separator is the
# empty string, so the generated tokens are concatenated directly.
seq = generate(mc_wot, "")
seq

' hardens, can’t  ceranding. it would have the two river replacid agree. “be welcomed reluctantly, “yes, spine on it," egwene blight, where than and resister heat of thing. after taim had re-released secret had told to reaching. it was force that did not reminded like an eyes had been now. darkfriends. “i’m going to  damodred passage of swagger, quickly. but not expected; the seen master head, looked suddenly before seen, but they join me friends were before supported. the other with a winespring'

## What is a tuple?

In [80]:
# A list: written with square brackets.
t = [1, 2, 3]

In [81]:
# A tuple: written with parentheses.
t2 = (1, 2, 3)

In [82]:
# Indexing a list.
t[1]

2

In [83]:
# Indexing a tuple uses the same syntax as indexing a list.
t2[1]

2

In [84]:
# Lists can grow in place.
t.append(4)

In [85]:
# Tuples have no append(). The error is the point of this cell, so catch and
# print it; that way "Restart & Run All" still reaches the cells below.
try:
    t2.append(4)
except AttributeError as err:
    print("AttributeError:", err)

AttributeError: 'tuple' object has no attribute 'append'

In [86]:
# List items can be reassigned.
t[1] = 7

In [87]:
# Tuple items cannot be reassigned. The error is the point of this cell, so
# catch and print it instead of letting it stop a full notebook run.
try:
    t2[1] = 7
except TypeError as err:
    print("TypeError:", err)

TypeError: 'tuple' object does not support item assignment

In [None]:
# NOTE(review): `a` is not assigned anywhere in the visible notebook and this
# cell was never executed (In [None]); presumably a leftover scratch cell that
# would raise NameError on a full run - confirm and delete.
a