from notyourmotleycrew.timelinejs.models import Event
from notyourmotleycrew.timelinejs.models import Filterset
import nltk
import random
import codecs
# Module-level lookups executed at import time: each fetches one Filterset
# by its exact title (presumably via a Django model manager -- the `.get`
# call would then raise if no row, or more than one row, matches; confirm).
# `filterset` is the one run() filters events by.
filterset = Filterset.objects.get(title="ful")
# NOTE(review): `thenteset` is not referenced anywhere else in the visible
# source -- confirm whether it is still needed.
thenteset = Filterset.objects.get(title="thentetest")
class MarkovModel:
    """Word-pair Markov chain used to generate pseudo-random sentences.

    The model maps every pair of consecutive tokens seen in the training
    text to the list of tokens that followed that pair.  Followers are kept
    with repetition, so a uniformly random pick from the list is weighted
    by how often each follower occurred.

    NOTE(review): the name of the storage attribute (``model``) was
    reconstructed; the original name was lost from the source.  Confirm
    that no external code relies on a different attribute name.
    """

    # Tokens that are allowed to end a generated sentence.
    _SENTENCE_ENDINGS = (".", "?", "!")

    def __init__(self):
        # (token, next_token) -> list of tokens observed right after the pair.
        self.model = {}

    def add_text(self, text):
        """Tokenize *text* with ``nltk.word_tokenize`` and record transitions.

        Every run of three consecutive tokens ``(a, b, c)`` adds ``c`` to the
        follower list of the key ``(a, b)``.  Texts with fewer than three
        tokens add nothing.
        """
        tokens = nltk.word_tokenize(text)
        # zip stops at the shortest slice, i.e. after len(tokens) - 2 triples.
        for first, second, follower in zip(tokens, tokens[1:], tokens[2:]):
            self.model.setdefault((first, second), []).append(follower)

    def get_random_upper_case(self):
        """Return a random key whose first word starts with an uppercase letter.

        Raises:
            IndexError: if the model holds no such key (including an empty
                model).  Retrying random picks in that situation would
                never terminate.
        """
        # Building the candidate list up front gives the same uniform choice
        # over capitalised keys as retrying random picks, but cannot spin
        # forever and does not need the dict keys to be a sequence.
        candidates = [pair for pair in self.model if pair[0][:1].isupper()]
        if not candidates:
            raise IndexError(
                "model contains no key starting with an uppercase letter")
        return random.choice(candidates)

    def get_sentence(self, n):
        """Generate text of more than *n* follower words ending in punctuation.

        Generation starts from a random capitalised pair (both words are
        emitted), then repeatedly appends a random follower of the current
        pair.  It stops once more than *n* followers have been appended and
        the last one is ".", "?" or "!".

        Note that the loop only ends on a sentence-ending token, so a model
        in which none is reachable will keep generating indefinitely.

        Returns:
            The generated words joined by single spaces.

        Raises:
            IndexError: propagated from ``get_random_upper_case`` when the
                model has no capitalised key to start from.
        """
        next_pair = self.get_random_upper_case()
        words = [next_pair[0], next_pair[1]]
        appended = 0
        next_word = "A"  # placeholder that is never a sentence ending
        while not (appended > n and next_word in self._SENTENCE_ENDINGS):
            if next_pair not in self.model:
                # Dead end: continue from a fresh capitalised pair.  As in
                # the original code, the restart pair itself is not emitted;
                # only the words that follow it are.
                next_pair = self.get_random_upper_case()
            next_word = random.choice(self.model[next_pair])
            next_pair = (next_pair[1], next_word)
            appended += 1
            words.append(next_word)
        return u" ".join(words)
# Shared module-level model instance; run() draws sentences from it.
model = MarkovModel()
# Path of the output file opened below.
fn = "/tmp/output.txt"
# Opened for writing at import time with UTF-8 encoding, truncating any
# existing file.  The handle is not closed anywhere in the visible source.
# NOTE(review): the call head was lost from the source; `codecs.open(fn, ...)`
# is reconstructed from the surviving `"w", "utf-8"` arguments and the
# otherwise unused `codecs` import -- confirm.
fh = codecs.open(fn, "w", "utf-8")
def run():
# Entry point: selects events, then requests 1000 generated sentences from
# the module-level `model`.
# NOTE(review): this function does not parse as shown -- the indentation has
# been lost and the first loop has no body.  Restore it from version control
# rather than guessing.
#build the model
# Events whose `filtersets` relation matches the module-level `filterset`
# (presumably a Django ORM filter -- confirm).
events = Event.objects.filter(filtersets=filterset)
for event in events:
# NOTE(review): the body of this loop is missing.  Given the "build the
# model" comment it presumably fed text from each event to
# model.add_text(...); which Event field was used cannot be told from here.
# The three commented-out lines below are truncated Python 2 debug output.
#for key, value in
#print key, value
#print len(value)
for i in range(1000):
# Each call asks get_sentence for n=25.
# NOTE(review): `sentence` is not used in the visible lines, and `fh` is never
# written to -- the code that consumed it may be missing or lie past this view.
sentence = model.get_sentence(25)