## Imports

In [1]:
import re
from nltk.tokenize import word_tokenize
import random
import nltk

## Reading The Chamber of Secrets

In [2]:
path = "kaggle/input/harry_potter/02 Harry Potter and the Chamber of Secrets.txt"

def read_story(path):
    txt = []

    with open(path) as f:
        for line in f:
            line = line.strip()
            if line!='':txt.append(line)
    
    return txt

story = read_story(path)
print("number of lines = ", len(story))

number of lines =  3216


## Cleaning the text

In [3]:
def clean_txt(txt):
    cleaned_txt = []
    for line in txt:
        line = line.lower()
        line = re.sub(r"[,.\"\'!@#$%^&*(){}?/;`~:<>+=-\\]", "", line)
        tokens = word_tokenize(line)
        words = [word for word in tokens if word.isalpha()]
        cleaned_txt += words
    return cleaned_txt

cleaned_story = clean_txt(story)
print("number of words = ", len(cleaned_story))

number of words =  87271


## Creating the Markov Model

In [4]:
def make_markov_model(cleaned_story, n_gram=2):
    markov_model = {}
    for i in range(len(cleaned_story)-n_gram-1):
        curr_state, next_state = "", ""
        for j in range(n_gram):
            curr_state += cleaned_story[i+j] + " "
            next_state += cleaned_story[i+j+n_gram] + " "
        curr_state = curr_state[:-1]
        next_state = next_state[:-1]
        if curr_state not in markov_model:
            markov_model[curr_state] = {}
            markov_model[curr_state][next_state] = 1
        else:
            if next_state in markov_model[curr_state]:
                markov_model[curr_state][next_state] += 1
            else:
                markov_model[curr_state][next_state] = 1
    
    # calculating transition probabilities
    for curr_state, transition in markov_model.items():
        total = sum(transition.values())
        for state, count in transition.items():
            markov_model[curr_state][state] = count/total
        
    return markov_model

In [5]:
markov_model = make_markov_model(cleaned_story)

In [6]:
print("number of states = ", len(markov_model.keys()))

number of states =  45475


In [7]:
print("All possible transitions from 'chamber of' state: \n")
print(markov_model['chamber of'])

All possible transitions from 'chamber of' state: 

{'secrets chapter': 0.22807017543859648, 'secrets has': 0.05263157894736842, 'secrets what': 0.017543859649122806, 'secrets said': 0.07017543859649122, 'you all': 0.017543859649122806, 'secrets the': 0.017543859649122806, 'secrets so': 0.017543859649122806, 'secrets unleash': 0.017543859649122806, 'secrets ron': 0.017543859649122806, 'secrets for': 0.05263157894736842, 'secrets is': 0.07017543859649122, 'secrets harry': 0.03508771929824561, 'secrets was': 0.03508771929824561, 'secrets with': 0.017543859649122806, 'secrets his': 0.017543859649122806, 'secrets in': 0.017543859649122806, 'secrets fifty': 0.017543859649122806, 'secrets and': 0.05263157894736842, 'secrets closed': 0.017543859649122806, 'secrets they': 0.017543859649122806, 'secrets all': 0.017543859649122806, 'secrets out': 0.017543859649122806, 'secrets have': 0.017543859649122806, 'secrets he': 0.017543859649122806, 'secrets itself': 0.017543859649122806, 'secrets that':

## Text Generation

In [8]:
def generate(markov_model, limit=100, start='chamber of'):
    n = 0
    curr_state = start
    next_state = None
    story = ""
    story += curr_state + " "
    while n < limit:
        next_state = random.choices(list(markov_model[curr_state].keys()),
                                    list(markov_model[curr_state].values()))
        
        curr_state = next_state[0]
        story += curr_state + " "
        n += 1
    return story

In [9]:
for i in range(20):
    print(str(i+1)+". ", generate(markov_model, start="harry potter", limit=15))

1.  harry potter and the letters of his name could be dangerous dangerous said harry quickly and looked over his shoulder at the grass two solitary spiders were hurrying away from the witch 
2.  harry potter freed dobby said the headmaster you wanted to meet you said harry i can not possibly let you have a pee with her wailing at you don t believe it 
3.  harry potter must not be here soon snapped aunt petunia whisked away his plate at the ministry i hear said mr malfoy smoothly and spectacularly on the sweeping lawn in front of 
4.  harry potter the crowd of curious muggles out of the diary in torrents streaming over harry potter survived and the color was draining out of ernie s face worked horribly he knows 
5.  harry potter and the sorting hat into his ripped bag desperate to tell ron and hermione grabbed him and they were having what looked like an equal harry trying to cheer him 
6.  harry potter survived and the dark corridor where they stood came the deep grunt of crabbe from h

In [10]:
for i in range(20):
    print(str(i+1)+". ", generate(markov_model, start="hermione granger", limit=15))

1.  hermione granger they however didn t say anything justin had run away from him at lunchtime people here ll believe it i tell you at but i t know sobbed ginny i 
2.  hermione granger and i thought i was slytherin s heir from everything ginny had told me about you too just last night shouted mrs weasley what are you myrtle said hermione in 
3.  hermione granger they however didn t matter this way he was rocking backward and forward slightly in her chair exactly like dobby did when he was still alive said riddle s voice 
4.  hermione granger hermione raised a trembling hand excellent beamed lockhart they will knock you out for the bottom stair it creaks harry whispered back as the teachers bent over justin and nearly 
5.  hermione granger hermione raised a trembling hand excellent beamed lockhart quite excellent take ten points for gryffindor and so to business he bent down and looked over at harry s mouth fell 
6.  hermione granger and i m right underneath the first one her skeleton 

In [11]:
for i in range(20):
    print(str(i+1)+". ", generate(markov_model, start="ron weasley", limit=15))

1.  ron weasley wasn t there he said too right you will said lockhart i ll go to any lengths for a glimpse of uncle vernon s voice harry had no chance to 
2.  ron weasley was outside harry s disappointment riddle led him not into a hidden passageway or a secret broom cupboard i regret telling you there s one thing left he shot inside 
3.  ron weasley was outside harry s mouth and its escape into the darkness their footsteps echoing particularly loudly as crabbe s and george s bedroom were considered perfectly normal what harry found 
4.  ron weasley and hermione came hurrying up to the she didn t enjoy his shepherd s pie as much as the other balls for colin asked tripping down a couple more feathers 
5.  ron weasley was outside harry s reach he pulled a heavy book from the direction of adrian pucey was trying hard not to imagine what ginny might look like if they found 
6.  ron weasley was outside harry s turn for an elbow in the kitchen gave a small elephant emerged very slowly his ey

In [12]:
print(generate(markov_model, start="professor dumbledore", limit=500))

professor dumbledore these boys have flouted the decree for the distant sounds of two dormitory doors closing before seizing the cloak throwing it over malfoy said mr weasley eagerly did it go all right if you need help you know what you were about to do he cleared his throat i then screwed up my remaining strength and performed the immensely complex homorphus he let out a long piercing scream ink spurted out of malfoy the better snarled ron d you know parents there are some wizards like malfoy s family before and they ll be able to climb the next staircase without difficulty d you think it s good and beheaded but oh no it s been happening harry told them about fawkes s timely arrival and about the match being canceled others looked worried harry and the whole of london lay smoky and glittering below them winding its way past a snowcapped mountain it was much too large and slipped down over his eyes widening as though this was obvious was giving you a ghost harry said uncertainly a mem