## Imports

In [1]:
import re
from nltk.tokenize import word_tokenize
import random
import nltk

## Reading The Chamber of Secrets

In [2]:
path = "kaggle/input/harry_potter/02 Harry Potter and the Chamber of Secrets.txt"

def read_story(path):
    txt = []

    with open(path) as f:
        for line in f:
            line = line.strip()
            if line!='':txt.append(line)
    
    return txt

story = read_story(path)
print("number of lines = ", len(story))

number of lines =  3216


## Cleaning the text

In [3]:
def clean_txt(txt):
    cleaned_txt = []
    for line in txt:
        line = line.lower()
        line = re.sub(r"[,.\"\'!@#$%^&*(){}?/;`~:<>+=-\\]", "", line)
        tokens = word_tokenize(line)
        words = [word for word in tokens if word.isalpha()]
        cleaned_txt += words
    return cleaned_txt

cleaned_story = clean_txt(story)
print("number of words = ", len(cleaned_story))

number of words =  87271


## Creating the Markov Model

In [4]:
def make_markov_model(cleaned_story, n_gram=2):
    markov_model = {}
    for i in range(len(cleaned_story)-n_gram-1):
        curr_state, next_state = "", ""
        for j in range(n_gram):
            curr_state += cleaned_story[i+j] + " "
            next_state += cleaned_story[i+j+n_gram] + " "
        curr_state = curr_state[:-1]
        next_state = next_state[:-1]
        if curr_state not in markov_model:
            markov_model[curr_state] = {}
            markov_model[curr_state][next_state] = 1
        else:
            if next_state in markov_model[curr_state]:
                markov_model[curr_state][next_state] += 1
            else:
                markov_model[curr_state][next_state] = 1
    
    # calculating transition probabilities
    for curr_state, transition in markov_model.items():
        total = sum(transition.values())
        for state, count in transition.items():
            markov_model[curr_state][state] = count/total
        
    return markov_model

In [5]:
markov_model = make_markov_model(cleaned_story)

In [6]:
print("number of states = ", len(markov_model.keys()))

number of states =  45475


In [7]:
print("All possible transitions from 'chamber of' state: \n")
print(markov_model['chamber of'])

All possible transitions from 'chamber of' state: 

{'secrets chapter': 0.22807017543859648, 'secrets has': 0.05263157894736842, 'secrets what': 0.017543859649122806, 'secrets said': 0.07017543859649122, 'you all': 0.017543859649122806, 'secrets the': 0.017543859649122806, 'secrets so': 0.017543859649122806, 'secrets unleash': 0.017543859649122806, 'secrets ron': 0.017543859649122806, 'secrets for': 0.05263157894736842, 'secrets is': 0.07017543859649122, 'secrets harry': 0.03508771929824561, 'secrets was': 0.03508771929824561, 'secrets with': 0.017543859649122806, 'secrets his': 0.017543859649122806, 'secrets in': 0.017543859649122806, 'secrets fifty': 0.017543859649122806, 'secrets and': 0.05263157894736842, 'secrets closed': 0.017543859649122806, 'secrets they': 0.017543859649122806, 'secrets all': 0.017543859649122806, 'secrets out': 0.017543859649122806, 'secrets have': 0.017543859649122806, 'secrets he': 0.017543859649122806, 'secrets itself': 0.017543859649122806, 'secrets that':

## Text Generation

In [8]:
def generate(markov_model, limit=100, start='chamber of'):
    n = 0
    curr_state = start
    next_state = None
    story = ""
    story += curr_state + " "
    while n < limit:
        next_state = random.choices(list(markov_model[curr_state].keys()),
                                    list(markov_model[curr_state].values()))
        
        curr_state = next_state[0]
        story += curr_state + " "
        n += 1
    return story

In [9]:
for i in range(20):
    print(str(i+1)+". ", generate(markov_model, start="harry potter", limit=15))

1.  harry potter the words sir nicholas de died october harry watched amazed as a school treat oh no professor see harry harry did it he did hp harry potter and his friends 
2.  harry potter is valiant and bold he has to deal with muggle business all the time s flown hasn t reported all these attacks is caught you ll need to address the 
3.  harry potter said riddle drawing out his wand lockhart slid his legs into the school and was told that the candles burned lower and lower making the light as though we were 
4.  harry potter mustn t be harry lay awake for hours mrs weasley looking nervously at the floor and the snake was spitting in agony no harry heard riddle s hissing voice kill 
5.  harry potter and the dormitories night harry called back to finish these people with dumbledore gone fear had spread as never before so that the stars shone brightly on the end of 
6.  harry potter goes back to hogwarts it s a real emergency section nineteen or something of a holdup in the head and t

In [10]:
for i in range(20):
    print(str(i+1)+". ", generate(markov_model, start="hermione granger", limit=15))

1.  hermione granger and i ll have to buy my autobiography which i shall be happy to pass on my expertise to less able harry made an indistinct noise in his face and 
2.  hermione granger standing at a doorway muttering to each other what s up said ron uncertainly have you said harry with a wave of yet more water which splashed onto the already 
3.  hermione granger always top in everything hermione beamed as she threw sausages into the chattering crowd someone told me a voice suddenly echoed behind harry and ron followed snape up the steps 
4.  hermione granger standing at the top and professor mcgonagall stepped onto it harry heard a door creak open and then i had one of the books mum got me i m pale 
5.  hermione granger hermione raised a trembling hand excellent beamed lockhart they will knock you out for several hours earlier than he would have asked anyone can make a mistake said hermione and 
6.  hermione granger always top in everything hermione beamed as she had her copy of th

In [11]:
for i in range(20):
    print(str(i+1)+". ", generate(markov_model, start="ron weasley", limit=15))

1.  ron weasley and hermione chose seats as far as possible from percy in the cupboard under the stairs and the moment harry was sure he had pushed to the front of her 
2.  ron weasley and hermione looked at dumbledore who smiled faintly the firelight and then he caught up with them at least he d have to tell them about colin but hermione interrupted 
3.  ron weasley was outside harry s room third time this week he roared across the table crouched low and walked to the school and he still had to all three of them 
4.  ron weasley wasn t even aware of deciding to do extra but why don t you have told us all this yesterday when we were awake wood wasn t pleased harry was 
5.  ron weasley was outside harry s astonishment dumbledore smiled about time too he said hotly if lockhart s trunk aside lockhart was standing well back and no more ask no more ask 
6.  ron weasley wasn t paying attention said myrtle blushing silver urgh said ron as they left the great hall harry and ron paced around it

In [12]:
print(generate(markov_model, start="professor dumbledore", limit=500))

professor dumbledore has only been suspended by the searing pain in his numb brain get to the other balls for colin asked tripping down a couple of steps to gringotts she ran down to lockhart s office they turned into muddy streams and hagrid had left after a quarter of an hour adding more lacewings to the potion looked like the largest but opposite was a bird that resembled a park bench i mean we teachers have quite enough to make your bludger said harry anger was coursing through him to the hospital wing burst open at that very moment a heavy book from the stack on the polyjuice potion again ron and hermione had made it look extremely easy but it wasn t even aware of deciding to do i m in gryffindor tower harry showed her t m riddle s face to harry no second year could have picked it up saying whoops my wand is a little lesson let s match he went on but i was sympathetic i was kind ginny simply loved me no one s lasted long fer a while said hermione in a corner of the gryffindor team