In [14]:
## importing dependencies
import numpy as np
import pandas as pd
import os
import string
import re
import random
import nltk
from nltk.tokenize import word_tokenize
from nltk.corpus import stopwords



In [15]:
nltk.download('punkt')

[nltk_data] Downloading package punkt to
[nltk_data]     C:\Users\bagwe\AppData\Roaming\nltk_data...
[nltk_data]   Unzipping tokenizers\punkt.zip.


True

##### Reading the dataset i.e collection of stories of harry potter in text file.

In [16]:
current_dir=os.getcwd()
dataset_path=os.path.join(current_dir,"dataset")

def read_content(dataset_path,encoding='utf-8'):
    txt=[]
    for _,_,files in os.walk(dataset_path):
        for file in files:
            file_path=os.path.join(dataset_path,file)
            with open(file_path,encoding=encoding) as f:
                for line in f:
                    line=line.strip()
                    if line == '----------':
                        break
                    if line != '':
                        txt.append(line)
                        
    return txt
                
        

In [17]:
total_content_lines=read_content(dataset_path)
print("Total number of lines: ",len(total_content_lines))

Total number of lines:  123353


##### Defining a func to process a text before using markov chain on it.

In [18]:
def process_text(txt):
    processed_text=[]
    for line in txt:
        line=line.lower()
        line = re.sub(r"[,.\"\'!@#$%^&*(){}?/;`~:<>+=-\\]", "", line)   ## removing all the punctutaions.
        words_list=word_tokenize(line)
        words=[]
        for word in words_list:
            if word.isalpha():
                words.append(word)
        processed_text += words
    return processed_text
        
        
        

In [19]:
words_list=process_text(total_content_lines)
print("Total No of words: ",len(words_list))

Total No of words:  1126958


##### Creating markov model based on markov chain rule.

In [31]:
def markov_model(words_list, order=2):
    model = {}
    for i in range(len(words_list) - order-1):
        current_state = ""
        next_state = ""
        for j in range(order):
            current_state += words_list[i + j] + " "
            next_state += words_list[i + j + order] + " "
        
        current_state = current_state[:-1]
        next_state = next_state[:-1]
        
        if current_state not in model:
            model[current_state] = {}
            model[current_state][next_state] = 1
        else:
            if next_state in model[current_state]:
                model[current_state][next_state] += 1
            else:
                model[current_state][next_state] = 1
    
    for current_state, transition in model.items():
        total = sum(transition.values())
        for state, count in transition.items():
            model[current_state][state] = count / total
    
    return model




In [32]:

markov_chain = markov_model(words_list)

In [35]:
print("number of states = ", len(markov_chain.keys()))

number of states =  323194


In [37]:
print("Combinations of words using  markov-chain relation")
print(markov_chain['the game'])

Combinations of words using  markov-chain relation
{'when the': 0.03333333333333333, 'squinting about': 0.03333333333333333, 'but this': 0.03333333333333333, 'jerking and': 0.03333333333333333, 'ended in': 0.03333333333333333, 'before snape': 0.03333333333333333, 'like a': 0.03333333333333333, 'said you': 0.03333333333333333, 's over': 0.03333333333333333, 'was over': 0.03333333333333333, 'of chess': 0.03333333333333333, 'until he': 0.03333333333333333, 'the teams': 0.03333333333333333, 'and earned': 0.03333333333333333, 'he couldnt': 0.03333333333333333, 'out of': 0.03333333333333333, 'with malfoy': 0.03333333333333333, 'after a': 0.03333333333333333, 'was starting': 0.03333333333333333, 'recommenced immediately': 0.03333333333333333, 's in': 0.03333333333333333, 'quickly and': 0.03333333333333333, 'the following': 0.03333333333333333, 'is up': 0.03333333333333333, 'gone gryffindor': 0.03333333333333333, 'ginny where': 0.03333333333333333, 'was running': 0.03333333333333333, 'had beco

In [65]:
random_keys = random.sample(list(markov_chain.keys()), 100)
print("Random 100 keys:", random_keys)


Random 100 keys: ['fluffy yeah', 'piece eat', 'every thread', 'was removed', 'freeze where', 'fine son', 'its scaly', 'probably had', 'prepare such', 'came scurrying', 'shouts it', 's wooden', 'master voldemort', 'shoots were', 'brothers h', 'had bitten', 'hagrid firmly', 'and threw', 'after curse', 'trudged back', 'stood we', 'arrest within', 'really belting', 'sweltering hot', 'mark amos', 'go dewy', 'kill potter', 'instantly mr', 'eye sockets', 'of stopping', 'cho thought', 'castle again', 'whispered why', 'could end', 'mortlake was', 'ah wormtail', 'flat foot', 'snapped padma', 'control secrets', 'me closer', 'fall then', 'fresh flagon', 'pomfrey so', 'kreacher closed', 'ducked just', 'nigellus called', 'and secretive', 'said either', 'differed from', 'whiskers whose', 'seventeen professor', 'snape noticing', 'rebellious harry', 'yelp told', 'bruise already', 'we destroyed', 'de cologne', 'prophet big', 'on cried', 'not suggest', 'rowling serious', 'lupin wonderful', 'rejoin ron', 

In [59]:
def get_story(markov_model,limit=150,start="international magical"):
    count=0
    current_state=start
    next_state=None
    gen_story=""
    gen_story += current_state + " "
    while count < limit:
        next_state=random.choices(list(markov_chain[current_state].keys()),
                                    list(markov_chain[current_state].values()))
        current_state = next_state[0]
        gen_story += current_state + " "
        count+=1
    return gen_story
    

In [63]:
print(get_story(markov_model,limit=200,start="international magical"))

international magical cooperation and magical games and sports there was something he s in charge here now and then raising bottles to guard his own precious soul you d know for sure said ron looking disappointed but i doubt anyone except you an he told us as much right to be proud though won t they the whole black family had been right in the eyes you have these harry s voice audible even above this was such that harry though shielded by his promise we ll never be lost for words despite his indignation at the position of his pawns chased off its square by ron s indignation on harry s disgusting slimy sock and stuffed the letter away again inside that cavernous room full of croaking bullfrogs and cawing ravens and with a nasty little laugh extremely dangerous a special case i really didnt want to tell me when you came in great rasps now harry ron and ginny skimming over the fire to harry s sense of security harry potter but winky does what she meant harry groped for the potion glowed a

In [66]:
print(get_story(markov_model,limit=250,start="harry breathed"))

harry breathed deeply for a few years older than you were when you fought off about a hundred yards of the place as she drifted off to take hagrid off to azkaban in fact creatures were cantering softly up and he must have seen her new wizarding neighbors she soon released him ron and ginny so how was practice asked hermione rather timidly is that your hot head might dominate your good health harry and he could not be long now it is late then though he sighed deeply well we hadn t locked the thing you dread him dying wasnt l oh said harry well i need to step inside and still professor mcgonagall didn t professor dumbledore wouldn t have been said hermione i haven t got agrippa or ptolemy harry unwrapped his chocolate wishing he could say another word as they marched off toward the window where half hidden in shadow wearing a strangely triumphant look what they did it the most horrible noise a yowling screeching scream keep back there this is it then we can t he been coming to judge that

In [70]:
print(get_story(markov_model,limit=250,start="harry sickened"))

harry sickened by the whomping willow was planted because i told her and ron together hermione was acting through somebody else by a dark room whose windows were rattling in their frames but along what seemed to be coming from a locked door for a few months one summer when we but do you know i mean he s sensitive people bully him too he came back then and mind felt oddly disconnected now his limbs and followed professor lupin go the chance ter get rid of the fire ravenous they ate their lunch hour in the library staggering under the weight of the back of which broomstick the firebolts superbly smooth action its phenomenal acceleration and its rays mingled with the pair of double doors and along the passageway leading into the next day because every time was mentioned he supposed that he had lowered its mouth and belched flame again blasting the tapestry with a wand in one of her robes bulging when everyone had copied down their homework from the blackboard using defensive spells chapte