In [2]:
## importing dependencies
import numpy as np
import pandas as pd
import os
import string
import re
import random
import nltk
from nltk.tokenize import word_tokenize
from nltk.corpus import stopwords



In [3]:
nltk.download('punkt')

[nltk_data] Downloading package punkt to
[nltk_data]     C:\Users\bagwe\AppData\Roaming\nltk_data...
[nltk_data]   Package punkt is already up-to-date!


True

##### Reading the dataset i.e collection of stories of harry potter in text file.

In [4]:
current_dir=os.getcwd()
dataset_path=os.path.join(current_dir,"dataset")

def read_content(dataset_path,encoding='utf-8'):
    txt=[]
    for _,_,files in os.walk(dataset_path):
        for file in files:
            file_path=os.path.join(dataset_path,file)
            with open(file_path,encoding=encoding) as f:
                for line in f:
                    line=line.strip()
                    if line == '----------':
                        break
                    if line != '':
                        txt.append(line)
                        
    return txt
                
        

In [5]:
total_content_lines=read_content(dataset_path)
print("Total number of lines: ",len(total_content_lines))

Total number of lines:  123353


##### Defining a func to process a text before using markov chain on it.

In [6]:
def process_text(txt):
    processed_text=[]
    for line in txt:
        line=line.lower()
        line = re.sub(r"[,.\"\'!@#$%^&*(){}?/;`~:<>+=-\\]", "", line)   ## removing all the punctutaions.
        words_list=word_tokenize(line)
        words=[]
        for word in words_list:
            if word.isalpha():
                words.append(word)
        processed_text += words
    return processed_text
        
        
        

In [7]:
words_list=process_text(total_content_lines)
print("Total No of words: ",len(words_list))

Total No of words:  1126958


##### Creating markov model based on markov chain rule.

In [8]:
def markov_model(words_list, order=2):
    model = {}
    for i in range(len(words_list) - order-1):
        current_state = ""
        next_state = ""
        for j in range(order):
            current_state += words_list[i + j] + " "
            next_state += words_list[i + j + order] + " "
        
        current_state = current_state[:-1]
        next_state = next_state[:-1]
        
        if current_state not in model:
            model[current_state] = {}
            model[current_state][next_state] = 1
        else:
            if next_state in model[current_state]:
                model[current_state][next_state] += 1
            else:
                model[current_state][next_state] = 1
    
    for current_state, transition in model.items():
        total = sum(transition.values())
        for state, count in transition.items():
            model[current_state][state] = count / total
    
    return model




In [9]:

markov_chain = markov_model(words_list)

In [10]:
print("number of states = ", len(markov_chain.keys()))

number of states =  323194


In [11]:
print("Combinations of words using  markov-chain relation")
print(markov_chain['the game'])

Combinations of words using  markov-chain relation
{'when the': 0.03333333333333333, 'squinting about': 0.03333333333333333, 'but this': 0.03333333333333333, 'jerking and': 0.03333333333333333, 'ended in': 0.03333333333333333, 'before snape': 0.03333333333333333, 'like a': 0.03333333333333333, 'said you': 0.03333333333333333, 's over': 0.03333333333333333, 'was over': 0.03333333333333333, 'of chess': 0.03333333333333333, 'until he': 0.03333333333333333, 'the teams': 0.03333333333333333, 'and earned': 0.03333333333333333, 'he couldnt': 0.03333333333333333, 'out of': 0.03333333333333333, 'with malfoy': 0.03333333333333333, 'after a': 0.03333333333333333, 'was starting': 0.03333333333333333, 'recommenced immediately': 0.03333333333333333, 's in': 0.03333333333333333, 'quickly and': 0.03333333333333333, 'the following': 0.03333333333333333, 'is up': 0.03333333333333333, 'gone gryffindor': 0.03333333333333333, 'ginny where': 0.03333333333333333, 'was running': 0.03333333333333333, 'had beco

In [12]:
random_keys = random.sample(list(markov_chain.keys()), 100)
print("Random 100 keys:", random_keys)


Random 100 keys: ['burning sensation', 'decided last', 'too dudley', 'yehve had', 'every other', 'arts owl', 'were keeping', 'teenage dumbledore', 'them lately', 'arouses such', 'have searched', 'meanwhile felt', 'adventure and', 'would stay', 'hannah abbot', 'unsafe it', 'was attracting', 'punishment we', 'and good', 'quite true', 'surrounding seats', 'straight past', 'but youre', 'went they', 'overview of', 'them standing', 'without it', 'uncle you', 'representing the', 'against an', 'remember something', 'a stupid', 'heavy book', 'looking frightened', 'bossy that', 'rapid french', 'mulish expression', 'hall there', 'early birthday', 'you keep', 'mutilated true', 'card that', 'of rubeus', 'own school', 'this next', 'pints of', 'was curling', 'anything weird', 'malfoys roots', 'perce shouted', 'george turned', 'percy hoped', 'roar on', 'gazed longingly', 'landed noiselessly', 'were powerful', 's twisted', 'suppose not', 'ginny since', 'were even', 'address harry', 'salamanders got', '

In [13]:
def get_story(markov_model,limit=150,start="international magical"):
    count=0
    current_state=start
    next_state=None
    gen_story=""
    gen_story += current_state + " "
    while count < limit:
        next_state=random.choices(list(markov_chain[current_state].keys()),
                                    list(markov_chain[current_state].values()))
        current_state = next_state[0]
        gen_story += current_state + " "
        count+=1
    return gen_story
    

In [14]:
print(get_story(markov_model,limit=200,start="international magical"))

international magical cooperation don t mention that ter anyone at hogwarts s founders said dumbledore i sometimes find and i d still have old photographs maybe even letters she knew something harry could not possibly know how bad bertha s gotten lost from red to green faster than severus snape along the tunnel turning pettigrew in you re saying it aloud typically ten minutes later errol and hedwig s cage then turned into hermione s jug spraying them all with rather stupid then both hat and with a thud that could be the sun where they were forced to stop for minutes at a joke shop into the back of his head and retreating further from dumbledore if you ask for the best what is that it was clear that dudley was as vast quantities of what appeared to be in hogsmeade said ron at once and left the office one last sweeping look and then broke into song a thousand years or more of my year at hogwarts he reached the station there was a big supporter of he said what will they learn said uncle v

In [15]:
print(get_story(markov_model,limit=250,start="harry breathed"))

harry breathed deeply for a few seats along harry saw the dragon s wings beating the water spears clutched in his fat legs would carry her pulling out his dream harry had just seen off the team but of course cried hermione pointing her wand at the quill whizzed across the parchment and scanning it closely that seems perfectly in order she said placidly to mr weasley and she broke off still wearing it i wouldn t put it in the far end of solid evidence that not one of the irritating things about ginny she d have thought you had enough to worry about said ron though was staring at him he had to stir feverishly it ll be able to break it up hagrid said loudly i know the theory exams in three days after they had arrived on what s happened cissy bellatrix lestrange walked slowly around at them all mrs weasley beamed down at him at the front me he said i don t you talk about him then no she replied and she too left chapter dumbledore happens to her in the corridors alone break a rule that says

In [16]:
print(get_story(markov_model,limit=250,start="harry sickened"))

harry sickened by the ministry they didn t want to see ginny wincing frequently as neville moved too in one of the cardboard boxes and had agreed to try and bring a real dementor so professor trelawney seemed to have missed a muggle studies class but half of it rosmerta said fudge who looked truly awake fred and george ve left it had stopped twitching and professor lupin as the best in some people and the kwikspell envelope which harry glared at him then there was such a thing because said harry shaking the rope up to the death eaters and hogwartians alike and red and thereafter he reverted to his usual gusto bradley davies chang he said someone was going to be hell to pay for what he was pleased to welcome two new teachers to our cause your death will not bring it up saying whoops my wand you re sure they could not remember it well that as an apple and as the ring sailed within an inch of the room went icily cold and hard behind them that hagrid had given him a bit unfair professor du

In [25]:

random_keys = random.sample(list(markov_chain.keys()), 100)
print("Random 100 keys:", random_keys)


Random 100 keys: ['i than', 'traveled in', 'husband arthur', 'marge he', 'secret chamber', 'on ruthlessly', 'extra powers', 'front nothing', 'had fallen', 'his beak', 'already heavily', 'mustn t', 'crouch binding', 'behind here', 'of earwax', 'who appeared', 'above dumbledore', 'voice growing', 'angry every', 'go potters', 'energy that', 'be socks', 'dark solid', 'round this', 'snorted in', 'accio and', 'lagging behind', 'the still', 'as celebrity', 'bath help', 'looked desperately', 'mcgonagall tartly', 'very large', 'stealing said', 'taken heat', 'lessons when', 'pursued the', 'half truculent', 'the easy', 'warmly at', 'challenges in', 'last finally', 'were possibly', 'and gained', 'conscious self', 'soo hungry', 'trees on', 'eaters i', 'a twinge', 'stone there', 'another girls', 'stone effigy', 'nice if', 'we this', 'half irritably', 'squad should', 'returned didn', 'the henhouse', 'assuming this', 'anything said', 'was processed', 'bounced onto', 'enormous silver', 'play before', '

In [34]:
def markov_model_2(words_list, order=6):
    model = {}
    for i in range(len(words_list) - order):
        current_state = " ".join(words_list[i:i+order])
        next_state = words_list[i + order]
        
        if current_state not in model:
            model[current_state] = {}
            model[current_state][next_state] = 1
        else:
            if next_state in model[current_state]:
                model[current_state][next_state] += 1
            else:
                model[current_state][next_state] = 1
    
    for current_state, transition in model.items():
        total = sum(transition.values())
        for state, count in transition.items():
            model[current_state][state] = count / total
    
    return model


In [55]:
def get_story_2(markov_model_2, limit=150, start="from the game jerking and twitching"):
    count = 0
    current_state = " ".join(start.split()[-6:])  # Initialize with the last 6 words of the start
    gen_story = start + " "
    
    while count < limit:
        next_state = random.choices(
            list(markov_model_2[current_state].keys()),
            list(markov_model_2[current_state].values())
        )
        current_state = " ".join((current_state.split() + [next_state[0]])[-6:])
        gen_story += next_state[0] + " "
        count += 1

    return gen_story


In [56]:
markov_model_6=markov_model_2(words_list)

In [73]:

random_keys = random.sample(list(markov_model_6.keys()), 20)
print("Random 20 keys:", random_keys)


Random 20 keys: ['whole village seemed to have turned', 'when they heard voices nearby someone', 'harry holding up a pasty go', 'to curl his hair more like', 'word broomsticks in his living room', 'time harry could not hear anything', 'softly as she strode between two', 'work out whether they believe he', 'stings burns embedded spines etc chapter', 'dobby anxiously jumping backward with his', 'he s gone missing like bertha', 'professor harry said his voice cracking', 'me i deserved it snapped ron', 'all the villagers cared about was', 'see him very much said dumbledore', 'the raised platform the moment she', 'what could it be a meeting', 'harry let us ask potter how', 't want to give her the', 'be so proud there was an']


In [67]:
story=get_story_2(markov_model_6,limit=200,start='harry felt a sudden upsurge of')

In [68]:
story

'harry felt a sudden upsurge of affection for his godfather at least you ve known what s been going on he said bracingly oh yeah said sirius sarcastically listening to snape s reports having to take all his snide hints that he s out there risking his life while i m sat on my backside here having a nice comfortable time asking me how the cleaning s going what cleaning asked harry trying to make this place fit for human habitation said sirius waving a hand around the dismal kitchen no one s lived here for ten years not since my dear mother died unless you count her old and he s gone round the twist hasn t cleaned anything in ages sirius said mundungus who did not appear to have paid any attention to this conversation but had been minutely examining an empty goblet this solid silver mate yes said sirius surveying it with distaste finest fifteenthcentury silver embossed with the black family crest that d come off though muttered mundungus polishing it with his cuff fred george no just carr

In [74]:
print(get_story_2(markov_model_6,limit=200,start='harry holding up a pasty go'))

harry holding up a pasty go on you don t want this it s all dry said ron she hasn t got much time he added quickly you know with five of us go on have a pasty said harry who had never had anything to share before or indeed anyone to share it with it was a nice feeling sitting there with ron eating their way through all harry s pasties cakes and candies the sandwiches lay forgotten what are these harry asked ron holding up a pack of chocolate frogs they re not really frogs are they he was starting to feel that nothing would surprise him no said ron but see what the card is i m missing agrippa what oh of course you wouldn t know chocolate frogs have cards inside them you know to collect famous witches and wizards i ve got about five hundred but i haven t got agrippa or ptolemy harry unwrapped his chocolate frog and picked up the card it showed a man s face he wore glasses had a long crooked nose and flowing silver hair beard and mustache underneath the picture was the name albus dumbledo