## Importing tools

In [2]:
import numpy as np
import pandas as pd
import os
import re
import string
from nltk.tokenize import word_tokenize
from nltk.corpus import stopwords
import random

## Reading every Sherlock Holmes adventure!

In [3]:
story_path = "/kaggle/input/sherlock-holmes-stories/sherlock/sherlock/"

def read_all_stories(story_path):
    txt = []
    for _, _, files in os.walk(story_path):
        for file in files:
            with open(story_path+file) as f:
                for line in f:
                    line = line.strip()
                    if line=='----------': break
                    if line!='':txt.append(line)
    return txt
        
stories = read_all_stories(story_path)
print("number of lines = ", len(stories))

number of lines =  215021


## Cleaning the text

In [4]:
def clean_txt(txt):
    cleaned_txt = []
    for line in txt:
        line = line.lower()
        line = re.sub(r"[,.\"\'!@#$%^&*(){}?/;`~:<>+=-\\]", "", line)
        tokens = word_tokenize(line)
        words = [word for word in tokens if word.isalpha()]
        cleaned_txt+=words
    return cleaned_txt

cleaned_stories = clean_txt(stories)
print("number of words = ", len(cleaned_stories))

number of words =  2332247


## Creating the Markov Model

In [5]:
def make_markov_model(cleaned_stories, n_gram=2):
    markov_model = {}
    for i in range(len(cleaned_stories)-n_gram-1):
        curr_state, next_state = "", ""
        for j in range(n_gram):
            curr_state += cleaned_stories[i+j] + " "
            next_state += cleaned_stories[i+j+n_gram] + " "
        curr_state = curr_state[:-1]
        next_state = next_state[:-1]
        if curr_state not in markov_model:
            markov_model[curr_state] = {}
            markov_model[curr_state][next_state] = 1
        else:
            if next_state in markov_model[curr_state]:
                markov_model[curr_state][next_state] += 1
            else:
                markov_model[curr_state][next_state] = 1
    
    # calculating transition probabilities
    for curr_state, transition in markov_model.items():
        total = sum(transition.values())
        for state, count in transition.items():
            markov_model[curr_state][state] = count/total
        
    return markov_model

In [8]:
markov_model = make_markov_model(cleaned_stories)

In [9]:
print("number of states = ", len(markov_model.keys()))

number of states =  208716


In [10]:
print("All possible transitions from 'the game' state: \n")
print(markov_model['the game'])

All possible transitions from 'the game' state: 

{'my own': 0.02702702702702703, 'at any': 0.02702702702702703, 'mr holmes': 0.02702702702702703, 'ay whats': 0.02702702702702703, 'my friend': 0.02702702702702703, 'fairly by': 0.02702702702702703, 'is not': 0.02702702702702703, 'was not': 0.02702702702702703, 'is hardly': 0.02702702702702703, 'was in': 0.02702702702702703, 'would have': 0.036036036036036036, 'is up': 0.06306306306306306, 'is and': 0.036036036036036036, 'in their': 0.036036036036036036, 'was whist': 0.036036036036036036, 'was up': 0.09009009009009009, 'in that': 0.036036036036036036, 'the lack': 0.036036036036036036, 'for all': 0.06306306306306306, 'is afoot': 0.036036036036036036, 'may wander': 0.02702702702702703, 'now a': 0.02702702702702703, 'was afoot': 0.036036036036036036, 'for the': 0.036036036036036036, 'worth it': 0.02702702702702703, 'you are': 0.02702702702702703, 'i am': 0.02702702702702703, 'now count': 0.02702702702702703, 'your letter': 0.027027027027027

## Generating Sherlock Holmes stories!

In [11]:
def generate_story(markov_model, limit=100, start='my god'):
    n = 0
    curr_state = start
    next_state = None
    story = ""
    story+=curr_state+" "
    while n<limit:
        next_state = random.choices(list(markov_model[curr_state].keys()),
                                    list(markov_model[curr_state].values()))
        
        curr_state = next_state[0]
        story+=curr_state+" "
        n+=1
    return story

In [12]:
for i in range(20):
    print(str(i)+". ", generate_story(markov_model, start="dear holmes", limit=8))

0.  dear holmes said i are there can be done quite remarkably well said holmes for you if you 
1.  dear holmes you are doing i thought that as a civilian as long as you have taken a 
2.  dear holmes am i to remain neutral to get near the window was entirely trivial and that his 
3.  dear holmes what do you think would happen if i could only learn part of the incident should 
4.  dear holmes i have explained are the rough nugget on it was the sides of the victim nor 
5.  dear holmes if i have made use of taking them to helston we looked at the door and 
6.  dear holmes it is a pure mastiff but it appeared to have the upper floor and lay as 
7.  dear holmes he has a daughter and that is why i crawled about the transaction that the piece 
8.  dear holmes i have your very interesting experience exactly as they had called upon him to step up 
9.  dear holmes i thought that it is the ancient manor house from the road when two free citizens 
10.  dear holmes i thought that it was a frequent a

In [13]:
for i in range(20):
    print(str(i)+". ", generate_story(markov_model, start="my dear", limit=8))

0.  my dear holmes what do you see any clue you have furnished but you will have to come 
1.  my dear watson but i can say did god make this country that i gather that he has 
2.  my dear watson he said at last it is a promise mr holmes if you please gentlemen and 
3.  my dear watson clumsy as i had pictured within the scope of our observation so much for his 
4.  my dear young lady at the latter club he had lived as happily as any two words in 
5.  my dear sir cried dr mortimer the two slabs of marble let you have the missing piece were 
6.  my dear watson the alternative use of the unfortunate trainer his head and an object of mingled horror 
7.  my dear watson yet another had the volume from the road but the surroundings all spoke of the 
8.  my dear watson said he with a furious pace in the direction of blackheath and why sir because 
9.  my dear sir it was one of those adventures which were the stables holmes had sat intently observant 
10.  my dear watson we have ended in so wet 

In [14]:
for i in range(20):
    print(str(i)+". ", generate_story(markov_model, start="i would", limit=8))

0.  i would always be true to their salt this pretended merchant who travels under the name you see 
1.  i would ask you whether you have any practice at all yesterday however just as i should be 
2.  i would not dare to conceive in his excitement i had enough knee left to keep a tight 
3.  i would have nothing to hide his own responsibility entirely inquiries have shown that his ministers know nothing 
4.  i would venture to smoke to pass the falls of reichenbach which are about to do with me 
5.  i would engage a launch and go down to hampshire quite easy in my career i am a 
6.  i would be a morose and disappointed man when dr barnicot came down this staircase is a small 
7.  i would call your attention very particularly to the position said holmes springing to his feet it wont 
8.  i would always carry the case and may you be at home we neither of us rich men 
9.  i would at once as stanley hopkins a young police inspector ive made inquiry all over the hotel 
10.  i would there was

In [15]:
print(generate_story(markov_model, start="the case", limit=10))

the case and one whom i had dandled on my sleeve however and it ended in so tragic a manner and certain 


In [16]:
print(generate_story(markov_model, start="the case", limit=100))

the case fortune has been your lodger for seven years my senior how comes it that would go said she well i shall be paid for it if the gipsys evidence may implicate not only your friend mr sherlock holmes laughing only one of the phrases in that deadly room when we had regained their fire and there is the obvious possibility that the writer was on board and the smokeless chimneys however gave it a deep muttered rumble musical and yet you are the master criminal holmess figure seemed to grow colder every step that i take it for granted that since dawn every constable within forty miles has been looking out for something or nothing well then here on this planet so say the most silent in the matter i have no doubt they are though i fail to see me as a companion lithe and agile with a springy step which showed that he was a man that would be obviously impossible did he ask what ailed me i was and would have brought me a letter from you i can hear him unlock it and get on a quiet countrysid