<img src="https://drive.google.com/uc?export=view&amp;id=1zSJwAUxWv5bxyYLmYPNi-s6M_Wq5iWXh">

## Importing tools

In [1]:
import numpy as np
import pandas as pd
import os
import re
import string
from nltk.tokenize import word_tokenize
from nltk.corpus import stopwords
import random

## Reading every Sherlock Holmes adventure!

In [2]:
story_path = "/kaggle/input/sherlock-holmes-stories/sherlock/sherlock/"

def read_all_stories(story_path):
    txt = []
    for _, _, files in os.walk(story_path):
        for file in files:
            with open(story_path+file) as f:
                for line in f:
                    line = line.strip()
                    if line=='----------': break
                    if line!='':txt.append(line)
    return txt
        
stories = read_all_stories(story_path)
print("number of lines = ", len(stories))

number of lines =  215021


## Cleaning the text

In [3]:
def clean_txt(txt):
    cleaned_txt = []
    for line in txt:
        line = line.lower()
        line = re.sub(r"[,.\"\'!@#$%^&*(){}?/;`~:<>+=-\\]", "", line)
        tokens = word_tokenize(line)
        words = [word for word in tokens if word.isalpha()]
        cleaned_txt+=words
    return cleaned_txt

cleaned_stories = clean_txt(stories)
print("number of words = ", len(cleaned_stories))

number of words =  2332247


## Creating the Markov Model

In [4]:
def make_markov_model(cleaned_stories, n_gram=2):
    markov_model = {}
    for i in range(len(cleaned_stories)-n_gram-1):
        curr_state, next_state = "", ""
        for j in range(n_gram):
            curr_state += cleaned_stories[i+j] + " "
            next_state += cleaned_stories[i+j+n_gram] + " "
        curr_state = curr_state[:-1]
        next_state = next_state[:-1]
        if curr_state not in markov_model:
            markov_model[curr_state] = {}
            markov_model[curr_state][next_state] = 1
        else:
            if next_state in markov_model[curr_state]:
                markov_model[curr_state][next_state] += 1
            else:
                markov_model[curr_state][next_state] = 1
    
    # calculating transition probabilities
    for curr_state, transition in markov_model.items():
        total = sum(transition.values())
        for state, count in transition.items():
            markov_model[curr_state][state] = count/total
        
    return markov_model

In [5]:
markov_model = make_markov_model(cleaned_stories)

In [6]:
print("number of states = ", len(markov_model.keys()))

number of states =  208716


In [7]:
print("All possible transitions from 'the game' state: \n")
print(markov_model['the game'])

All possible transitions from 'the game' state: 

{'my own': 0.02702702702702703, 'at any': 0.02702702702702703, 'mr holmes': 0.02702702702702703, 'ay whats': 0.02702702702702703, 'my friend': 0.02702702702702703, 'fairly by': 0.02702702702702703, 'is not': 0.02702702702702703, 'was not': 0.02702702702702703, 'is hardly': 0.02702702702702703, 'was in': 0.02702702702702703, 'would have': 0.036036036036036036, 'is up': 0.06306306306306306, 'is and': 0.036036036036036036, 'in their': 0.036036036036036036, 'was whist': 0.036036036036036036, 'was up': 0.09009009009009009, 'in that': 0.036036036036036036, 'the lack': 0.036036036036036036, 'for all': 0.06306306306306306, 'is afoot': 0.036036036036036036, 'may wander': 0.02702702702702703, 'now a': 0.02702702702702703, 'was afoot': 0.036036036036036036, 'for the': 0.036036036036036036, 'worth it': 0.02702702702702703, 'you are': 0.02702702702702703, 'i am': 0.02702702702702703, 'now count': 0.02702702702702703, 'your letter': 0.027027027027027

## Generating Sherlock Holmes stories!

In [8]:
def generate_story(markov_model, limit=100, start='my god'):
    n = 0
    curr_state = start
    next_state = None
    story = ""
    story+=curr_state+" "
    while n<limit:
        next_state = random.choices(list(markov_model[curr_state].keys()),
                                    list(markov_model[curr_state].values()))
        
        curr_state = next_state[0]
        story+=curr_state+" "
        n+=1
    return story

In [9]:
for i in range(20):
    print(str(i)+". ", generate_story(markov_model, start="dear holmes", limit=8))

0.  dear holmes oh yes mr holmes i do not know how i came to you for a hundred 
1.  dear holmes am i right certainly but how do you know that anything she is accused of is 
2.  dear holmes you are right said the stranger there was no possible resistance under the menace in his 
3.  dear holmes he has been concerned in it and no fresh light had been opened to admit the 
4.  dear holmes said i i thought you were asked to step up wiggins i was surprised to hear 
5.  dear holmes my previous letters and no marking upon the tor it is true said sir henry it 
6.  dear holmes he has done to cure her of her for the legal expenses connected with the death 
7.  dear holmes oh yes i am a man should keep his little boy home a box of vestas 
8.  dear holmes what do you think then that had tasted blood if sarah had been there since one 
9.  dear holmes said i he shrugged his shoulders and lit the lamp in the place and i had 
10.  dear holmes i have obeyed my fathers wish in seeing you but what is very

In [10]:
for i in range(20):
    print(str(i)+". ", generate_story(markov_model, start="my dear", limit=8))

0.  my dear watson to stop the service and he is to send him to come to me again 
1.  my dear watson do you imagine what it all means and let me give such orders as i 
2.  my dear watson said he there must be someone from america because she had resolved to stick together 
3.  my dear watson said she for three days longer and there was nothing immediate that i could help 
4.  my dear chap i had seen it in behalf of three of our men we are both very 
5.  my dear watson there is nothing else of importance here but i am so glad you have come 
6.  my dear fellow no one can not see across the mountains on each side of his dark eyes 
7.  my dear fellow i cried approaching him stand back stand right back and let me have my rest 
8.  my dear watson that at two oclock au revoir and we heard a cry in which two boys 
9.  my dear watson he paused after each sentence to collect his failing had ceased to be of a 
10.  my dear sir knowing the vindictive character of his old carelessness of your serva

In [11]:
for i in range(20):
    print(str(i)+". ", generate_story(markov_model, start="i would", limit=8))

0.  i would go round the house was none other than the opium den and who was orderly in 
1.  i would not have shone upon his eager mind and from that mr douglas that day but i 
2.  i would care if it was a drive and the beast save from what you say mr white 
3.  i would only ask you now she has passed through my mind and trying to find out what 
4.  i would ask you who shared your cab in your own mind i can hardly put my foot 
5.  i would go away and saying that he has returned after me oh no my friend a chill 
6.  i would have had her when the mutiny and has many calls upon me and went on up 
7.  i would say nothing thereof to their sister elizabeth when dr mortimer has read to us sir that 
8.  i would lay a peculiar instrument a brown chest of drawers in the room all day in the 
9.  i would also urge that the circumstances did not admit that it can be indicated by that right 
10.  i would give my little impersonations your kindly praise it was you speak as if he were standing 
11.  i

In [12]:
print(generate_story(markov_model, start="the case", limit=100))

the case of pondicherry lodge sherlock holmes was threatened with a prosecution must be more than i could see that no apology to me by its suppression as far as i know that now and forever he pushed past the couch to the open window we could see no better known family in the temple it was a normal human being every one was a severely furnished and the lady how dared i offer than to observe him from the scene i have a look at the place of silver blaze i have seen the will but the address is either done with another passage running into it but the greeting appeared to come for half an hour or two things about moriarty which may interest the reader now at present i can give them are they blood stains or fruit stains or what he was said lestrade anyhow we have no more to do with the matter this is what began it all mean i gasped it means that we shall soon see the end a shadow passed over the gaunt features of great interest have you might have been chiselled in granite craggy remorseless 