## Importing tools

In [1]:
import numpy as np
import pandas as pd
import os
import re
import string
from nltk.tokenize import word_tokenize
from nltk.corpus import stopwords
import random

## Reading every Sherlock Holmes adventure!

In [2]:
# Root directory of the story text files; passed to read_all_stories below.
story_path = "/kaggle/input/sherlock-holmes-stories/sherlock/sherlock/"

def read_all_stories(story_path):
    """Read every file under ``story_path`` into one flat list of text lines.

    The directory tree is traversed with ``os.walk``. Each file is read line
    by line: lines are stripped of surrounding whitespace, empty lines are
    skipped, and reading of a file stops at the first line that is exactly
    ``'----------'`` (everything after that marker in the file is ignored).

    Args:
        story_path: Directory containing the story files. A trailing path
            separator is optional, and files in subdirectories are read too.

    Returns:
        list[str]: The non-empty, stripped lines of all files, in the order
        in which ``os.walk`` yields the files.
    """
    txt = []
    for root, _, files in os.walk(story_path):
        for file in files:
            # Build the path from the directory currently being visited, so
            # that nested files and a story_path without a trailing separator
            # both resolve correctly.
            with open(os.path.join(root, file)) as f:
                for line in f:
                    line = line.strip()
                    if line == '----------':
                        break
                    if line:
                        txt.append(line)
    return txt
        
# Load every story line once; `stories` is the raw input for the cleaning step.
stories = read_all_stories(story_path)
print("number of lines = ", len(stories))

number of lines =  215021


## Cleaning the text

In [3]:
def clean_txt(txt):
    """Turn an iterable of text lines into one flat list of lowercase words.

    Each line is lowercased, has the characters matched by the punctuation
    class removed, and is tokenized with ``word_tokenize``. Only tokens for
    which ``str.isalpha()`` is true are kept.

    Args:
        txt: Iterable of strings (one entry per line of text).

    Returns:
        list[str]: All kept tokens, in reading order across every line.
    """
    # NOTE(review): inside this character class `=-\\` is parsed as a range
    # ('=' through '\'), not as three literals, so a literal '-' is not part
    # of the class and hyphens are left in place for the tokenizer.
    punctuation = r"[,.\"\'!@#$%^&*(){}?/;`~:<>+=-\\]"
    cleaned_txt = []
    for raw_line in txt:
        stripped = re.sub(punctuation, "", raw_line.lower())
        cleaned_txt.extend(
            token for token in word_tokenize(stripped) if token.isalpha()
        )
    return cleaned_txt

# Flatten the raw lines into a single word sequence used to build the model.
cleaned_stories = clean_txt(stories)
print("number of words = ", len(cleaned_stories))

number of words =  2332247


## Creating the Markov Model

In [4]:
def make_markov_model(cleaned_stories, n_gram=2):
    """Build an n-gram Markov chain from a sequence of words.

    A state is ``n_gram`` consecutive words joined by single spaces. For every
    position ``i`` the state starting at ``i`` transitions to the state that
    starts ``n_gram`` words later (the next non-overlapping n-gram). Counts are
    then normalised so each state's outgoing transitions sum to 1.

    Args:
        cleaned_stories: Sequence of word strings (supports ``len`` and
            slicing), e.g. the list returned by ``clean_txt``.
        n_gram: Number of words per state. Must be at least 1.

    Returns:
        dict[str, dict[str, float]]: Maps each state to a dict of
        ``next_state -> transition probability``. Empty when the input holds
        fewer than ``2 * n_gram`` words.

    Raises:
        ValueError: If ``n_gram`` is smaller than 1.
    """
    if n_gram < 1:
        raise ValueError("n_gram must be a positive integer")

    markov_model = {}
    # A (current, next) pair needs 2 * n_gram words starting at i, so the last
    # valid start index is len - 2 * n_gram. The previous bound
    # (len - n_gram - 1) was only correct for n_gram == 2.
    n_pairs = len(cleaned_stories) - 2 * n_gram + 1
    for i in range(n_pairs):
        curr_state = " ".join(cleaned_stories[i:i + n_gram])
        next_state = " ".join(cleaned_stories[i + n_gram:i + 2 * n_gram])
        transitions = markov_model.setdefault(curr_state, {})
        transitions[next_state] = transitions.get(next_state, 0) + 1

    # Convert raw counts into transition probabilities. Only existing keys are
    # reassigned, so mutating while iterating over items() is safe.
    for transitions in markov_model.values():
        total = sum(transitions.values())
        for state, count in transitions.items():
            transitions[state] = count / total

    return markov_model

In [5]:
# Build the model with the default n_gram=2, so every state is a two-word string.
markov_model = make_markov_model(cleaned_stories)

In [6]:
# Number of distinct states (keys) in the model.
print("number of states = ", len(markov_model.keys()))

number of states =  208716


In [7]:
# Inspect one state: the dict maps each possible next state to its probability.
print("All possible transitions from 'the game' state: \n")
print(markov_model['the game'])

All possible transitions from 'the game' state: 

{'my own': 0.02702702702702703, 'at any': 0.02702702702702703, 'mr holmes': 0.02702702702702703, 'ay whats': 0.02702702702702703, 'my friend': 0.02702702702702703, 'fairly by': 0.02702702702702703, 'is not': 0.02702702702702703, 'was not': 0.02702702702702703, 'is hardly': 0.02702702702702703, 'was in': 0.02702702702702703, 'would have': 0.036036036036036036, 'is up': 0.06306306306306306, 'is and': 0.036036036036036036, 'in their': 0.036036036036036036, 'was whist': 0.036036036036036036, 'was up': 0.09009009009009009, 'in that': 0.036036036036036036, 'the lack': 0.036036036036036036, 'for all': 0.06306306306306306, 'is afoot': 0.036036036036036036, 'may wander': 0.02702702702702703, 'now a': 0.02702702702702703, 'was afoot': 0.036036036036036036, 'for the': 0.036036036036036036, 'worth it': 0.02702702702702703, 'you are': 0.02702702702702703, 'i am': 0.02702702702702703, 'now count': 0.02702702702702703, 'your letter': 0.027027027027027

## Generating Sherlock Holmes stories!

In [8]:
def generate_story(markov_model, limit=100, start='my god', seed=None):
    """Generate text by a random walk over a Markov model.

    Starting from ``start``, the next state is repeatedly drawn with
    ``random.choices`` using the current state's transition probabilities as
    weights. The walk ends after ``limit`` transitions, or earlier if it
    reaches a state that has no outgoing transitions in the model.

    Args:
        markov_model: Dict mapping ``state -> {next_state: probability}``,
            as returned by ``make_markov_model``.
        limit: Maximum number of transitions to take.
        start: Initial state; must be a key of ``markov_model`` when
            ``limit`` is greater than 0.
        seed: Optional seed. When given, a private ``random.Random(seed)`` is
            used so the same seed reproduces the same story; when ``None``
            the module-level ``random`` generator is used.

    Returns:
        str: The visited states, each followed by a single space (so the
        result ends with a trailing space).

    Raises:
        KeyError: If ``limit > 0`` and ``start`` is not a state of the model.
    """
    if limit > 0 and start not in markov_model:
        raise KeyError(f"start state {start!r} is not in the Markov model")

    rng = random if seed is None else random.Random(seed)
    curr_state = start
    states = [curr_state]
    for _ in range(limit):
        transitions = markov_model.get(curr_state)
        if not transitions:
            # Dead end: this state only occurs where nothing follows it, so
            # there is nothing to sample. Stop instead of raising.
            break
        curr_state = rng.choices(list(transitions.keys()),
                                 list(transitions.values()))[0]
        states.append(curr_state)

    # Collect the pieces and join once rather than growing a string in a loop.
    return " ".join(states) + " "

In [9]:
# Print 20 short samples from the "dear holmes" state; limit=8 means eight
# transitions are taken after the start state.
for i in range(20):
    print(str(i)+". ", generate_story(markov_model, start="dear holmes", limit=8))

0.  dear holmes i fear said holmes the officer gave the alarm she and mrs king had reentered the 
1.  dear holmes if i had been too much for my past history and get away for all i 
2.  dear holmes if i gave you were as elated as if it were thought possible that i am 
3.  dear holmes oh yes i have misjudged him when i have finished at the coronet their united strength 
4.  dear holmes i exclaimed oh the she shall pay for it with great interest and i was lucky 
5.  dear holmes you are on his track for years and whose absolute reliability is quite above suspicion another 
6.  dear holmes i have made myself clear i answered with a suspicious flash of lightning in the night 
7.  dear holmes i fear that i left this door shut with a goatee beard which gave it birth 
8.  dear holmes i exclaimed and this i have ever seen with a firm step and an engagement which 
9.  dear holmes i ejaculated commonplace said holmes though it is just in such details that the night was 
10.  dear holmes i thought 

In [10]:
# Same sampling as above, starting from the "my dear" state.
for i in range(20):
    print(str(i)+". ", generate_story(markov_model, start="my dear", limit=8))

0.  my dear watson said he that circle is drawn at a radius of ten miles there are as 
1.  my dear watson there are some cigarettes i saw him stoop and a little chirrup of joy and 
2.  my dear watson i dont think so holmes walked across the drawbridge its open mouth its eyes glowed 
3.  my dear daughter alice now in custody and who had told him shortly what was intended it is 
4.  my dear watson that sensational cases had disappeared mr aloysius doran two days later that is another matter 
5.  my dear fellow i thought you would come from a forlorn hope could have recognized that this was 
6.  my dear watson to think that with a long draught of water through one of the bodies in 
7.  my dear fellow i often think of those who had been watching the oil lamps had been lit 
8.  my dear fellow i congrat to milvertons housemaid good heavens cried the king nothing could be the result 
9.  my dear holmes am i right on both sides like bob and me was already out of date 
10.  my dear sir we shall

In [11]:
# Same sampling again, starting from the "i would" state.
for i in range(20):
    print(str(i)+". ", generate_story(markov_model, start="i would", limit=8))

0.  i would spare you five minutes i shall at least show our people what is known of the 
1.  i would go i really dont think he is and nothing will persuade me to believe that they 
2.  i would not have gone so far that we can get the address of the receiver who had 
3.  i would also urge that the coast seems to be somewhat vague as to make himself out to 
4.  i would have heard me remark that the end had sir charles received any other letters it occurred 
5.  i would have allowed his guilty conscience it may have observed that the bust was smashed well thats 
6.  i would mind apologizing to hudson i refused to answer him as on a hot scent and yet 
7.  i would deduce and acted accordingly and what is the owner going to restore the fallen grandeur of 
8.  i would rather not answer that quite so there you are and what it was it not possible 
9.  i would not touch me with bleared drunken eyes for my part after we heard the rattle of 
10.  i would give so much to win what a pretty bad time

In [12]:
# One longer sample: up to 100 transitions starting from "the case".
print(generate_story(markov_model, start="the case", limit=100))

the case is there anything else you have acted with great discretion who sent you why i hated these men are and what it is that it was the body but here he is certainly a little untrustworthy said holmes he was but he did not scorn her advice i assure you that it is from a certain point he had taken several notes during mr hardings evidence and i weighted them with the fall you see gentlemen that i was using another name at the head of course the attempted murder of mr eduardo lucas well known that her sister could smell dr roylotts cigar now of course we searched the garden that is certainly the the blinds are up you can see that it was quite shallow but under the jeers of his antagonist but his own explanation as to what took place well really i had no clue in the evening of the colonels face it had got loose was a man of many enemies edmunds told me that our unfortunate client has rather an early appointment this morning or any of the starry flag of freedom which would cause her to 