<img src="https://drive.google.com/uc?export=view&amp;id=1zSJwAUxWv5bxyYLmYPNi-s6M_Wq5iWXh">

## Importing tools

In [1]:
import numpy as np
import pandas as pd
import os
import re
import string
from nltk.tokenize import word_tokenize
from nltk.corpus import stopwords
import random

## Reading every Sherlock Holmes adventure!

In [2]:
# Directory holding the Sherlock Holmes story files (Kaggle input mount).
story_path = "/kaggle/input/sherlock-holmes-stories/sherlock/sherlock/"

def read_all_stories(story_path):
    """Collect the stripped, non-empty lines of every file under ``story_path``.

    The directory tree is walked recursively. Reading of a file stops at the
    first line that is exactly ``----------`` (after stripping); nothing
    from that line onward is kept for that file.

    Args:
        story_path: Root directory to walk. A trailing path separator is
            optional.

    Returns:
        A flat list of lines (whitespace-stripped, empty lines dropped),
        file after file, with directories and file names visited in sorted
        order.
    """
    txt = []
    for root, dirs, files in os.walk(story_path):
        # os.walk yields names in arbitrary order; sorting in place keeps the
        # corpus order (and therefore the model) stable between runs.
        dirs.sort()
        for file in sorted(files):
            # Join with the directory currently being visited, not the root:
            # plain ``story_path + file`` breaks for nested files and for a
            # root given without a trailing slash.
            with open(os.path.join(root, file)) as f:
                for line in f:
                    line = line.strip()
                    if line == '----------':
                        break
                    if line != '':
                        txt.append(line)
    return txt
        
# Read the corpus into a flat list of stripped, non-empty lines and report its size.
stories = read_all_stories(story_path)
print("number of lines = ", len(stories))

number of lines =  215021


## Cleaning the text

In [3]:
def clean_txt(txt):
    """Turn an iterable of text lines into one flat list of word tokens.

    Each line is lower-cased, has the punctuation matched by the pattern
    below deleted, and is tokenized with ``word_tokenize``. Only tokens made
    up entirely of alphabetic characters are kept, in their original order.
    """
    # NOTE: inside the character class, ``=-\\`` is a range from '=' to '\',
    # so a literal '-' is NOT removed here; tokens that still contain one are
    # discarded by the isalpha() filter below.
    punctuation = r"[,.\"\'!@#$%^&*(){}?/;`~:<>+=-\\]"
    cleaned_txt = []
    for raw_line in txt:
        stripped = re.sub(punctuation, "", raw_line.lower())
        cleaned_txt.extend(
            token for token in word_tokenize(stripped) if token.isalpha()
        )
    return cleaned_txt

# Flatten the corpus into one list of lower-cased alphabetic tokens and report its size.
cleaned_stories = clean_txt(stories)
print("number of words = ", len(cleaned_stories))

number of words =  2332247


## Creating the Markov Model

In [4]:
def make_markov_model(cleaned_stories, n_gram=2):
    """Build a Markov transition table over n-gram states.

    A state is ``n_gram`` consecutive tokens joined by single spaces. For
    every position in the token sequence, the n-gram starting there is the
    current state and the immediately following (non-overlapping) n-gram is
    the next state.

    Args:
        cleaned_stories: Sequence of word tokens.
        n_gram: Number of tokens per state; must be at least 1.

    Returns:
        A dict mapping each state to a dict of ``{next_state: probability}``,
        where the probabilities for one state sum to 1. Empty when the
        sequence is shorter than ``2 * n_gram`` tokens.

    Raises:
        ValueError: If ``n_gram`` is smaller than 1.
    """
    if n_gram < 1:
        raise ValueError("n_gram must be at least 1")

    markov_model = {}
    # A start index needs room for two back-to-back n-grams. The previous
    # bound (len - n_gram - 1) was only right for n_gram == 2: larger values
    # ran past the end of the list, n_gram == 1 dropped the last transition.
    last_start = len(cleaned_stories) - 2 * n_gram
    for i in range(last_start + 1):
        curr_state = " ".join(cleaned_stories[i:i + n_gram])
        next_state = " ".join(cleaned_stories[i + n_gram:i + 2 * n_gram])
        transitions = markov_model.setdefault(curr_state, {})
        transitions[next_state] = transitions.get(next_state, 0) + 1

    # Convert raw counts into transition probabilities. Only existing keys are
    # reassigned, so mutating while iterating is safe.
    for transitions in markov_model.values():
        total = sum(transitions.values())
        for state, count in transitions.items():
            transitions[state] = count / total

    return markov_model

In [5]:
# Build the transition table with the default n_gram=2 (two-word states).
markov_model = make_markov_model(cleaned_stories)

In [6]:
# Each key of the model is one distinct state.
print("number of states = ", len(markov_model))

number of states =  208716


In [7]:
# Sanity check: show the outgoing transition probabilities of a single state.
print("All possible transitions from 'the game' state: \n")
print(markov_model['the game'])

All possible transitions from 'the game' state: 

{'my own': 0.02702702702702703, 'at any': 0.02702702702702703, 'mr holmes': 0.02702702702702703, 'ay whats': 0.02702702702702703, 'my friend': 0.02702702702702703, 'fairly by': 0.02702702702702703, 'is not': 0.02702702702702703, 'was not': 0.02702702702702703, 'is hardly': 0.02702702702702703, 'was in': 0.02702702702702703, 'would have': 0.036036036036036036, 'is up': 0.06306306306306306, 'is and': 0.036036036036036036, 'in their': 0.036036036036036036, 'was whist': 0.036036036036036036, 'was up': 0.09009009009009009, 'in that': 0.036036036036036036, 'the lack': 0.036036036036036036, 'for all': 0.06306306306306306, 'is afoot': 0.036036036036036036, 'may wander': 0.02702702702702703, 'now a': 0.02702702702702703, 'was afoot': 0.036036036036036036, 'for the': 0.036036036036036036, 'worth it': 0.02702702702702703, 'you are': 0.02702702702702703, 'i am': 0.02702702702702703, 'now count': 0.02702702702702703, 'your letter': 0.027027027027027

## Generating Sherlock Holmes stories!

In [8]:
def generate_story(markov_model, limit=100, start='my god'):
    """Generate text by randomly walking the Markov chain from ``start``.

    Args:
        markov_model: Mapping of ``state -> {next_state: probability}``, as
            produced by ``make_markov_model``.
        limit: Maximum number of transitions to sample.
        start: Initial state. It must have outgoing transitions in
            ``markov_model`` whenever ``limit`` is greater than 0.

    Returns:
        The visited states (starting with ``start``), each followed by a
        single space. The walk ends early if it reaches a state with no
        outgoing transitions.

    Raises:
        KeyError: If ``start`` has no outgoing transitions and at least one
            transition was requested.
    """
    n = 0
    curr_state = start
    visited = [curr_state]
    while n < limit:
        transitions = markov_model.get(curr_state)
        if not transitions:
            if n == 0:
                raise KeyError(
                    f"start state {start!r} has no outgoing transitions in the markov model"
                )
            # Dead end (e.g. the corpus' final n-gram, which was only ever
            # seen as a next state): return what has been generated so far
            # instead of crashing mid-story.
            break
        # random.choices returns a list; with the default k=1 take its only item.
        curr_state = random.choices(list(transitions.keys()),
                                    list(transitions.values()))[0]
        visited.append(curr_state)
        n += 1
    return " ".join(visited) + " "

In [9]:
# Sample twenty short stories that all open with "dear holmes".
for story_no in range(20):
    label = str(story_no) + ". "
    print(label, generate_story(markov_model, start="dear holmes", limit=8))

0.  dear holmes i thought perhaps i had come i approached and knocked at the door you forget that 
1.  dear holmes he has lost heavily in dabbling with stocks and that it is art for its own 
2.  dear holmes i thought a little to the prosaic performances of chicago but there is if you will 
3.  dear holmes i have not yet discovered then clearly it is all very clear and as i ascended 
4.  dear holmes that i have clung to morstans share as well as on the underground on tuesday morning 
5.  dear holmes he has now fallen upon evil days he has been left unexplained what became of him 
6.  dear holmes i ejaculated commonplace said holmes though how you attained this result simply by having the good 
7.  dear holmes i have said all i have a fancy to me mr holmes i think we may 
8.  dear holmes he has been in the front of his fortune his age his cheeks haggard and his 
9.  dear holmes what do you suggest any fallacy he could hope when the deed and had taken this 
10.  dear holmes if i do said l

In [10]:
# Sample twenty short stories that all open with "my dear".
for story_no in range(20):
    label = str(story_no) + ". "
    print(label, generate_story(markov_model, start="my dear", limit=8))

0.  my dear fellow i congratulate you with your theories i remarked that is not here under cover of 
1.  my dear wife knew no sir this is my friend watson has written in the french embassy or 
2.  my dear watson and myself had i been numerous petty thefts holmes snorted his contempt this great and 
3.  my dear mr holmes i met you in forming an opinion thank you i was of irene adler 
4.  my dear holmes and we shall succeed in proving what i feel it and want to see the 
5.  my dear sir knowing the vindictive character of fiend or do you mean by allowing his retreat to 
6.  my dear watson said he as he returned with a knife is surely in your line except for 
7.  my dear fellow it would be buried under the one chair in a way that is very helpful 
8.  my dear mr sherlock holmes then excellent but i assure you that complete frankness between us began to 
9.  my dear watson said the voice are you though he ran all round the centre of the room 
10.  my dear watson i had suspicions of foul play

In [11]:
# Sample twenty short stories that all open with "i would".
for story_no in range(20):
    label = str(story_no) + ". "
    print(label, generate_story(markov_model, start="i would", limit=8))

0.  i would see you know me better you will understand that i tell madam said holmes and now 
1.  i would only return etc the good steiler assured me that no one else had gone rumour reported 
2.  i would ask you a few words with her alone in the pocket of the deceased had been 
3.  i would have shown us into his room where bennett who had known dad in the case seemed 
4.  i would willingly give five hundred said the older man dies he will succeed to a sinister fact 
5.  i would have been hard put to it as peter carey anyhow so i felt some reflection of 
6.  i would have given you one way or the other well the temptation of sudden wealth so easily 
7.  i would some hours in silence for some time that we can clear the whole matter you may 
8.  i would stand by each other in the open door brunton the county police are at liberty to 
9.  i would suggest that it was cracked exceedingly dusty and cheerless with two windows in the new year 
10.  i would leave her in peace and quiet i slipped

In [12]:
# Generate a single longer story from the state "the case" with limit=100.
print(generate_story(markov_model, start="the case", limit=100))

the case there was no one knows a deal more than i should have looked upon aimless bodily exertion as a waste of energy he tore the mask from his pocket here is a joke sir it is as good as a set of figures before you can imagine and asked my elderly friend to remain at the new patent leathers which i am all that i want to get the number of the stranger there was a different man from the morning which led us to a crisis i have never been a bad job and simply bargained with him that if he were her companions not screen her from his face who are you sure about whether i had on hand there is nothing new under the sun rose slowly above the slates like brick islands in a pompous and manner mr gregson he said i why we are so trivial said dr trevelyan is an assignation we may find something which brought him to westminster it was the coroner said look at the abruptness of the question now arose how i came to my hut in the west wing of the said mordecai smith and tell her story and 
