## Importing tools

In [6]:
import numpy as np
import pandas as pd
import os
import re
import nltk
import string
from nltk.tokenize import word_tokenize
from nltk.corpus import stopwords
import random

In [9]:
nltk.download('punkt')

[nltk_data] Downloading package punkt to C:\Users\Benouaklil
[nltk_data]     Hodhaifa\AppData\Roaming\nltk_data...
[nltk_data]   Unzipping tokenizers\punkt.zip.


True

## Reading every Sherlock Holmes adventure!

In [10]:
story_path = "./sherlock/"

def read_all_stories(story_path):
    txt = []
    for _, _, files in os.walk(story_path):
        for file in files:
            with open(story_path+file) as f:
                for line in f:
                    line = line.strip()
                    if line=='----------': break
                    if line!='':txt.append(line)
    return txt
        
stories = read_all_stories(story_path)
print("number of lines = ", len(stories))

number of lines =  215021


## Cleaning the text

In [11]:
def clean_txt(txt):
    cleaned_txt = []
    for line in txt:
        line = line.lower()
        line = re.sub(r"[,.\"\'!@#$%^&*(){}?/;`~:<>+=-\\]", "", line)
        tokens = word_tokenize(line)
        words = [word for word in tokens if word.isalpha()]
        cleaned_txt+=words
    return cleaned_txt

cleaned_stories = clean_txt(stories)
print("number of words = ", len(cleaned_stories))

number of words =  2332247


## Creating the Markov Model

In [12]:
def make_markov_model(cleaned_stories, n_gram=2):
    markov_model = {}
    for i in range(len(cleaned_stories)-n_gram-1):
        curr_state, next_state = "", ""
        for j in range(n_gram):
            curr_state += cleaned_stories[i+j] + " "
            next_state += cleaned_stories[i+j+n_gram] + " "
        curr_state = curr_state[:-1]
        next_state = next_state[:-1]
        if curr_state not in markov_model:
            markov_model[curr_state] = {}
            markov_model[curr_state][next_state] = 1
        else:
            if next_state in markov_model[curr_state]:
                markov_model[curr_state][next_state] += 1
            else:
                markov_model[curr_state][next_state] = 1
    
    # calculating transition probabilities
    for curr_state, transition in markov_model.items():
        total = sum(transition.values())
        for state, count in transition.items():
            markov_model[curr_state][state] = count/total
        
    return markov_model

In [13]:
markov_model = make_markov_model(cleaned_stories)

In [14]:
print("number of states = ", len(markov_model.keys()))

number of states =  208716


In [15]:
print("All possible transitions from 'the game' state: \n")
print(markov_model['the game'])

All possible transitions from 'the game' state: 

{'your letter': 0.02702702702702703, 'was up': 0.09009009009009009, 'is afoot': 0.036036036036036036, 'for the': 0.036036036036036036, 'was in': 0.02702702702702703, 'is hardly': 0.02702702702702703, 'would have': 0.036036036036036036, 'is up': 0.06306306306306306, 'is and': 0.036036036036036036, 'in their': 0.036036036036036036, 'was whist': 0.036036036036036036, 'in that': 0.036036036036036036, 'the lack': 0.036036036036036036, 'for all': 0.06306306306306306, 'may wander': 0.02702702702702703, 'now a': 0.02702702702702703, 'my own': 0.02702702702702703, 'at any': 0.02702702702702703, 'mr holmes': 0.02702702702702703, 'ay whats': 0.02702702702702703, 'my friend': 0.02702702702702703, 'fairly by': 0.02702702702702703, 'is not': 0.02702702702702703, 'was not': 0.02702702702702703, 'was afoot': 0.036036036036036036, 'worth it': 0.02702702702702703, 'you are': 0.02702702702702703, 'i am': 0.02702702702702703, 'now count': 0.027027027027027

## Generating Sherlock Holmes stories!

In [16]:
def generate_story(markov_model, limit=100, start='my god'):
    n = 0
    curr_state = start
    next_state = None
    story = ""
    story+=curr_state+" "
    while n<limit:
        next_state = random.choices(list(markov_model[curr_state].keys()),
                                    list(markov_model[curr_state].values()))
        
        curr_state = next_state[0]
        story+=curr_state+" "
        n+=1
    return story

In [17]:
for i in range(20):
    print(str(i)+". ", generate_story(markov_model, start="dear holmes", limit=8))

0.  dear holmes my previous letters and is now evident were to prevent anything like public exposure of private 
1.  dear holmes said i thought that nothing could be seen except my own and it is quite certain 
2.  dear holmes i ejaculated my dear dear son now that he knew that i took some time before 
3.  dear holmes if i call upon you and i and i have already said that i was very 
4.  dear holmes oh yes i know where all gossip is welcome this weakness of his nature it was 
5.  dear holmes and tell her these things and followed him closely and to keep herself in your room 
6.  dear holmes i exclaimed in unfeigned admiration it is so common in an english coat frayed at the 
7.  dear holmes said i precisely he opened the door to say that i make my bow and return 
8.  dear holmes i exclaimed perhaps one of those marks on her arm that you make such reparation as 
9.  dear holmes if i can i be of value you can see the two charts all ready signed 
10.  dear holmes i thought of all our diffic

In [18]:
for i in range(20):
    print(str(i)+". ", generate_story(markov_model, start="my dear", limit=8))

0.  my dear holmes i went to bed and the note with which the crime was committed so as 
1.  my dear watson i think you can find nothing against him either here in my pocket there i 
2.  my dear watson i am very much so and the adventure of the musgrave ritual arthur conan doyle 
3.  my dear watson i think we are a good fellow said holmes it is very essential to me 
4.  my dear mycroft the brothers life is more than these folk here in spite of the door mr 
5.  my dear young lady we have so much for the man was crouching at the window the card 
6.  my dear inspector you have formed any explanation of his had the hint from holmes that you can 
7.  my dear sir it is inconceivable that it was finally agreed however that he was afraid of no 
8.  my dear fellow we imagine that she acted in this if you told them that he must act 
9.  my dear watson you have never gone out precisely yes i met theresa wright is her ring it 
10.  my dear fellow what do the criminal part its not been fed for two d

In [19]:
for i in range(20):
    print(str(i)+". ", generate_story(markov_model, start="i would", limit=8))

0.  i would have thought it and that is what mr lestrade of scotland yard officials messrs lestrade and 
1.  i would not leave a tongue with power to wag in a despairing voice a fortnight went by 
2.  i would prove it anyhow but what do you think of miss minnie warrender tut you will find 
3.  i would ask you to have a word or moving a muscle from morning to night on these 
4.  i would spend my life hiking round the old village inn he talked slowly and at least throw 
5.  i would not have been driven to ask you one or two more points to bring the facts 
6.  i would not have me leave it a ball of stout twine i think he is an elderly 
7.  i would not i it is a very pretty hash you have excluded the impossible whatever remains however 
8.  i would always carry the case pray let me hear the end mr mac it is less than 
9.  i would rather not do so and asked me rather an irregular pioneer who goes in front of 
10.  i would not do it there comes a time when my father made the parting easier t

In [20]:
print(generate_story(markov_model, start="the case", limit=100))

the case in his rooms of a certain window but the weather wait a bit though theres the varnish too like earth on each side of his brow was all changed when he understood the relations between us i am an omnivorous reader with a strangely retentive memory for trifles he answered laughing besides we may be comfortable in the fifth year and has got a glimpse of by the heels in the italian colony he had once contained the pure soul of lucy ferrier the wanderer explained me and made my dark room up there presently i think colonel that you had intended before his departure by the way she came to mr garcia wisteria lodge it was nearly midway between the oak that must have been examined in every way signor castalotte was a double stream upon the of the few clear days which our start had given jefferson hope lingered among the mountains for two as soon as you what happened then she died just a week in london or has impressed those who were as elated as if he were in baker street that will do sa