Story data as JSON-like Python dictionaries

In [None]:
# Cast of the story, keyed by a short lowercase identifier.
# Each record holds a display name, an occupation (an empty string when none
# is given) and a list of traits.
characters = {
    key: {"name": name, "occupation": occupation, "traits": traits}
    for key, name, occupation, traits in (
        ("sherlock", "Sherlock Holmes", "detective", ["observant", "deductive", "ingenious"]),
        ("watson", "John Watson", "doctor", ["loyal", "curious", "narrator"]),
        ("peterson", "Peterson", "commissionaire", ["dutiful", "confused"]),
        ("baker", "Henry Baker", "", ["forgetful", "unobservant"]),
        ("ryder", "John Ryder", "hotel attendant", ["guilty", "desperate", "frightened"]),
        ("breckinridge", "Mr. Breckinridge", "goose salesman", ["irritable", "protective"]),
        ("oakshott", "Mrs. Oakshott", "goose farmer", ["helpful", "unaware"]),
    )
}

# Physical items that matter to the case, keyed by identifier.
# Records are heterogeneous: only some carry "owner", "origin", "contents",
# "clues" or the "stolen"/"recovered" flags.
# NOTE(review): the goose's contents entry is spelled "blue carbuncle" (with a
# space) while the stone's own key is "blue_carbuncle" - confirm which is meant.
objects = dict(
    hat=dict(
        description="dirty old black hat",
        owner="baker",
        clues=["big size", "worn", "initials HB", "missing elastic", "dusty", "ink stains", "lime-cream residue"],
    ),
    goose=dict(
        description="white goose with black barred tail",
        origin="oakshott",
        contents=["blue carbuncle"],
    ),
    blue_carbuncle=dict(
        description="precious stone",
        owner="countess of morcar",
        stolen=True,
        recovered=True,
    ),
)

# Places in the story, keyed by lowercase name.
# Each record has a short description and the list of events attached to
# that place.
locations = {
    place: {"description": description, "events": events}
    for place, description, events in (
        (
            "221b baker street",
            "Sherlock's residence",
            ["initial investigation", "baker arrives", "ryder arrives", "conclusion"],
        ),
        (
            "hotel cosmopolitan",
            "Luxury hotel",
            ["theft of blue carbuncle", "ryder calls police"],
        ),
        ("alpha inn", "Pub with goose club", ["baker receives goose"]),
        ("covent garden", "Market", ["baker buys goose", "sherlock visits breckinridge"]),
        ("brixton road", "Oakshott's residence", ["ryder hides carbuncle in goose"]),
    )
}

# Ordered list of scenes. Every scene names the character keys involved, the
# location where it happens and a short event label.
# NOTE(review): "newspaper" is used as a location below but has no entry in
# `locations` - confirm whether that is intentional.
plot = [
    {"characters": ["sherlock", "watson"], "location": "221b baker street", "event": "sherlock examines hat"},
    {"characters": ["sherlock", "watson"], "location": "221b baker street", "event": "sherlock deduces hat owner"},
    {"characters": ["sherlock", "watson"], "location": "221b baker street", "event": "peterson arrives with goose"},
    {"characters": ["sherlock"], "location": "221b baker street", "event": "sherlock discovers blue carbuncle in goose"},
    {"characters": ["sherlock", "watson"], "location": "newspaper", "event": "sherlock reads about theft"},
    {"characters": ["sherlock", "baker"], "location": "221b baker street", "event": "baker arrives for hat and goose"},
    {"characters": ["sherlock", "watson"], "location": "221b baker street", "event": "sherlock places advertisement"},
    {"characters": ["sherlock", "ryder"], "location": "221b baker street", "event": "ryder arrives for goose"},
    {"characters": ["sherlock", "watson"], "location": "alpha inn", "event": "sherlock investigates goose origin"},
    # The final scene was cut off ("covent", no event); completed so that it
    # uses the "covent garden" location key and carries an event like the rest.
    {"characters": ["sherlock", "watson"], "location": "covent garden", "event": "sherlock visits breckinridge"},
]



Predicates

In [None]:

def is_suspect(character):
    """Return True when ``character`` is one of the hard-coded suspect keys.

    Args:
        character: Character key to test, e.g. ``"ryder"``.

    Returns:
        bool: True for ``"ryder"`` or ``"horner"``, False for anything else.
    """
    suspect_keys = ("ryder", "horner")
    return character in suspect_keys

def is_owner(item, character):
    """Return True when ``character`` is recorded as the owner of ``item``.

    Items without an ``"owner"`` field (the goose only records an origin) are
    treated as owned by nobody, the same way the sibling predicates treat
    their optional fields.

    Args:
        item: Key into ``objects``.
        character: Owner value to compare against.

    Returns:
        bool: True only if the item has an ``"owner"`` equal to ``character``.

    Raises:
        KeyError: If ``item`` is not a key of ``objects``.
    """
    record = objects[item]
    # Guard the optional field instead of raising KeyError on owner-less items.
    return "owner" in record and record["owner"] == character

def has_item(character, item):
    """Return True when ``character`` appears in the ``"contents"`` of ``item``.

    NOTE(review): the name suggests "character holds item", but the check is
    membership of ``character`` in the item's contents list - confirm intent.

    Args:
        character: Value looked up inside the item's contents.
        item: Key into ``objects``.

    Returns:
        bool: False when the item has no ``"contents"`` field, otherwise the
        result of the membership test.

    Raises:
        KeyError: If ``item`` is not a key of ``objects``.
    """
    record = objects[item]
    if "contents" not in record:
        return False
    return character in record["contents"]

def _locate_event(event):
    """Return ``(location, position)`` for the first location listing ``event``, or None."""
    for location, details in locations.items():
        events = details.get("events", [])
        if event in events:
            return location, events.index(event)
    return None


def occurred_before(event1, event2):
    """Return True when ``event1`` is listed before ``event2`` at the same location.

    ``locations`` is keyed by place name, so each event is first looked up in
    the per-location event lists. Using the event name itself as the key
    raised KeyError for every event.

    Ordering is only defined inside a single location's event list. The
    function returns False when either event is unknown or when the two
    events belong to different locations.

    Args:
        event1: Event label expected to come first.
        event2: Event label expected to come later.

    Returns:
        bool: True only if both events are found in the same location and
        ``event1`` has the smaller list position.
    """
    first = _locate_event(event1)
    second = _locate_event(event2)
    if first is None or second is None:
        return False
    first_place, first_position = first
    second_place, second_position = second
    if first_place != second_place:
        # Positions in different lists are not comparable.
        return False
    return first_position < second_position

def is_stolen(item):
    """Return the ``"stolen"`` flag of ``item``, or False when it has none.

    Args:
        item: Key into ``objects``.

    Returns:
        The stored ``"stolen"`` value, or False if the field is absent.

    Raises:
        KeyError: If ``item`` is not a key of ``objects``.
    """
    return objects[item].get("stolen", False)

def is_recovered(item):
    """Return the ``"recovered"`` flag of ``item``, or False when it has none.

    Args:
        item: Key into ``objects``.

    Returns:
        The stored ``"recovered"`` value, or False if the field is absent.

    Raises:
        KeyError: If ``item`` is not a key of ``objects``.
    """
    return objects[item].get("recovered", False)

def is_character_in_location(character, location):
    """Return True when any plot scene puts ``character`` at ``location``.

    ``plot`` is a list of scene dictionaries, so it is scanned for a scene
    whose ``"location"`` matches. Indexing the list with the location string
    raised TypeError.

    Args:
        character: Character key, e.g. ``"watson"``.
        location: Location name as used in the scenes' ``"location"`` field.

    Returns:
        bool: True if at least one matching scene lists the character.
    """
    return any(
        scene.get("location") == location and character in scene.get("characters", ())
        for scene in plot
    )

def is_character_present_in_event(character, event):
    """Check whether ``character`` is placed at the location that lists ``event``.

    The first location whose ``"events"`` list contains ``event`` is selected
    and the answer is delegated to ``is_character_in_location``.

    Args:
        character: Character key.
        event: Event label as stored in ``locations``.

    Returns:
        False when no location lists the event, otherwise whatever
        ``is_character_in_location`` returns for that location.
    """
    hosting_place = next(
        (place for place in locations if event in locations[place]["events"]),
        None,
    )
    if hosting_place is None:
        return False
    return is_character_in_location(character, hosting_place)



Rules

In [None]:

def is_suspicious(character):
    """Rule: a character is suspicious if a suspect or the carbuncle's owner.

    Args:
        character: Character key or owner value to evaluate.

    Returns:
        The truthy result of ``is_suspect`` if there is one, otherwise the
        result of ``is_owner("blue_carbuncle", character)``.
    """
    flagged = is_suspect(character)
    if flagged:
        return flagged
    return is_owner("blue_carbuncle", character)

def is_innocent(character):
    """Rule: a character is innocent exactly when not suspicious.

    Args:
        character: Character key to evaluate.

    Returns:
        bool: Logical negation of ``is_suspicious(character)``.
    """
    suspicious = is_suspicious(character)
    return False if suspicious else True

def is_connected(character1, character2):
    """Rule: link two characters through Ryder's arrival and departure.

    Three checks are evaluated in order and the first falsy result is
    returned as-is:

    1. ``"ryder arrives"`` occurred before ``"ryder leaves"``;
    2. ``character1`` is present in ``"ryder arrives"``;
    3. ``character2`` is present in ``"ryder leaves"``.

    NOTE(review): ``"ryder leaves"`` does not appear in any event list of the
    ``locations`` data in this notebook - confirm the intended event label.

    Args:
        character1: Character expected at the arrival event.
        character2: Character expected at the departure event.

    Returns:
        The result of the last check when all pass, otherwise the first
        falsy check result.
    """
    ordered = occurred_before("ryder arrives", "ryder leaves")
    if not ordered:
        return ordered
    first_present = is_character_present_in_event(character1, "ryder arrives")
    if not first_present:
        return first_present
    return is_character_present_in_event(character2, "ryder leaves")

def is_liar(character):
    """Rule: a suspect counts as a liar while the carbuncle is unrecovered.

    Args:
        character: Character key to evaluate.

    Returns:
        The falsy ``is_suspect`` result for non-suspects, otherwise
        ``not is_recovered("blue_carbuncle")``.
    """
    suspected = is_suspect(character)
    if not suspected:
        return suspected
    return not is_recovered("blue_carbuncle")

def is_truthful(character):
    """Rule: a character is truthful exactly when not a liar.

    Args:
        character: Character key to evaluate.

    Returns:
        bool: Logical negation of ``is_liar(character)``.
    """
    lying = is_liar(character)
    return False if lying else True

def is_involved(character):
    """Rule: a character is involved if a liar or matched by ``has_item``.

    Args:
        character: Character key to evaluate.

    Returns:
        The truthy ``is_liar`` result if there is one, otherwise the result
        of ``has_item(character, "blue_carbuncle")``.
    """
    lying = is_liar(character)
    if lying:
        return lying
    return has_item(character, "blue_carbuncle")

def is_innocent_owner(character):
    """Rule: the carbuncle's owner is an innocent owner once it is recovered.

    Args:
        character: Owner value to evaluate.

    Returns:
        The falsy ``is_owner`` result for non-owners, otherwise the result of
        ``is_recovered("blue_carbuncle")``.
    """
    owns_stone = is_owner("blue_carbuncle", character)
    if not owns_stone:
        return owns_stone
    return is_recovered("blue_carbuncle")




Importing necessary libraries

In [None]:
import nltk
from nltk.tokenize import word_tokenize
from nltk.corpus import wordnet
from nltk.chunk import ne_chunk
from nltk.stem import WordNetLemmatizer, PorterStemmer
from nltk import pos_tag


In [None]:
# Fetch the NLTK data packages this notebook relies on. The recorded output
# below reports each one as already up to date on the machine that ran it.
# NOTE(review): resource names can differ between NLTK releases - confirm
# these match the installed version.
nltk.download('averaged_perceptron_tagger')  # tagger model, presumably for pos_tag
nltk.download('maxent_ne_chunker')  # chunker model, presumably for ne_chunk
nltk.download('words')  # word list, presumably needed by the NE chunker
nltk.download('punkt')  # tokenizer data, presumably for word_tokenize
nltk.download('wordnet')  # WordNet corpus, presumably for wordnet / WordNetLemmatizer

[nltk_data] Downloading package averaged_perceptron_tagger to
[nltk_data]     /root/nltk_data...
[nltk_data]   Package averaged_perceptron_tagger is already up-to-
[nltk_data]       date!
[nltk_data] Downloading package maxent_ne_chunker to
[nltk_data]     /root/nltk_data...
[nltk_data]   Package maxent_ne_chunker is already up-to-date!
[nltk_data] Downloading package words to /root/nltk_data...
[nltk_data]   Package words is already up-to-date!
[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package wordnet to /root/nltk_data...
[nltk_data]   Package wordnet is already up-to-date!


True

In [None]:
# Shared normalizer instances, created once here and used by preprocess_tokens.
stemmer = PorterStemmer()
lemmatizer = WordNetLemmatizer()

Function for preprocessing tokens using lemmatization and stemming

In [None]:
def preprocess_tokens(tokens):
    """Normalize tokens by lemmatizing them and then stemming the lemmas.

    Args:
        tokens: Iterable of token strings.

    Returns:
        list: One stemmed lemma per input token, in input order.
    """
    # Phase 1: lemmatize every token; phase 2: stem every lemma.
    lemmas = list(map(lemmatizer.lemmatize, tokens))
    return list(map(stemmer.stem, lemmas))

def match_pattern(tokens, pattern):
    """Return True when every word of ``pattern`` is contained in ``tokens``.

    Args:
        tokens: Container searched with the ``in`` operator.
        pattern: Iterable of required words.

    Returns:
        bool: False as soon as one word is missing, True otherwise (also
        for an empty pattern).
    """
    for word in pattern:
        if word not in tokens:
            return False
    return True

Functions for handling questions

In [None]:
def answer_who_stole_diamond():
    """Answer "who stole the diamond" from the first suspect in ``characters``.

    Returns:
        str: A sentence naming the first character key accepted by
        ``is_suspect``, or a fallback sentence when there is none.
    """
    first_suspect = next((key for key in characters if is_suspect(key)), None)
    if first_suspect is None:
        return "The thief's identity is still under investigation."
    return f"{characters[first_suspect]['name']} is suspected of stealing the blue carbuncle."

def answer_how_did_they_steal_it():
    """Answer "how did they steal it" based on recovery and suspect status.

    Returns:
        str: The full account when the carbuncle is recovered, the bare
        method when Ryder is a suspect, otherwise a fallback sentence.
    """
    # Both informative answers share this sentence.
    method = "John Ryder and Catherine Cusack broke into the Countess' room and framed Horner for the theft."
    if is_recovered("blue_carbuncle"):
        return (
            "The blue carbuncle was recovered, so how it was stolen is now known. "
            "As Sherlock discovered, " + method
        )
    if is_suspect("ryder"):
        return method
    return "The method of theft is still being investigated."

def answer_where_is_diamond():
    """Answer "where is the diamond" from the stolen/recovered flags.

    Returns:
        str: "recovered" wording when the stone is recovered, "missing"
        wording when it is stolen but not recovered, otherwise "uncertain"
        wording.
    """
    if is_recovered("blue_carbuncle"):
        return "The blue carbuncle has been recovered and is now safe."
    if is_stolen("blue_carbuncle"):
        # Reaching this point means the stone is stolen and not recovered.
        return "The blue carbuncle is missing and its location is currently unknown."
    return "The current location of the blue carbuncle is uncertain."



In [None]:
def answer_what_are_traits(character):
    """Build a sentence listing the traits of ``character``.

    Args:
        character: Key into ``characters``.

    Returns:
        str: The name followed by its comma-separated traits, or
        ``"Character not found."`` for an unknown key.
    """
    if character not in characters:
        return "Character not found."
    profile = characters[character]
    trait_list = ', '.join(profile['traits'])
    return f"{profile['name']} is known for being {trait_list}."

def answer_what_is_occupation(character):
    """Build a sentence stating the occupation of ``character``.

    Characters whose occupation is empty (Henry Baker in this notebook's
    data) get an explicit "not recorded" answer instead of the broken
    sentence "... works as a .".

    Args:
        character: Key into ``characters``.

    Returns:
        str: The occupation sentence, the "not recorded" sentence, or
        ``"Character not found."`` for an unknown key.
    """
    if character not in characters:
        return "Character not found."
    profile = characters[character]
    occupation = profile.get("occupation")
    if not occupation:
        return f"{profile['name']}'s occupation is not recorded."
    return f"{profile['name']} works as a {occupation}."

def answer_describe_character(character):
    """Answer a "describe" question by delegating to ``describe_character``.

    Args:
        character: Character key to describe.

    Returns:
        Whatever ``describe_character(character)`` returns.
    """
    description = describe_character(character)
    return description

def describe_character(character):
    """Describe a character by name, occupation (when known) and traits.

    When the occupation is empty the sentence omits it entirely. Previously
    this produced "<name> is and known for being ...".

    Args:
        character: Key into ``characters``.

    Returns:
        str: The description sentence, or ``"Character not found."`` for an
        unknown key.
    """
    if character not in characters:
        return "Character not found."
    profile = characters[character]
    trait_list = ", ".join(profile["traits"])
    if profile["occupation"]:
        return f"{profile['name']} is a {profile['occupation']} and known for being {trait_list}."
    # No occupation recorded: drop the dangling "and".
    return f"{profile['name']} is known for being {trait_list}."



In [None]:
def preprocess_question(question):
    """Lowercase a question, drop question marks and tokenize it.

    Args:
        question: Raw question text.

    Returns:
        The tokens produced by ``word_tokenize`` for the cleaned text.
    """
    cleaned = question.replace("?", "").lower()
    return word_tokenize(cleaned)

# Named-entity extraction built on NLTK's POS tagger and NE chunker.
def extract_entities(tokens):
    """Return the first leaf of every PERSON subtree found in ``tokens``.

    The tokens are POS-tagged and chunked. Only ``nltk.Tree`` nodes labelled
    ``'PERSON'`` are kept, and for each one the element at index 0 is returned.

    NOTE(review): index 0 of a chunk subtree is presumably a ``(token, tag)``
    pair rather than a plain name string, and only the first token of a
    multi-word name is kept - confirm this is what callers expect.

    Args:
        tokens: List of token strings.

    Returns:
        list: One element per PERSON subtree, in chunk order.
    """
    return [
        node[0]
        for node in ne_chunk(nltk.pos_tag(tokens))
        if isinstance(node, nltk.Tree) and node.label() == 'PERSON'
    ]

# Synonym lookup through NLTK's WordNet interface.
def find_synonyms(word):
    """Collect the lemma names of every WordNet synset of ``word``.

    Args:
        word: Word to look up.

    Returns:
        set: Distinct lemma names across all synsets (empty when WordNet
        returns no synsets).
    """
    return {
        lemma.name()
        for synset in wordnet.synsets(word)
        for lemma in synset.lemmas()
    }



In [None]:
# NOTE(review): an identical match_pattern is defined in an earlier cell of
# this notebook; after a full run this later definition is the one in effect.
def match_pattern(tokens, pattern):
    """Return True when no word of ``pattern`` is missing from ``tokens``.

    Args:
        tokens: Container searched with the ``in`` operator.
        pattern: Iterable of required words.

    Returns:
        bool: True if every pattern word is found (also for an empty
        pattern), False otherwise.
    """
    return not any(word not in tokens for word in pattern)

def extract_character(tokens):
    """Return the first token that is a known character key, or None.

    The set of known keys is stored under a distinct local name. Assigning
    it to ``characters`` made that name local to the function, so reading the
    module-level dictionary raised UnboundLocalError on every call.

    Args:
        tokens: Iterable of token strings.

    Returns:
        The first token found among the keys of ``characters``, or None when
        there is no such token.
    """
    known_keys = set(characters)
    return next((token for token in tokens if token in known_keys), None)


CHATBOT

In [None]:
def _token_after(tokens, keyword, start=0):
    """Return the token right after the first ``keyword`` at or beyond ``start``, or None."""
    try:
        position = tokens.index(keyword, start)
    except ValueError:
        return None
    follower = position + 1
    return tokens[follower] if follower < len(tokens) else None


def chatbot(question):
    """Route a question to the matching answer function.

    Fixed phrases ("who stole the diamond", "how did they steal it", "where
    is the diamond") are matched case-insensitively against the question
    text. Otherwise the token following "describe", or following "of" in a
    traits/occupation question, is taken as the character key.

    Every path that finds no usable character now falls through to the
    apology message. Previously an occupation question with nothing after
    "of" returned None.

    Args:
        question: Raw question text.

    Returns:
        str: The selected answer, or ``"Sorry, I didn't learn that yet"``.
    """
    tokens = preprocess_question(question)
    print("Preprocessed tokens:", tokens)

    # Match phrases on lowercased text so direct callers need not lowercase.
    normalized = question.lower()
    if "who stole the diamond" in normalized:
        return answer_who_stole_diamond()
    if "how did they steal it" in normalized:
        return answer_how_did_they_steal_it()
    if "where is the diamond" in normalized:
        return answer_where_is_diamond()

    if "describe" in tokens:
        character = _token_after(tokens, "describe")
        if character is not None:
            return answer_describe_character(character)

    if "traits" in tokens and "of" in tokens:
        character = _token_after(tokens, "of")
        if character is not None:
            return answer_what_are_traits(character)

    if "occupation" in tokens and "of" in tokens:
        # Prefer the "of" that follows "occupation"; otherwise use the first one.
        character = _token_after(tokens, "of", tokens.index("occupation"))
        if character is None:
            character = _token_after(tokens, "of")
        if character is not None:
            return answer_what_is_occupation(character)

    return "Sorry, I didn't learn that yet"

In [None]:
def main():
    """Run the interactive question loop until the user types ``exit``.

    Input is stripped and lowercased before use, so ``"  EXIT "`` also ends
    the session. Blank lines are skipped rather than sent to the chatbot.
    End-of-input or an interrupt at the prompt ends the session with the
    goodbye message instead of a traceback.
    """
    print("Welcome to the Sherlock Holmes Chatbot!")
    print("Ask me anything about the story, and I'll do my best to answer.")

    while True:
        try:
            question = input("Your question: ").strip().lower()
        except (EOFError, KeyboardInterrupt):
            # No more input is available or the prompt was interrupted.
            print("Goodbye!")
            break
        if not question:
            continue
        if question == "exit":
            print("Goodbye!")
            break
        response = chatbot(question)
        print("Sherlock Holmes Chatbot:", response)

# Start the interactive loop only when this code runs as the top-level program
# (__name__ is "__main__"); importing it as a module does not start the loop.
if __name__ == "__main__":
    main()

Welcome to the Sherlock Holmes Chatbot!
Ask me anything about the story, and I'll do my best to answer.
Preprocessed tokens: ['describe', 'sherlock']
Sherlock Holmes Chatbot: Sherlock Holmes is a detective and known for being observant, deductive, ingenious.
Preprocessed tokens: ['waht', 'are', 'the', 'traits', 'of', 'watson']
Sherlock Holmes Chatbot: John Watson is known for being loyal, curious, narrator.
Preprocessed tokens: ['who', 'stole', 'the', 'diamond']
Sherlock Holmes Chatbot: John Ryder is suspected of stealing the blue carbuncle.
Preprocessed tokens: ['what', 'is', 'the', 'occupation', 'of', 'ryder']
Sherlock Holmes Chatbot: John Ryder works as a hotel attendant.
Preprocessed tokens: ['how', 'did', 'they', 'steal', 'it']
Sherlock Holmes Chatbot: The blue carbuncle was recovered, so how it was stolen is now known. As Sherlock discovered, John Ryder and Catherine Cusack broke into the Countess' room and framed Horner for the theft.
