In [None]:
import string
import os

In [1]:
import pandas as pd
import numpy as np
import nltk
import matplotlib.pyplot as plt
import re
import os
import sys
import string

def calculate_stats(poem_text):
    """Count the whitespace-separated words in a text and their mean length.

    Every double hyphen ("--") is replaced by a space before splitting, so
    words joined by a dash are counted separately.  Any other punctuation
    attached to a word is kept and counts towards that word's length.

    Args:
        poem_text: The text to measure.

    Returns:
        A dict with two keys: "word_count" (number of words) and
        "average_word_length" (total characters divided by the word count,
        or 0 when the text contains no words).
    """
    tokens = poem_text.replace("--", " ").lower().split()
    lengths = [len(token) for token in tokens]
    # Avoid dividing by zero for empty or whitespace-only input.
    mean_length = sum(lengths) / len(lengths) if lengths else 0
    return {
        "word_count": len(tokens),
        "average_word_length": mean_length,
    }

# Read the cleaned poem corpus and report its word statistics.
# Only the file read is guarded; the reporting runs when the read succeeded.
try:
    with open("../data/clean_poems.txt", "r", encoding="utf-8") as f:
        poem_text = f.read()
except FileNotFoundError:
    print("Error: The file was not found.")
else:
    stats = calculate_stats(poem_text)
    print(f"Total Word Count: {stats['word_count']}")
    print(f"Average Word Length: {stats['average_word_length']:.2f} characters")

def load_cmudict(filepath="../data/cmudict.txt"):
    """Parse a pronunciation dictionary file into a word -> phonemes mapping.

    Each usable line holds a headword followed by whitespace and its phonemes.
    Blank lines, lines starting with ';;;', and lines without any phonemes are
    skipped.  A trailing "(N)" variant marker is removed from the headword,
    which is then upper-cased and stripped of every character other than A-Z
    and the apostrophe.  Only the first pronunciation seen for a word is kept.

    Progress and problems are reported with print().

    Args:
        filepath: Path of the dictionary file (read as UTF-8).

    Returns:
        A dict mapping each word to its list of phoneme strings, or None when
        the file does not exist or yields no entries.
    """
    print(f"Loading phonetic dictionary from {filepath}...")
    variant_suffix = re.compile(r'\(\d+\)$')
    disallowed_chars = re.compile(r"[^A-Z']")
    entries = {}

    try:
        with open(filepath, "r", encoding="utf-8") as handle:
            for raw_line in handle:
                entry = raw_line.strip()
                if not entry or entry.startswith(';;;'):
                    continue
                fields = entry.split(maxsplit=1)
                if len(fields) < 2:
                    continue
                headword, pronunciation = fields
                key = variant_suffix.sub('', headword).upper()
                key = disallowed_chars.sub('', key)
                if key:
                    # setdefault keeps the first pronunciation for a word.
                    entries.setdefault(key, pronunciation.split())
    except FileNotFoundError:
        print(f"Error: The dictionary file '{filepath}' was not found.")
        return None

    if not entries:
        print("Warning: The dictionary appears empty after loading. Check the file format.")
        return None
    print(f"Dictionary loaded successfully. {len(entries)} words found.")
    return entries
    
def _rhyme_labels():
    """Yield an endless sequence of distinct single-character rhyme labels.

    Upper-case letters come first, then lower-case letters, then characters
    from U+03B1 onward.  'X' is never yielded because the rhyme scheme uses
    it to mark lines whose final word could not be looked up.
    """
    for letter in string.ascii_uppercase + string.ascii_lowercase:
        if letter != 'X':
            yield letter
    code_point = 0x03B1
    while True:
        yield chr(code_point)
        code_point += 1


def _rhyming_part(phonemes):
    """Return the phonemes from the last stressed one onward, as a tuple.

    A phoneme counts as stressed when it ends in '1' or '2'.  When none is
    stressed, the whole pronunciation is returned.
    """
    for index in range(len(phonemes) - 1, -1, -1):
        if phonemes[index].endswith(('1', '2')):
            return tuple(phonemes[index:])
    return tuple(phonemes)


def detect_rhyme_scheme(stanza, pronunciation_dict):
    """Label each line of a stanza by the sound of its final word.

    The last word of every line is upper-cased, stripped of surrounding
    punctuation and looked up in ``pronunciation_dict``.  Lines whose rhyming
    parts (see ``_rhyming_part``) are identical share a label; labels are
    handed out in order of first appearance.

    Args:
        stanza: The stanza text, one verse line per '\\n'-separated line.
            Leading and trailing whitespace of the whole stanza is ignored.
        pronunciation_dict: Mapping of upper-case word -> list of phoneme
            strings.

    Returns:
        A string with one character per line: a rhyme label, or 'X' for a
        blank line or a line whose final word is not in the dictionary.
        Returns "Error: Dictionary not loaded." when ``pronunciation_dict``
        is empty or None.
    """
    if not pronunciation_dict:
        return "Error: Dictionary not loaded."

    labels = _rhyme_labels()
    rhyme_groups = {}
    scheme = []

    for line in stanza.strip().split('\n'):
        words = line.split()
        clean_word = words[-1].upper().strip(string.punctuation) if words else None
        if clean_word is None or clean_word not in pronunciation_dict:
            # 'X' is reserved for "unknown"; _rhyme_labels never yields it.
            scheme.append('X')
            continue

        sounds = _rhyming_part(pronunciation_dict[clean_word])
        if sounds not in rhyme_groups:
            rhyme_groups[sounds] = next(labels)
        scheme.append(rhyme_groups[sounds])

    return "".join(scheme)

# Load the pronunciation dictionary once; later cells reuse `cmudict`.
cmudict = load_cmudict()

# Testing phonetics
# No file is opened here, so no FileNotFoundError handler is needed:
# load_cmudict() reports a missing file itself and returns None.
if cmudict:
    sample_stanza="""
        Because I could not stop for Death,
        He kindly stopped for me;
        The carriage held but just ourselves
        And Immortality.
        """
    rhyme_scheme = detect_rhyme_scheme(sample_stanza, cmudict)
    print(f"The rhyme scheme of the sample stanza is: {rhyme_scheme}")
else:
    print("Exiting because the phonetic dictionary could not be loaded.")


def find_alliteration(line):
    """Find neighbouring content words that start with the same letter.

    The line is lower-cased and all ASCII punctuation is deleted (so "ne'er"
    becomes "neer").  Common function words are then ignored, and every pair
    of consecutive remaining words whose first characters match is reported.

    Args:
        line: A single line of text.

    Returns:
        A list of (word, following_word) tuples in order of appearance;
        empty when no pair matches.
    """
    stop_words = {
        'a', 'an', 'the', 'in', 'on', 'at', 'to', 'for', 'of',
        'is', 'am', 'are', 'was', 'were', 'be', 'been', 'being',
        'and', 'or', 'but', 'if', 'as', 'by', 'with', 'from',
        'i', 'you', 'he', 'she', 'it', 'we', 'they', 'me', 'him',
        'her', 'us', 'them', 'my', 'your', 'his', 'its', 'our', 'their',
    }
    punctuation_remover = str.maketrans('', '', string.punctuation)
    content_words = [
        token
        for token in line.lower().translate(punctuation_remover).split()
        if token not in stop_words
    ]
    # Each content word is compared only with the next content word.
    return [
        (first, second)
        for first, second in zip(content_words, content_words[1:])
        if first[0] == second[0]
    ]

# Testing find_alliteration
# The sample is an in-memory string, so no file access (and no
# FileNotFoundError handling) is involved.
sample_line = "Success is counted sweetest by those who ne'er succeed."
alliterative_pairs = find_alliteration(sample_line)

if alliterative_pairs:
    print(f"Found alliteration in the line: '{sample_line}'")
    for pair in alliterative_pairs:
        print(f"  - {pair[0]} / {pair[1]}")
else:
    print(f"No simple alliteration found in the line: '{sample_line}'")

Total Word Count: 28906
Average Word Length: 4.46 characters
Loading phonetic dictionary from ../data/cmudict.txt...
Dictionary loaded successfully. 125633 words found.
The rhyme scheme of the sample stanza is: ABCD
No simple alliteration found in the line: 'Success is counted sweetest by those who ne'er succeed.'


In [2]:
def prepare_text(filepath="../data/clean_poems.txt"):
    """Read a text file and split it into lower-case, punctuation-free tokens.

    All ASCII punctuation characters are deleted (not replaced by spaces)
    before the text is split on whitespace.  The token count and any problem
    encountered are reported with print().

    Args:
        filepath: Path of the text file (read as UTF-8).

    Returns:
        The list of tokens, or None when the file does not exist or any
        error occurs while reading or processing it.
    """
    if not os.path.exists(filepath):
        print(f"Error: The file '{filepath}' was not found.")
        return None
    try:
        with open(filepath, "r", encoding="utf-8") as source:
            raw_text = source.read()
        strip_punctuation = str.maketrans('', '', string.punctuation)
        # str.split() with no separator never produces empty strings.
        tokens = raw_text.lower().translate(strip_punctuation).split()
        print(f"Found {len(tokens)} tokens.")
        return tokens
    except Exception as e:
        print(f"An error occurred during text preparation: {e}")
        return None

In [3]:
# Tokenize the corpus.  prepare_text() prints its own error message and
# returns None on failure, so only preview the tokens when loading worked.
tokens = prepare_text()
if tokens is not None:
    print(tokens[:20])

Found 28897 tokens.
['this', 'is', 'my', 'letter', 'to', 'the', 'world', 'that', 'never', 'wrote', 'to', 'me', 'the', 'simple', 'news', 'that', 'nature', 'told', 'with', 'tender']


In [4]:
import random

def build_markov_model(tokens):
    """Build a first-order Markov model from a token sequence.

    Every token except the last becomes a key; its value lists, in corpus
    order and with repeats, each token that directly followed it.  Repeats
    are kept so that a uniform random pick from the list reflects how often
    each successor occurred.  The number of keys is reported with print().

    Args:
        tokens: Sequence of tokens.  None is treated as an empty sequence.

    Returns:
        A dict mapping each token to the list of tokens that followed it.
    """
    if tokens is None:
        tokens = []
    model = {}
    for current_word, next_word in zip(tokens, tokens[1:]):
        # Append in place rather than rebuilding the successor list each time.
        model.setdefault(current_word, []).append(next_word)
    print("Number of unique keys: ", len(model))
    return model

In [5]:
# Build the word -> successors model from the corpus tokens, then spot-check
# two entries by printing up to the first ten successors recorded for each.
markov_model=build_markov_model(tokens)
sample_key = 'the'
print(f"Words following '{sample_key}':")
# Direct indexing: raises KeyError if the sample word is not a key of the model.
print(markov_model[sample_key][:10]) 
sample_key_2 = 'love'
print(f"\nWords following '{sample_key_2}':")
print(markov_model[sample_key_2][:10])

Number of unique keys:  5524
Words following 'the':
['world', 'simple', 'request', 'authors', 'purple', 'flag', 'definition', 'distant', 'victory', 'worst']

Words following 'love':
['of', 'we', 'but', 'i', 'a', 'poured', 'i', 'enough', 'alway', 'is']


In [6]:
def generate_text(model, tokens, length=50):
    """Generate a space-separated word chain by random-walking the model.

    The walk starts at a key of ``model`` chosen uniformly at random and
    repeatedly moves to a randomly chosen successor of the current word.  It
    stops once ``length`` words have been produced, or earlier when the
    current word has no entry in the model.  At least one word is always
    produced when the model is non-empty.

    Args:
        model: Mapping of word -> list of successor words.
        tokens: Not used by this function.
        length: Maximum number of words to produce.

    Returns:
        The generated words joined by single spaces, or None (after printing
        an error) when the model has no keys.
    """
    start_candidates = list(model.keys())
    if not start_candidates:
        print("Error: Model has no keys to start from.")
        return None

    word = random.choice(start_candidates)
    chain = [word]
    for _ in range(length - 1):
        if word not in model:
            # Dead end: this word was never followed by anything.
            break
        word = random.choice(model[word])
        chain.append(word)
    return " ".join(chain)

In [7]:
# Produce a sample of up to 100 words from the Markov model and show it.
# generate_text() returns None when the model has no keys; print() then shows "None".
generated_poem_text = generate_text(markov_model, tokens, length=100) 
print(generated_poem_text)

rest their coming and deem to tie to the chosen child oftener through the summers morn a door i strove to that bore the candle or arctic creature nodding in a hundred years sundered tune without a simple news is that april candid in dishonored grass no means imagined by one consummate bill an amethyst nights wild now that held low to perceive new england town so far the spot and im wife ive got my abode empowered with strict economy subsists till his it is endless be a listener admonished by and rowed him to earths to surmise and


In [10]:
# Cell 4: The Main Chatbot Loop

# --- Make sure functions from Phase 2 are available ---
# (We need detect_rhyme_scheme and find_alliteration.)
# (If analyzer.py is in the same directory, you could import them from it;
# in this notebook they are not redefined here, so make sure the earlier
# cells that define them have been run first.)

# --- Assuming these variables exist from previous cells ---
# cmudict = load_cmudict() # From Phase 2
# tokens = prepare_text() # From Phase 3, Step 1
# markov_model = build_markov_model(tokens) # From Phase 3, Step 2
# generate_text = ... # Function from Phase 3, Step 3
# detect_rhyme_scheme = ... # Function from Phase 2, Step 2 (Revised)
# find_alliteration = ... # Function from Phase 2, Step 3 (Advanced)
# Interactive front end: announce the bot, then serve one command per
# iteration until the user quits or the input stream ends.
print("\n--- Artisan PoetBot Initialized ---")
print("Type 'analyze', 'generate', or 'quit'.")

while True:
    try:
        user_choice = input("\nWhat would you like to do? (analyze/generate/quit): ").lower().strip()

        if user_choice == 'quit':
            print("Goodbye!")
            break

        if user_choice == 'generate':
            print("\n--- Generating Poem Snippet (Markov Chain) ---")
            if not (markov_model and tokens):
                print("Cannot generate text (model not ready).")
                continue
            # Generate a 50-word snippet.
            generated_output = generate_text(markov_model, tokens, length=50)
            print(generated_output if generated_output else "Could not generate text.")
            continue

        if user_choice != 'analyze':
            print("Invalid choice. Please type 'analyze', 'generate', or 'quit'.")
            continue

        # --- 'analyze' ---
        print("\nPlease paste the poem stanza you want to analyze.")
        print("(Enter an empty line when you are finished):")

        # Keep calling input() until it returns an empty string.
        stanza_lines = list(iter(input, ""))
        input_stanza = "\n".join(stanza_lines)

        if not input_stanza.strip():
            print("No stanza provided.")
            continue

        print("\n--- Analysis Results ---")

        # Rhyme scheme needs the pronunciation dictionary.
        if cmudict:
            scheme = detect_rhyme_scheme(input_stanza, cmudict)
            print(f"Detected Rhyme Scheme: {scheme}")
        else:
            print("Cannot detect rhyme scheme (dictionary not loaded).")

        # Alliteration is checked line by line; line numbers are 1-based.
        print("Alliteration Found:")
        found_any_alliteration = False
        for line_num, line_text in enumerate(input_stanza.split('\n'), start=1):
            pairs = find_alliteration(line_text)
            if not pairs:
                continue
            found_any_alliteration = True
            print(f"  Line {line_num}:")
            for pair in pairs:
                print(f"    - {pair[0]} / {pair[1]}")
        if not found_any_alliteration:
            print("  None detected with the current rule.")

    except EOFError:
        # End of input (e.g. Ctrl+D).
        print("\nInput ended unexpectedly. Goodbye!")
        break
    except KeyboardInterrupt:
        # Ctrl+C.
        print("\nInterrupted by user. Goodbye!")
        break
    except Exception as e:
        # Report and keep serving commands.
        # NOTE(review): the loop does not exit here, so an error raised by
        # input() itself on every call would be reported repeatedly.
        print(f"\nAn unexpected error occurred: {e}")


--- Artisan PoetBot Initialized ---
Type 'analyze', 'generate', or 'quit'.

Please paste the poem stanza you want to analyze.
(Enter an empty line when you are finished):

--- Analysis Results ---
Detected Rhyme Scheme: AABCCDDBCC
Alliteration Found:
  Line 5:
    - rock / rock
  Line 7:
    - care / care
  Line 10:
    - rock / rock
Goodbye!
