In [3]:
import re

def clean_text(text):
    text = re.sub(r'[^\w\s]', '', text)
    return text.lower()

def calculate_statistics(text):
    cleaned_text = clean_text(text)
    words = re.findall(r'\b\w+\b', cleaned_text)
    sentences = re.split(r'[.!?]', text)

    total_words = len(words)
    total_characters = sum(len(word) for word in words)
    total_sentences = len(sentences)
    avg_word_length = total_characters / total_words if total_words > 0 else 0
    avg_sentence_length = total_words / total_sentences if total_sentences > 0 else 0

    return {
        "total_words": total_words,
        "total_characters": total_characters,
        "total_sentences": total_sentences,
        "avg_word_length": avg_word_length,
        "avg_sentence_length": avg_sentence_length
    }

def display_statistics(stats, title):
    print(f"Statistics for {title}")
    print("=" * (13 + len(title)))
    print(f"Total Words: {stats['total_words']}")
    print(f"Total Characters: {stats['total_characters']}")
    print(f"Total Sentences: {stats['total_sentences']}")
    print(f"Average Word Length: {stats['avg_word_length']:.2f}")
    print(f"Average Sentence Length: {stats['avg_sentence_length']:.2f} words")
    print()

def main():
    shakespeare_file = "shakespeare.txt" 
    bacon_file = "bacon.txt" 

    with open(shakespeare_file, 'r', encoding='utf-8') as file:
        shakespeare_text = file.read()

    with open(bacon_file, 'r', encoding='utf-8') as file:
        bacon_text = file.read()

    shakespeare_stats = calculate_statistics(shakespeare_text)
    bacon_stats = calculate_statistics(bacon_text)

    display_statistics(shakespeare_stats, "Shakespeare's Work")
    display_statistics(bacon_stats, "Bacon's Work")

if __name__ == "__main__":
    main()


Statistics for Shakespeare's Work
Total Words: 29575
Total Characters: 122288
Total Sentences: 2366
Average Word Length: 4.13
Average Sentence Length: 12.50 words

Statistics for Bacon's Work
Total Words: 52237
Total Characters: 230110
Total Sentences: 1827
Average Word Length: 4.41
Average Sentence Length: 28.59 words

