# In [6]:
import re
from collections import Counter

def analyze(file_path):
    """Compute basic word and sentence statistics for a text file.

    Args:
        file_path: Path of the text file to analyze. It is decoded as UTF-8.

    Returns:
        A dict with the following keys:
            "total_word_count": number of word tokens (case-folded ``\\w+`` runs).
            "total_character_count": sum of the lengths of all word tokens.
            "avg_word_length": characters per word, or 0 if there are no words.
            "avg_sentence_length": words per sentence, or 0 if there are no
                sentences.
            "word_distribution": ``Counter`` mapping each word to its count.
            "longest_words": up to 10 distinct words, longest first; ties are
                broken alphabetically.

    Raises:
        OSError: if the file cannot be opened.
        UnicodeDecodeError: if the file is not valid UTF-8.
    """
    # Decode explicitly so the result does not depend on the platform's
    # default encoding.
    with open(file_path, 'r', encoding='utf-8') as file:
        text = file.read()

    words = re.findall(r'\b\w+\b', text.lower())

    # re.split leaves an empty piece after the final terminator and between
    # repeated terminators ("..."). Only pieces that contain at least one
    # word character are counted as sentences.
    sentences = [
        piece for piece in re.split(r'[.!?]', text) if re.search(r'\w', piece)
    ]

    total_word_count = len(words)
    total_character_count = sum(len(word) for word in words)
    avg_word_length = total_character_count / total_word_count if words else 0
    avg_sentence_length = total_word_count / len(sentences) if sentences else 0

    word_distribution = Counter(words)

    # Secondary alphabetical key makes the order of equal-length words
    # deterministic; iterating the Counter yields each distinct word once.
    longest_words = sorted(
        word_distribution, key=lambda word: (-len(word), word)
    )[:10]

    return {
        "total_word_count": total_word_count,
        "total_character_count": total_character_count,
        "avg_word_length": avg_word_length,
        "avg_sentence_length": avg_sentence_length,
        "word_distribution": word_distribution,
        "longest_words": longest_words,
    }

def save(results, output_file):
    """Write a plain-text report of the analysis results.

    Args:
        results: Mapping with the keys "total_word_count",
            "total_character_count", "avg_word_length",
            "avg_sentence_length", "longest_words" (iterable of words) and
            "word_distribution" (an object providing ``most_common``).
        output_file: Path of the report to create or overwrite; written as
            UTF-8.
    """
    with open(output_file, 'w', encoding='utf-8') as report:
        emit = report.write

        # Title and scalar statistics.
        emit("Book Analysis Results\n")
        emit("======================\n")
        emit(f"Total Word Count: {results['total_word_count']}\n")
        emit(f"Total Character Count: {results['total_character_count']}\n")
        emit(f"Average Word Length: {results['avg_word_length']:.2f}\n")
        emit(f"Average Sentence Length: {results['avg_sentence_length']:.2f} words\n")

        # One word per line.
        emit("\nTop 10 Longest Words:\n")
        report.writelines(f"{entry}\n" for entry in results['longest_words'])

        # The 20 most frequent words with their counts.
        emit("\nWord Frequency Distribution (Top 20):\n")
        most_frequent = results['word_distribution'].most_common(20)
        report.writelines(f"{token}: {freq}\n" for token, freq in most_frequent)

# Paths for this run: the text to read and the report to write.
input_file, output_file = "ebook.txt", "pride_and_prejudice_analysis.txt"

# Compute the statistics, then write them out as a text report.
results = analyze(input_file)
save(results, output_file)

print(f"Analysis complete. Results saved to '{output_file}'.")

# Output:
# Analysis complete. Results saved to 'pride_and_prejudice_analysis.txt'.
