In [None]:
import re
from collections import Counter

def text_analyzer(filename):
    """Read a text file and return its contents with punctuation removed.

    Leading and trailing whitespace is stripped, then every run of
    characters that is neither a word character nor whitespace is deleted.

    Args:
        filename: Path of the text file to read.

    Returns:
        The cleaned text, or ``''`` when the file does not exist or
        contains no text.
    """
    try:
        # Decode explicitly as UTF-8 so the result does not depend on the
        # platform's default encoding (which garbles non-ASCII punctuation
        # such as em dashes on some systems).
        with open(filename, 'r', encoding='utf-8') as file:
            words = file.read()
    except FileNotFoundError:
        return ''

    # Always compute the result from the text that was read: an empty file
    # simply yields '' instead of leaving the return value undefined.
    return re.sub(r'[^\w\s]+', '', words.strip())
    

def count_words(cleaned_text):
    """Return the number of whitespace-separated words in ``cleaned_text``.

    Args:
        cleaned_text: The text to count words in.

    Returns:
        The word count as an int; ``0`` for empty or whitespace-only text.
    """
    # No file access happens here, so there is nothing file-related to
    # catch; splitting a string on whitespace cannot fail.
    return len(cleaned_text.split())


def most_frequent(cleaned_text):
    """Return the most common word in ``cleaned_text``, ignoring case.

    Args:
        cleaned_text: The text to analyze.

    Returns:
        A list holding a single ``(word, count)`` tuple for the most
        frequent lower-cased word, or an empty list when the text has no
        words.
    """
    # Lower-case before counting so "The" and "the" are tallied together.
    word_count = Counter(cleaned_text.lower().split())
    return word_count.most_common(1)


def unique_words(cleaned_text):
    """Return the set of distinct whitespace-separated words.

    Words are compared case-sensitively. If splitting or collecting the
    words raises, the error is printed and ``None`` is returned.

    Args:
        cleaned_text: The text to analyze.

    Returns:
        A set of the distinct words, or ``None`` when an error occurred.
    """
    try:
        distinct = {token for token in cleaned_text.split()}
    except Exception as e:
        print('Error : ',e)
        return None
    return distinct


def main(filename='ml.txt'):
    """Analyze a text file and print its word statistics.

    Prints the word count, the most frequent word and the set of unique
    words, or a notice when there is no text to analyze.

    Args:
        filename: Path of the text file to analyze. Defaults to
            ``'ml.txt'``.
    """
    cleaned_text = text_analyzer(filename)

    # Nothing was read (missing or empty file): report it and stop.
    if not cleaned_text:
        print('No text to analyze')
        return

    print('Number of words : ',count_words(cleaned_text))
    print('Most frequent word : ',most_frequent(cleaned_text))
    print('Unique words : ',unique_words(cleaned_text))
        
# Run the analysis only when this file is executed as a script.
if __name__ == '__main__':
    main()

Number of words :  881
Most frequent word :  [('and', 56)]
Unique words :  {'of', 'recall', 'manipulation', 'unsupervised', 'appropriate', 'building', 'route', 'policy', 'to\ncomplex', 'heavily', 'model\npredictions', 'make', 'clean', 'inform\ntrading', 'monitoring', 'supervised', 'relationship', 'methods\nhelp', 'randomized', 'numerical', 'especially', 'with', 'models\nin', 'discrete', 'practicesâ€”such', 'NumPy', 'research', 'error', 'attempt', 'In', 'them', 'Calculus', 'remarkable', 'commonly', 'tools', 'reduce', 'justice', 'GPU', 'calculus\nhelps', 'labels', '--', 'performance\ndegrades', 'summarization', 'conferences', 'classical', 'concepts', 'capabilities', 'driving\n\nAs', 'practice', 'because', 'F1-score', 'generalization', 'interpretability', 'much', 'scaling', 'footprint)', 'growing', 'maintainable', 'tasks', 'provide', 'Feature', 'for', 'input', 'reinforcement', 'anomaly', 'gradients', 'transformer-based', 'theory', 'other', 'TensorFlow', 'useful', 'for\nclassification', 't