###  Text Analysis

In [None]:
from collections import Counter
import string
import matplotlib.pyplot as plt

# Text cleaning function
def clean_text(text, remove_punctuation=True, remove_digits=False):
    """Return a lowercased copy of ``text`` with optional characters removed.

    Args:
        text: The string to normalise.
        remove_punctuation: When true, delete every character listed in
            ``string.punctuation``.
        remove_digits: When true, delete every character listed in
            ``string.digits``.

    Returns:
        The lowercased string with the selected characters deleted.
    """
    # Gather every character to drop so a single translate pass is enough.
    unwanted = ""
    if remove_punctuation:
        unwanted += string.punctuation
    if remove_digits:
        unwanted += string.digits

    lowered = text.lower()
    if not unwanted:
        return lowered
    return lowered.translate(str.maketrans("", "", unwanted))

def plot_word_frequency(word_count):
    """Plot a bar chart of the word(s) that share the highest frequency.

    Args:
        word_count: Mapping of word -> occurrence count, for example a
            ``collections.Counter``.

    Returns:
        None. The chart is displayed via ``plt.show()``. When ``word_count``
        is empty nothing is drawn and a short notice is printed instead.
    """
    # max() raises ValueError on an empty sequence, so handle that case first.
    if not word_count:
        print("No words to plot.")
        return

    max_frequency = max(word_count.values())

    # Several words may tie for the top frequency; keep all of them.
    top_words = [word for word, count in word_count.items() if count == max_frequency]
    top_counts = [max_frequency] * len(top_words)

    plt.figure(figsize=(6, 2))
    plt.bar(top_words, top_counts, color='purple', width=0.7)
    plt.xlabel('Words')
    plt.ylabel('Frequency')
    plt.title(f'Most Common Words with Frequency: {max_frequency}')
    plt.xticks(rotation=45, ha='right')
    plt.show()

def main():
    """Run the demo: clean the sample sentences, count words, plot the top ones.

    Prints the word counts to stdout and then calls ``plot_word_frequency``.
    Any exception raised along the way is caught and reported on stdout
    instead of being propagated.
    """
    # Sample sentences used as the corpus.
    text_data = [
        "Data science is an interdisciplinary field that uses scientific methods, processes, algorithms and systems to extract knowledge.",
        "Python is a programming language that lets you work quickly and integrate systems more effectively.",
        "Machine learning is a method of data analysis that automates analytical model building.",
    ]

    try:
        # Merge all sentences into one string before cleaning.
        combined_text = " ".join(text_data)

        # Strip both punctuation and digits for this demo.
        cleaned_text = clean_text(
            combined_text,
            remove_punctuation=True,
            remove_digits=True,
        )

        # str.split() with no argument never yields empty strings, so no
        # extra filtering is needed before counting.
        word_count = Counter(cleaned_text.split())

        print("\nWord Count:", word_count)

        # Chart the word(s) with the highest frequency.
        plot_word_frequency(word_count)
    except Exception as e:
        # Top-level boundary: report the actual error rather than crash.
        print(f"An error occurred: {e}")

# Run the demo only when this module is executed directly, not when imported.
if __name__ == "__main__":
    main()
