<a href="https://colab.research.google.com/github/Leletme/Final_project_CGS2120/blob/main/Final_Project_LeahTucker_2.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import string
from collections import Counter

def clean_text(text):
    """Normalize text by lowercasing it and deleting ASCII punctuation.

    Punctuation characters (those listed in ``string.punctuation``) are
    removed outright rather than replaced by spaces, so "Python's" becomes
    "pythons" and "a-b" becomes "ab".

    Args:
        text: The string to normalize.

    Returns:
        The lowercased string with every punctuation character removed.
    """
    lowered = text.lower()
    # Mapping a character to None tells str.translate() to drop it.
    drop_punctuation = str.maketrans(dict.fromkeys(string.punctuation))
    return lowered.translate(drop_punctuation)

def word_count(text):
    """Return how many whitespace-separated words the text contains.

    Args:
        text: The string whose words are counted.

    Returns:
        The number of tokens produced by splitting the text on whitespace;
        0 for an empty or whitespace-only string.
    """
    return sum(1 for _ in text.split())

def unique_words(text):
    """Collect the distinct whitespace-separated words of the text.

    Args:
        text: The string to split into words.

    Returns:
        A set holding each word that occurs in the text exactly once.
    """
    distinct = set()
    distinct.update(text.split())
    return distinct

def word_frequency(text):
    """Tally how often each whitespace-separated word occurs in the text.

    Args:
        text: The string to split into words.

    Returns:
        A ``Counter`` (a dict subclass) mapping each word to the number of
        times it appears.
    """
    tally = Counter()
    for token in text.split():
        tally[token] += 1
    return tally

def most_common_words(freq_dict, n):
    """Returns the n most common words and their frequencies.

    Args:
        freq_dict: A mapping of words to their frequencies. Both a
            ``Counter`` and a plain ``dict`` are accepted.
        n: The number of most common words to return. ``None`` returns
            every entry.

    Returns:
        A list of tuples, where each tuple contains a word and its frequency,
        ordered from most to least frequent.
    """
    try:
        ranker = freq_dict.most_common
    except AttributeError:
        # A plain dict has no most_common(); wrap it so the documented
        # "dictionary" input works instead of raising AttributeError.
        ranker = Counter(freq_dict).most_common
    return ranker(n)

def sentiment_analysis(text):
    """Performs basic sentiment analysis by comparing positive and negative words.

    The text is lowercased and stripped of ASCII punctuation before matching,
    so raw input such as "GREAT!" is recognised; already-cleaned input gives
    the same result as before. Each sentiment word is counted at most once,
    no matter how many times it occurs in the text.

    Args:
        text: The input text string (raw or already cleaned).

    Returns:
        "Positive", "Negative", or "Neutral" based on the sentiment.
    """
    positive_words = {"good", "happy", "excellent", "great", "fantastic", "positive"}
    negative_words = {"bad", "sad", "terrible", "poor", "negative"}

    # Same normalisation as clean_text(): without it, "Good," or "BAD" would
    # never match the lowercase, punctuation-free lexicon entries.
    normalized = text.lower().translate(str.maketrans("", "", string.punctuation))
    words = set(normalized.split())

    # Positive score => more distinct positive words than negative ones.
    score = len(words & positive_words) - len(words & negative_words)
    if score > 0:
        return "Positive"
    if score < 0:
        return "Negative"
    return "Neutral"

if __name__ == "__main__":
    # Demo run: clean a small sample paragraph, then report its statistics.
    sample_text = """
    Python is a great programming language. Python is simple, powerful, and versatile.
    It is good for data science, web development, and machine learning. Python's simplicity
    makes it a favorite among developers.
    """
    cleaned_text = clean_text(sample_text)
    freq_dist = word_frequency(cleaned_text)

    # Compute every statistic first so the reporting section is print-only.
    total_words = word_count(cleaned_text)
    vocabulary = unique_words(cleaned_text)
    top_five = most_common_words(freq_dist, 5)
    sentiment = sentiment_analysis(cleaned_text)

    print(f"Total Words: {total_words}")
    print(f"Unique Words: {vocabulary}")
    print(f"Frequency Distribution: {freq_dist}")
    print(f"Top 5 Most Common Words: {top_five}")
    print(f"Sentiment Analysis: {sentiment}")

Total Words: 31
Unique Words: {'it', 'science', 'simple', 'is', 'makes', 'web', 'a', 'and', 'good', 'machine', 'powerful', 'simplicity', 'development', 'among', 'language', 'great', 'learning', 'pythons', 'developers', 'data', 'for', 'python', 'favorite', 'programming', 'versatile'}
Frequency Distribution: Counter({'is': 3, 'python': 2, 'a': 2, 'and': 2, 'it': 2, 'great': 1, 'programming': 1, 'language': 1, 'simple': 1, 'powerful': 1, 'versatile': 1, 'good': 1, 'for': 1, 'data': 1, 'science': 1, 'web': 1, 'development': 1, 'machine': 1, 'learning': 1, 'pythons': 1, 'simplicity': 1, 'makes': 1, 'favorite': 1, 'among': 1, 'developers': 1})
Top 5 Most Common Words: [('is', 3), ('python', 2), ('a', 2), ('and', 2), ('it', 2)]
Sentiment Analysis: Positive
