In [1]:
import re
from collections import Counter

# 1. Read the file
def read_file(file_path):
    with open(file_path, 'r', encoding='utf-8') as file:
        return file.read()

file_path = 'cats.txt'
text_content = read_file(file_path)

In [2]:
# 2. Split the text line-by-line (separated by full-stop)
def split_lines(text):
    return text.split('\n')

lines = split_lines(text_content)
print("Lines:", lines)

Lines: ["The joy that people derive from their feline companions is a source of boundless happiness and contentment. Cat owners often find immense delight in the simple yet heartwarming interactions with their furry friends. The gentle purring, the soft nuzzles, and the playful antics of their cats bring about a sense of comfort and companionship that is unmatched. The unconditional love and acceptance that cats offer create a unique bond, fostering a profound sense of happiness in their human counterparts. Whether it's the calming presence of a cat curled up in a sunlit spot or the playful chase of a feather toy, the shared moments between people and their cats become cherished memories that brighten even the dullest days. The satisfaction of providing a loving home to these independent yet affectionate creatures resonates deeply with cat owners, cultivating a joy that goes beyond the ordinary and transforms their homes into havens of happiness."]


In [3]:
# 3. Split the entire text word-by-word (separated by white space)
def split_words(text):
    return re.findall(r'\b\w+\b', text)

words = split_words(text_content)
print("Words:", words)

Words: ['The', 'joy', 'that', 'people', 'derive', 'from', 'their', 'feline', 'companions', 'is', 'a', 'source', 'of', 'boundless', 'happiness', 'and', 'contentment', 'Cat', 'owners', 'often', 'find', 'immense', 'delight', 'in', 'the', 'simple', 'yet', 'heartwarming', 'interactions', 'with', 'their', 'furry', 'friends', 'The', 'gentle', 'purring', 'the', 'soft', 'nuzzles', 'and', 'the', 'playful', 'antics', 'of', 'their', 'cats', 'bring', 'about', 'a', 'sense', 'of', 'comfort', 'and', 'companionship', 'that', 'is', 'unmatched', 'The', 'unconditional', 'love', 'and', 'acceptance', 'that', 'cats', 'offer', 'create', 'a', 'unique', 'bond', 'fostering', 'a', 'profound', 'sense', 'of', 'happiness', 'in', 'their', 'human', 'counterparts', 'Whether', 'it', 's', 'the', 'calming', 'presence', 'of', 'a', 'cat', 'curled', 'up', 'in', 'a', 'sunlit', 'spot', 'or', 'the', 'playful', 'chase', 'of', 'a', 'feather', 'toy', 'the', 'shared', 'moments', 'between', 'people', 'and', 'their', 'cats', 'become'

In [4]:
# 4. Separate the list-of-words (to find out most used keywords)
def find_keywords(words, num_keywords=5):
    word_freq = Counter(words)
    return word_freq.most_common(num_keywords)

keywords = find_keywords(words)
print("Keywords:", keywords)

Keywords: [('a', 9), ('of', 8), ('the', 8), ('their', 6), ('and', 6)]


In [5]:
# 5. Separate the list-of-joining-words (to ignore them)
def remove_stopwords(words):
    stop_words = set([
        'i', 'me', 'my', 'myself', 'we', 'our', 'ours', 'ourselves', 'you', 'your', 'yours', 'yourself', 'yourselves',
        'he', 'him', 'his', 'himself', 'she', 'her', 'hers', 'herself', 'it', 'its', 'itself', 'they', 'them', 'their',
        'theirs', 'themselves', 'what', 'which', 'who', 'whom', 'this', 'that', 'these', 'those', 'am', 'is', 'are', 'was',
        'were', 'be', 'been', 'being', 'have', 'has', 'had', 'having', 'do', 'does', 'did', 'doing', 'a', 'an', 'the', 'and',
        'but', 'if', 'or', 'because', 'as', 'until', 'while', 'of', 'at', 'by', 'for', 'with', 'about', 'against', 'between',
        'into', 'through', 'during', 'before', 'after', 'above', 'below', 'to', 'from', 'up', 'down', 'in', ' out', 'on', 'off',
        'over', 'under', 'again', 'further', 'then', 'once', 'here', 'there', 'when', 'where', 'why', 'how', 'all', 'any', 'both',
        'each', 'few', 'more', 'most', 'other', 'some', 'such', 'no', 'nor', 'not', 'only', 'own', 'same', 'so', 'than', 'too',
        'very', 's', 't', 'can', 'will', 'just', 'don', 'should', 'now', 'd', 'll', 'm', 'o', 're', 've', 'y', 'ain', 'aren',
        'couldn', 'didn', 'doesn', 'hadn', 'hasn', 'haven', 'isn', 'ma', 'mightn', 'mustn', 'needn', 'shan', 'shouldn', 'wasn', 'weren', 'won', 'wouldn'
    ])
    return [word for word in words if word.lower() not in stop_words]

filtered_words = remove_stopwords(words)
print("Filtered Words (without stopwords):", filtered_words)

Filtered Words (without stopwords): ['joy', 'people', 'derive', 'feline', 'companions', 'source', 'boundless', 'happiness', 'contentment', 'Cat', 'owners', 'often', 'find', 'immense', 'delight', 'simple', 'yet', 'heartwarming', 'interactions', 'furry', 'friends', 'gentle', 'purring', 'soft', 'nuzzles', 'playful', 'antics', 'cats', 'bring', 'sense', 'comfort', 'companionship', 'unmatched', 'unconditional', 'love', 'acceptance', 'cats', 'offer', 'create', 'unique', 'bond', 'fostering', 'profound', 'sense', 'happiness', 'human', 'counterparts', 'Whether', 'calming', 'presence', 'cat', 'curled', 'sunlit', 'spot', 'playful', 'chase', 'feather', 'toy', 'shared', 'moments', 'people', 'cats', 'become', 'cherished', 'memories', 'brighten', 'even', 'dullest', 'days', 'satisfaction', 'providing', 'loving', 'home', 'independent', 'yet', 'affectionate', 'creatures', 'resonates', 'deeply', 'cat', 'owners', 'cultivating', 'joy', 'goes', 'beyond', 'ordinary', 'transforms', 'homes', 'havens', 'happines

In [6]:
# 6. Bag of words for sentiment analysis
def bag_of_words(text, positive_words, negative_words):
    words = split_words(text)
    word_freq = Counter(words)

    # Count positive and negative words
    positive_count = sum(word_freq[word.lower()] for word in positive_words)
    negative_count = sum(word_freq[word.lower()] for word in negative_words)

    if positive_count > negative_count:
        return 'Positive'
    elif negative_count > positive_count:
        return 'Negative'
    else:
        return 'Neutral'

positive_words = ['good', 'excellent', 'positive','joy', 'delight', 'heartwarming', 'comfort', 'companionship', 'unconditional', 'love', 
                  'acceptance', 'unique', 'bond', 'profound', 'happiness', 'calming', 'playful', 'cherished', 'memories', 'brighten', 'satisfaction', 
                  'providing', 'loving', 'independent', 'affectionate', 'resonates', 'deeply', 'havens']  # Add more positive words
negative_words = ['bad', 'poor', 'negative', 'sadness', 'displeasure', 'discomfort', 'loneliness', 'rejection', 'unpleasant', 'dull', 
                  'ordinary','unpleasant', 'stressful', 'disruptive', 'unhappy', 'unsatisfying']  # Add more negative words


sentiment = bag_of_words(text_content, positive_words, negative_words)
print("Sentiment:", sentiment)

Sentiment: Positive
