In [10]:
import nltk
from nltk.tokenize import RegexpTokenizer, sent_tokenize
from nltk.corpus import stopwords
from nltk.corpus import wordnet as wn
from nltk.stem.snowball import SnowballStemmer
import operator

In [11]:
# Shared NLP helpers used by preprocess() further down.
# Tokens are maximal runs of word characters (regex \w+), so punctuation is dropped.
tokenizer = RegexpTokenizer(r'\w+')
# Snowball stemmer configured for English.
stemmer = SnowballStemmer('english')
# English stop-word list held as a set for fast membership tests.
stop_words = set(stopwords.words('english'))

In [12]:
# Read the files: load the full text of each story into its own string.
with open('gift-of-magi.txt', 'r') as f1:
    text1 = f1.read()

with open('the-skylight-room.txt', 'r') as f2:
    text2 = f2.read()

with open('the-cactus.txt', 'r') as f3:
    text3 = f3.read()

In [13]:
# Tokenize and preprocess the text
def preprocess(text):
    """Convert raw text into a list of lowercased, stemmed content words.

    The text is split with the module-level ``tokenizer``. A token is kept
    only if it is purely alphabetic and its lowercase form is not in
    ``stop_words``; every kept token is lowercased and passed through
    ``stemmer.stem``.

    Args:
        text: The string to process.

    Returns:
        A list of stemmed tokens, in their original order (duplicates kept).
    """
    kept = []
    for raw_token in tokenizer.tokenize(text):
        # Skip anything that is not a plain alphabetic word.
        if not raw_token.isalpha() or raw_token.isdigit():
            continue
        lowered = raw_token.lower()
        # The stop-word check happens on the unstemmed, lowercased form.
        if lowered in stop_words:
            continue
        kept.append(stemmer.stem(lowered))
    return kept

In [14]:
# Stemmed token list for each story, in the same order as text1..text3.
tk1, tk2, tk3 = (preprocess(story) for story in (text1, text2, text3))

In [15]:
# Calculate word frequencies: one FreqDist (stem -> occurrence count) per story.
index1, index2, index3 = (nltk.FreqDist(tokens) for tokens in (tk1, tk2, tk3))

In [16]:
# Pool the tokens of all three stories and count them together.
# Note: cindex holds total occurrence counts across the whole collection,
# not the number of documents a stem appears in.
comb = [*tk1, *tk2, *tk3]
cindex = nltk.FreqDist(comb)

In [17]:
# Split documents into sentences (one list of sentence strings per story).
sent1, sent2, sent3 = map(sent_tokenize, (text1, text2, text3))

In [18]:
# Get query from user and process it.
# The query goes through the same preprocess() pipeline as the story text
# (tokenize, drop stop words, lowercase, stem). The resulting query_tokens
# list is read as a global by calculate_scores() in a later cell.
raw_query = input("Please enter a query: ")
query_tokens = preprocess(raw_query)
print("Processed query:", query_tokens)

Please enter a query: words
Processed query: ['word']


In [19]:
# Calculate the WordNet similarity between one sentence word and one query word
def calculate_similarity(word, query):
    """Return the mean Wu-Palmer similarity over all synset pairs of two words.

    Every WordNet synset of ``word`` is compared with every synset of
    ``query`` via ``Synset.wup_similarity`` and the results are averaged.

    Args:
        word: A single word (the notebook passes stemmed tokens, which may
            not be present in WordNet).
        query: A single query word (also a stemmed token in this notebook).

    Returns:
        The average pairwise similarity, or 0 when either word has no
        synsets in WordNet.
    """
    word_synsets = wn.synsets(word)
    if not word_synsets:
        return 0

    query_synsets = wn.synsets(query)
    if not query_synsets:
        return 0

    similarity_scores = []
    for word_synset in word_synsets:
        for query_synset in query_synsets:
            pair_score = word_synset.wup_similarity(query_synset)
            # wup_similarity returns None when the two synsets have no
            # common ancestor; count such a pair as zero similarity instead
            # of letting sum() raise TypeError on a None entry.
            similarity_scores.append(0 if pair_score is None else pair_score)

    return sum(similarity_scores) / len(similarity_scores)

In [20]:
# Calculate scores for each sentence in each document
def calculate_scores(sentences, index, mu):
    """Score every sentence by blending term frequency and query similarity.

    For each sentence the function computes two components:

    * a frequency component: the sum over the sentence's preprocessed words
      of ``index[word] / (1 + cindex[word])``;
    * a query component: the sum of ``calculate_similarity(word, query_word)``
      over every (sentence word, query word) pair, divided by
      ``len(words) + 1``.

    The final score is ``mu * frequency + (1 - mu) * query``.

    This function reads the notebook globals ``cindex`` and ``query_tokens``,
    so both must be defined before it is called.

    Args:
        sentences: Iterable of sentence strings.
        index: Per-document mapping looked up as ``index[word]`` (the
            notebook passes an ``nltk.FreqDist``).
        mu: Weight of the frequency component; the query component gets
            ``1 - mu``.

    Returns:
        A dict mapping each sentence string to its score. Because the dict
        is keyed by sentence text, repeated identical sentences share one
        entry.
    """
    scores = {}
    for sentence in sentences:
        words = preprocess(sentence)

        # Frequency component, one term per word of the sentence.
        frequency_terms = [index[w] / (1 + cindex[w]) for w in words]
        score = sum(frequency_terms)

        # Query component, one term per (sentence word, query word) pair.
        similarity_terms = [
            calculate_similarity(w, q) for w in words for q in query_tokens
        ]
        # The +1 keeps the division defined for sentences with no words left.
        query_similarity = sum(similarity_terms) / (len(words) + 1)

        scores[sentence] = mu * score + (1 - mu) * query_similarity
    return scores

In [21]:
# Per-story weight of the frequency component in calculate_scores();
# the query-similarity component is weighted by (1 - mu).
mu1, mu2, mu3 = 0.08, 0.03, 0.2

# Sentence -> score dictionaries, one per story.
scores1 = calculate_scores(sent1, index1, mu1)
scores2 = calculate_scores(sent2, index2, mu2)
scores3 = calculate_scores(sent3, index3, mu3)

In [25]:
# Print summaries
# A sentence makes it into the summary when its score reaches this cutoff,
# expressed as a multiple of the story's mu weight.
cutoff1 = 9 * mu1
print('\nGift of Magi Summary:')
for sentence, score in scores1.items():
    if score >= cutoff1:
        print(sentence)


Gift of Magi Summary:
Pennies saved one and two at a time by bulldozing the grocer and the vegetable man and the butcher until one's cheeks burned with the silent imputation of parsimony that such close dealing implied.
Now, when the income was shrunk to $20, the letters of "Dillingham" looked blurred, as though they were thinking seriously of contracting to a modest and unassuming D. But whenever Mr. James Dillingham Young came home and reached his flat above he was called "Jim" and greatly hugged by Mrs. James Dillingham Young, already introduced to you as Della.
A very thin and very agile person may, by observing his reflection in a rapid sequence of longitudinal strips, obtain a fairly accurate conception of his looks.
Had the Queen of Sheba lived in the flat across the airshaft, Della would have let her hair hang out of the window some day to dry just to depreciate Her Majesty's jewels and gifts.
Had King Solomon been the janitor, with all his treasures piled up in the basement, 

In [23]:
# Keep only sentences whose score reaches this cutoff (a multiple of mu2).
cutoff2 = 19 * mu2
print('\nThe Skylight Room Summary:')
for sentence, score in scores2.items():
    if score >= cutoff2:
        print(sentence)


The Skylight Room Summary:
Convinced by her second-floor manner that it was worth the $12 that Mr. Toosenberry always paid for it until he left to take charge of his brother's orange plantation in Florida near Palm Beach, where Mrs. McIntyre always spent the winters that had the double front room with private bath, you managed to babble that you wanted something still cheaper.


In [24]:
# Keep only sentences whose score reaches this cutoff (a multiple of mu3).
cutoff3 = 2 * mu3
print('\nThe Cactus Summary:')
for sentence, score in scores3.items():
    if score >= cutoff3:
        print(sentence)


The Cactus Summary:
The most notable thing about Time is that it is so purely relative.
A large amount of reminiscence is, by common consent, conceded to the drowning man; and it is not past belief that one may review an entire courtship while removing one's gloves.
That is what Trysdale was doing, standing by a table in his bachelor apartments.
On the table stood a singular-looking green plant in a red earthen jar.
The plant was one of the species of cacti, and was provided with long, tentacular leaves that perpetually swayed with the slightest breeze with a peculiar beckoning motion.
Trysdale's friend, the brother of the bride, stood at a sideboard complaining at being allowed to drink alone.
White favors like stars upon their coats shone through the gloom of the apartment.
As he slowly unbuttoned his gloves, there passed through Trysdale's mind a swift, scarifying retrospect of the last few hours.
It seemed that in his nostrils was still the scent of the flowers that had been banke