1. Installing Libraries

In [32]:
# pip install U spacy
# python -m spacy download en_core_web_sm

2. Import and Load

In [33]:
import spacy
from spacy.lang.en.stop_words import STOP_WORDS
from string import punctuation
from heapq import nlargest

stopwords = list(STOP_WORDS)
nlp = spacy.load('en_core_web_sm')

3. Input Text

In [34]:
text = '''
Maria Sharapova has basically no friends as tennis players on the WTA Tour. 
The Russian player has no problems in openly speaking about it and in a recent interview she said: 
I don’t really hide any feelings too much. I think everyone knows this is my job here. 
When I’m on the court playing, I’m a competitor and I want to beat every single person whether they’re 
in the locker room or across the net. So I’m not the one to strike up a conversation about the weather 
and know that in the next few minutes I have to go and try to win a tennis match. 
I’m a pretty competitive girl. I say my hellos, but I’m not sending any players flowers as well. 
Uhm, I’m not really friendly or close to many players. I have not a lot of friends away from the courts. 
When she said she is not really close to a lot of players, is that something strategic that she is doing? 
Is it different on the men’s tour than the women’s tour? No, not at all. 
I think just because you’re in the same sport doesn’t mean that you have to be friends with everyone 
just because you’re categorized as a tennis player. I think every person has different interests. 
I have friends that have completely different jobs and interests, and I’ve met them in very different 
parts of my life. I think everyone just thinks because we’re tennis players we should be the greatest 
of friends. But ultimately, tennis is just a very small part of what we do. 
There are so many other things that we’re interested in, that we do.
'''

4. Process the Document 

In [35]:
doc = nlp(text)

5. Word Frequency Table

In [36]:
word_frequencies = {}
for word in doc:
    if word.text.lower() not in stopwords and word.text.lower() not in punctuation:
        word_lower = word.text.lower()
        if word_lower not in word_frequencies:
            word_frequencies[word_lower] = 1
        else:
            word_frequencies[word_lower] += 1

6. Normalize Frequency

In [37]:
max_freq = max(word_frequencies.values())
for word in word_frequencies.keys():
    word_frequencies[word] = word_frequencies[word] / max_freq

7. Score Sentences

In [38]:
sentence_tokens = [sent for sent in doc.sents]
sentence_scores = {}
for sent in sentence_tokens:
    for word in sent:
        word_lower = word.text.lower()
        if word_lower in word_frequencies:
            if sent not in sentence_scores:
                sentence_scores[sent] = word_frequencies[word_lower]
            else:
                sentence_scores[sent] += word_frequencies[word_lower]

8. Select top sentences

In [39]:
select_length = int(len(sentence_tokens) * 0.3)
summary_sentences = nlargest(select_length, sentence_scores, key=sentence_scores.get)


9. Final Summary

In [40]:
final_summary = ' '.join([sent.text.strip() for sent in summary_sentences])
print("🔎 Final Extractive Summary:\n")
print(final_summary)

🔎 Final Extractive Summary:

Maria Sharapova has basically no friends as tennis players on the WTA Tour. So I’m not the one to strike up a conversation about the weather 
and know that in the next few minutes I have to go and try to win a tennis match. I think everyone just thinks because we’re tennis players we should be the greatest 
of friends. I have friends that have completely different jobs and interests, and I’ve met them in very different 
parts of my life. I think just because you’re in the same sport doesn’t mean that you have to be friends with everyone 
just because you’re categorized as a tennis player.
