# Import Dependencies

In [4]:
import nltk
from nltk.tokenize import sent_tokenize
nltk.download('punkt')
from sklearn.feature_extraction.text import TfidfVectorizer

[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Package punkt is already up-to-date!


# Define Document

In [28]:
# Source document to summarise: a news article (headline followed by body
# paragraphs) kept as one multi-line string. Despite its name, `sentence`
# holds the whole document; it is split into individual sentences by
# sent_tokenize in the preprocessing step.
sentence = """
Manchester City makes history by winning Club World Cup

Manchester City capped off its incredible year with yet another trophy, dismantling Fluminense 4-0 to win the Club World Cup on Friday.

Having already won the Premier League, Champions League, FA Cup and Super Cup, Pep Guardiola’s side now boasts five trophies this calendar year, becoming the first English club to ever hold all those titles simultaneously.

The final piece of the jigsaw came on a highly charged night in Saudi Arabia as Manchester City outclassed its Brazilian opponents.

“We’ve shown over the past 12 months we are the best team in the world. Our results prove that and the consistency we have managed has been amazing,” club captain Kyle Walker said after the game, per Sky Sports.

“To win these five trophies – for me, the five biggest prizes available to us – is incredible. I am so proud to have been a part of this and I can honestly say it’s an honour to play alongside these players. I couldn’t ask for better teammates.”

It took just 40 seconds for Manchester City to take the lead.

Brazilian left-back Marcelo miscued a pass in the opening exchanges which let Nathan Aké free to shoot from distance. The defender’s effort cannoned back off the post but forward Julián Álvarez was in the right place to turn the rebound into the net with his chest.

City continued to look dangerous and doubled its lead before the break after Phil Foden’s attempted cross was deflected into his own net by Fluminense defender Nino.

Foden then got on the scoresheet himself in the 72nd minute after a prodding home from close range.

The rout was completed in the 88th minute when Álvarez capped off a brilliant performance with a clinical finish into the far corner.

City’s defence was largely untested for during the game, underlining the team’s dominance during this unforgettable year.

“As a manager it is what I am most proud of; that we are always there. No matter how much we win, no matter what trophies we lift, we are there again to fight for the next one,” City boss Guardiola said after the match, according to Sky Sports.

“To win the Treble was truly special, but to win two more trophies and now hold these five major titles shows the unique mentality of this team, of the Club and its fans.

“It is something no other English team has ever achieved, and we will always remember this incredible time we spent together.”

The game ended in some unsavoury scenes as a scuffle broke out between players on the pitch after the final whistle, but the game will be remembered as yet another successful night for City.

The champion heads back to England where it faces a tough title defence in the Premier League.

It currently sits fourth in the table and will face Everton in its next fixture on Wednesday.
"""

# Text Preprocessing

In [29]:
# Split the document into a list of sentence strings with NLTK's Punkt
# tokenizer (English is the default language, spelled out here).
# NOTE: the headline has no closing punctuation, so it is not split off; it
# stays attached to the first body sentence, leading newline included.
sent_token = sent_tokenize(sentence, language="english")
print(sent_token)

['\nManchester City makes history by winning Club World Cup\n\nManchester City capped off its incredible year with yet another trophy, dismantling Fluminense 4-0 to win the Club World Cup on Friday.', 'Having already won the Premier League, Champions League, FA Cup and Super Cup, Pep Guardiola’s side now boasts five trophies this calendar year, becoming the first English club to ever hold all those titles simultaneously.', 'The final piece of the jigsaw came on a highly charged night in Saudi Arabia as Manchester City outclassed its Brazilian opponents.', '“We’ve shown over the past 12 months we are the best team in the world.', 'Our results prove that and the consistency we have managed has been amazing,” club captain Kyle Walker said after the game, per Sky Sports.', '“To win these five trophies – for me, the five biggest prizes available to us – is incredible.', 'I am so proud to have been a part of this and I can honestly say it’s an honour to play alongside these players.', 'I cou

In [30]:
# Fit a TF-IDF vectorizer on the tokenized sentences, treating each sentence
# as its own document; English stop words are left out of the vocabulary.
vectorizer = TfidfVectorizer(stop_words='english')
# `features` holds one row of TF-IDF weights per entry of `sent_token`; the
# scoring step below iterates over it row by row.
features = vectorizer.fit_transform(sent_token)

# Calculate Sentence Score

In [35]:
# Score every sentence by the mean TF-IDF weight of the terms it contains.
# Result: `sent_scores[k]` is the average score of `sent_token[k]`.
sent_scores = []

# Each `row` is the TF-IDF vector of one sentence; numbering starts at 1 for
# the printed report only.
for sent_number, row in enumerate(features, start=1):
    print(f"- Sentences {sent_number}")
    sent_score = row.sum()
    # Number of entries stored for this row, i.e. the vocabulary terms
    # (stop words excluded) that occur in the sentence -- not a raw word count.
    sent_length = len(row.data)
    print("Sum of sentence TF-IDF score: ", sent_score)
    print("Length of sentence (number of words): ", sent_length)
    # A sentence made up only of stop words / out-of-vocabulary tokens has an
    # empty row. Score it 0.0 instead of dividing by zero, which would put a
    # NaN into sent_scores and, through the mean, into the threshold.
    avg_score = sent_score / sent_length if sent_length else 0.0
    print("Average TF-IDF score: ", avg_score)
    print("-------------------------")
    sent_scores.append(avg_score)

- Sentences 1
Sum of sentence TF-IDF score:  3.8372306490586197
Length of sentence (number of words):  16
Average TF-IDF score:  0.23982691556616373
-------------------------
- Sentences 2
Sum of sentence TF-IDF score:  4.2076776340427875
Length of sentence (number of words):  19
Average TF-IDF score:  0.22145671758119934
-------------------------
- Sentences 3
Sum of sentence TF-IDF score:  3.71282182532486
Length of sentence (number of words):  14
Average TF-IDF score:  0.2652015589517757
-------------------------
- Sentences 4
Sum of sentence TF-IDF score:  2.8148451944954482
Length of sentence (number of words):  8
Average TF-IDF score:  0.35185564931193103
-------------------------
- Sentences 5
Sum of sentence TF-IDF score:  3.588741191885732
Length of sentence (number of words):  13
Average TF-IDF score:  0.2760570147604409
-------------------------
- Sentences 6
Sum of sentence TF-IDF score:  2.4238363903356595
Length of sentence (number of words):  6
Average TF-IDF score:  0.4

In [36]:
len(sent_scores)

20

In [37]:
len(sent_token)

20

# Define Threshold

In [39]:
# Selection cut-off: the arithmetic mean of all per-sentence average scores.
score_total = sum(sent_scores)
threshold = score_total / len(sent_scores)
threshold

0.31514140188014117

# Select Summary Sentences

In [40]:
# Build the extractive summary: keep, in document order, every sentence whose
# average TF-IDF score is at or above the threshold.
summary_sentences = [
    sent
    for sent, score in zip(sent_token, sent_scores)
    if score >= threshold
]

# Report the selected sentences one per line.
for sent in summary_sentences:
    print("- ", sent)

# Join once with single spaces. This avoids repeated string concatenation in
# a loop and the stray leading space that prefixing every sentence with " "
# would leave at the start of the summary. Empty selection -> "".
final_summ = " ".join(summary_sentences)

-  “We’ve shown over the past 12 months we are the best team in the world.
-  “To win these five trophies – for me, the five biggest prizes available to us – is incredible.
-  I am so proud to have been a part of this and I can honestly say it’s an honour to play alongside these players.
-  Foden then got on the scoresheet himself in the 72nd minute after a prodding home from close range.
-  “As a manager it is what I am most proud of; that we are always there.
-  The champion heads back to England where it faces a tough title defence in the Premier League.
-  It currently sits fourth in the table and will face Everton in its next fixture on Wednesday.


# Summarization Result

In [43]:
final_summ

' “We’ve shown over the past 12 months we are the best team in the world. “To win these five trophies – for me, the five biggest prizes available to us – is incredible. I am so proud to have been a part of this and I can honestly say it’s an honour to play alongside these players. Foden then got on the scoresheet himself in the 72nd minute after a prodding home from close range. “As a manager it is what I am most proud of; that we are always there. The champion heads back to England where it faces a tough title defence in the Premier League. It currently sits fourth in the table and will face Everton in its next fixture on Wednesday.'