In [1]:
import bs4 as BeautifulSoup
import urllib.request  
import string
import math
import nltk
import numpy as np
import networkx as nx
from nltk.corpus import stopwords
nltk.download('stopwords')
stop_words = set(stopwords.words("english"))
from nltk.stem import PorterStemmer
stem = PorterStemmer()
from nltk.tokenize import word_tokenize, sent_tokenize
nltk.download('punkt')
from nltk.cluster.util import cosine_distance

[nltk_data] Downloading package stopwords to /root/nltk_data...
[nltk_data]   Unzipping corpora/stopwords.zip.
[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Unzipping tokenizers/punkt.zip.


In [2]:
def sentence_similarity(sent1, sent2, stopwords=None):
	if stopwords is None:
		stopwords = []
	sent1 = [w.lower() for w in sent1]
	sent2 = [w.lower() for w in sent2]

	all_words = list(set(sent1 + sent2))
	vector1 = [0] * len(all_words)
	vector2 = [0] * len(all_words)
	# build the vector for the first sentence
	for w in sent1:
		if w in stopwords:
			continue
		vector1[all_words.index(w)] += 1
	# build the vector for the second sentence
	for w in sent2:
		if w in stopwords:
			continue
		vector2[all_words.index(w)] += 1
	return 1 - cosine_distance(vector1, vector2)

In [3]:
def build_similarity_matrix(sentences, stop_words):
	# Create an empty similarity matrix
	similarity_matrix = np.zeros((len(sentences), len(sentences)))
	for idx1 in range(len(sentences)):
		for idx2 in range(len(sentences)):
			if idx1 == idx2: #ignore if both are same sentences
				continue
			similarity_matrix[idx1][idx2] = sentence_similarity(sentences[idx1], sentences[idx2], stop_words)

			print(sentences[idx1], sentences[idx2], similarity_matrix[idx1][idx2])	
	
	return similarity_matrix

In [4]:
def generate_summary (article_content, top_n = 3):
  stop_words = stopwords.words('english')
  summarize_text = []
  # Step 1 - Read data
  sentences = sent_tokenize(article_content)
  # Step 2 - Generate Similary Martix across sentences
  sentence_similarity_martix = build_similarity_matrix(sentences, stop_words)
	# Step 3 - Rank sentences in similarity martix
  sentence_similarity_graph = nx.from_numpy_array(sentence_similarity_martix)
  scores = nx.pagerank(sentence_similarity_graph)
  print(scores)
  # Step 4 - Sort the rank and pick top sentences
  ranked_sentence = sorted(((scores[i],s) for i,s in enumerate(sentences)), reverse=True) 
  print("Indexes of top ranked_sentence order are ", ranked_sentence)
  for i in range(top_n):
    summarize_text.append("".join(ranked_sentence[i][1]))
    # Step 5 - Offcourse, output the summarize texr
  
  print("Summarize Text: \n", "\n".join(summarize_text))

	#Save summary to text file
	

In [7]:
# Fetching the content from the URL
#fetched_data = urllib.request.urlopen('https://en.wikipedia.org/wiki/Text_mining')

#article_read = fetched_data.read()

# Parsing the URL content and storing in a variable
#article_parsed = BeautifulSoup.BeautifulSoup(article_read,'html.parser')

# Returning <p> tags
#paragraphs = article_parsed.find_all('p')

article_content = ''
text = '''
How to Make Meetings Shorter (for Real) Time is money. Reclaim your precious minutes with these strategies for in-person and virtual meetings. How to Make Virtual Meetings Shorter
If you’re the meeting scheduler, consider doing away with one-hour or 45-minute meetings unless they’re mission-critical or involve many teams presenting updates. Ask yourself: Can we get everything done in 30 minutes? How about 15? Don’t overcommit on time you set aside in the calendar.
Before you send the invite, ask yourself: “Do all of these people need to be at this meeting?” You can always fill in those who aren’t in attendance with a short update, if needed. Meetings with too many participants are also likely to last longer. 
The person who organized the meeting should lead the discussion and be mindful of keeping it on track and on time. If that’s you and it’s not your strong suit, assign someone else to lead the meeting. Don’t have a meeting with no one in charge; that’s a great way to wander and go too long.
The pandemic shutdown made Zoom a tool that nearly everyone had to start using, from CEOs to K-12 students. But weirdly, people didn’t seem to get much better at navigating Zoom’s many control buttons and getting sound to work. It’s 2023 and meetings are still marred with, “Can you hear me? Am I on mute?”
Let participants know in advance that the virtual meeting room will open up 10 minutes early for a “get your mic and camera working” grace period time. If possible, have someone in the meeting room to help get those kinks worked out.
Be consistent with what virtual platform you use for meetings. Switching from Zoom to Microsoft Teams to Google Meet introduces more variables that can lead to delays.
For all types of meetings, make sure there’s an agenda. Nothing makes a meeting drag longer than a lack of action items and aimless conversation.
And speaking of conversation, put a five-minute limit (or shorter) on small talk. If a team hasn’t met in a while, some pleasantries are fine, but don’t let it derail the agenda. For regular meetings, table social chat until after the meeting. 
If participants feel rushed and are resistant to the idea of shorter meetings, remind them that time is money. You can find several “How much is this meeting costing us?” calculators that will give you a good sense of how much money long meetings waste.

How to Make Hybrid and In-Person Meetings Shorter
As with all-virtual meetings, consider opening up the online meeting room and in-person space five to 10 minutes early, and make it clear ahead of time that hybrid meetings are not going to be paused for those online to get set up.
It’s hard to regulate people’s distractions when they’re not in the room, but for anyone participating in person, consider adopting a “no phone distractions” policy during meetings. Phones are silenced and put away except for emergencies. Instead of giving people an excuse to reach for their mobile devices to look at your agenda, consider printing it out. Nobody likes wasting paper, but getting people’s eyes off their screens may be worth it.
Speaking of agendas, yours, no matter the meeting type, should include target discussion times for each major topic. You might only need five minutes for some topics, while others may require 15 minutes or more. Mark those times down so participants know what the time priorities are. If you want to take that idea to the next level, do what podcasters do to keep track of time: Have a Pardon the Interruption-style countdown timer for each topic. You can find lots of countdown timer apps or sites online for free. BigTimer.net is one of the easiest to use. It works from any browser, can be customized, and doesn’t add any cost to your meetings.
Now, let’s talk about food. Yes, food. Have you ever been in a meeting and had things go completely off track due to a birthday cake or a spread of food that takes up 10 or 15 minutes before a meeting? People filling up paper plates with goodies from a fruit and veggie tray can be a huge time suck, not to mention an annoyance for those joining online who can’t partake in the goodies.
If you must have snacks for a meeting, make sure it’s grab-and-go food. Bagels are great. A breakfast taco bar with 10 kinds of salsa? Say goodbye to your target wrap-up time.
Finally, if all the topics of discussion have been hit and action items assigned and there’s still time left on the meeting clock, don’t hesitate to adjourn early. You’ll be an office hero if you can help your fellow coworkers reclaim some of their time. Be vigilant: Don’t let long, inefficient meetings become the norm.

'''
# Looping through the paragraphs and adding them to the variable
#for p in paragraphs:  
#    article_content += p.text


generate_summary(text)


How to Make Meetings Shorter (for Real) Time is money. Reclaim your precious minutes with these strategies for in-person and virtual meetings. 0.9315218638140802

How to Make Meetings Shorter (for Real) Time is money. How to Make Virtual Meetings Shorter
If you’re the meeting scheduler, consider doing away with one-hour or 45-minute meetings unless they’re mission-critical or involve many teams presenting updates. 0.9432082079186537

How to Make Meetings Shorter (for Real) Time is money. Ask yourself: Can we get everything done in 30 minutes? 0.9158425036848862

How to Make Meetings Shorter (for Real) Time is money. How about 15? 0.4959400140039517

How to Make Meetings Shorter (for Real) Time is money. Don’t overcommit on time you set aside in the calendar. 0.9238119332199607

How to Make Meetings Shorter (for Real) Time is money. Before you send the invite, ask yourself: “Do all of these people need to be at this meeting?” You can always fill in those who aren’t in attendance with a