In [1]:
from nltk.corpus import stopwords
from nltk.cluster.util import cosine_distance
import numpy as np
import networkx as nx
import re
from datetime import datetime
import smtplib
from email.mime.multipart import MIMEMultipart
from email.mime.text import MIMEText

In [2]:
summarize_text = []

In [3]:
def read_article(file_name):
    file = open(file_name, encoding="utf-8")
    filedata = file.readlines()
    new_lst = [x[:-1] for x in filedata]
    sentences = []
    i =0;
    while("" in new_lst):
        new_lst[i] = re.sub("[^A-Za-z]","",new_lst[i])
        new_lst.remove("")       
        sentences.append(new_lst[i])
        i+=1     

    return sentences

In [4]:
def sentence_similarity(sent1, sent2, stopwords=None):
    if stopwords is None:
        stopwords = ["the", "a", "an", "in", ""]
    sent1 = [w.lower() for w in sent1]
    sent2 = [w.lower() for w in sent2]
    all_words = list(set(sent1 + sent2))
    vector1 = [0] * len(all_words)
    vector2 = [0] * len(all_words)
    # build the vector for the first sentence
    for w in sent1:
        if w in stopwords:
            continue
        vector1[all_words.index(w)] += 1
    # build the vector for the second sentence
    for w in sent2:
        if w in stopwords:
            continue
        vector2[all_words.index(w)] += 1
    return 1 - cosine_distance(vector1, vector2)

In [5]:
def build_similarity_matrix(sentences, stop_words):
    # Create an empty similarity matrix
    similarity_matrix = np.zeros((len(sentences), len(sentences)))
 
    for idx1 in range(len(sentences)):
        for idx2 in range(len(sentences)):
            if idx1 == idx2: #ignore if both are same sentences
                continue 
            similarity_matrix[idx1][idx2] = sentence_similarity(sentences[idx1], sentences[idx2], stop_words)
    return similarity_matrix

In [6]:
def generate_summary(file_name, top_n=5):
    stop_words = stopwords.words('english')
    # Step 1 - Read text and tokenize
    sentences =  read_article(file_name)
    # Step 2 - Generate Similary Martix across sentences
    sentence_similarity_martix = build_similarity_matrix(sentences, stop_words)
    # Step 3 - Rank sentences in similarity martix
    sentence_similarity_graph = nx.from_numpy_array(sentence_similarity_martix)
    scores = nx.pagerank(sentence_similarity_graph)
    # Step 4 - Sort the rank and pick top sentences
    ranked_sentence = sorted(((scores[i],s) for i,s in enumerate(sentences)), reverse=True)    
    #print("Indexes of top ranked_sentence order are ", ranked_sentence)
    for i in range(top_n):
        summarize_text.append("".join(re.sub(r"(\w)([A-Z])", r"\1 \2", ranked_sentence[i][1])))
        
    # Step 5 - Offcourse, output the summarize text
    print("Summarize Text: \n", ".\n ".join(summarize_text))

In [7]:
generate_summary("input\\Transcript.txt")

Summarize Text: 
 Andrew Knight Justthismonth Andsowearesofortunatetohavesomeonewhoisskilledandhasdeepexpertiseinthistopicandatthesametimesomeonewhoisaskilledcommunicatoraboutthistopic Andsowithoutfurtheradopleasejoinmeinwelcoming Professorlibertythathurt.
 Paul Mc Cartney Thispicturesunfathomablyhorribleeventisreallywhatsparkedtheworldsdiscussionofthe Syrianrefugeecrisisandonewouldimagine Imeanhowcouldyounotlookatthatphotoandfeelsortofadeeponunbelievablesadnessanditdidincreasedonationsbutnotforverylong.
 Paul Mc Cartney Thisis Alan Curtis Hesathreeyearold Syrianboywhoinwasfounddrownedona Turkishbeachandhisfamilywere Syrianrefugeesfleeingto Europe.
 Paul Mc Cartney Hieverybody Thankyou Ithink Ineedtofiremy PRperson Andhaveyou Andrew Thatwasgreat Umdoesthescreen Look Okay Canyouallseeit.
 Paul Mc Cartney Tryingtosaythatwiththeintroductionof Uber Londonhastaxishavegottenmuchlesssafewellintherereportsofrapeorsexualassaultbytaxis Whenthisheadlinewaswritten


In [8]:
mail_body = "Summarize Text: ".join(summarize_text)

In [9]:
username = "email_id"
password = "********"
mail_from = "email_id"
mail_to = "email_id"
now = datetime.now()
mail_subject = "MoM - " + now.strftime("%m/%d/%Y")

In [10]:
mimemsg = MIMEMultipart()
mimemsg['From']=mail_from
mimemsg['To']=mail_to
mimemsg['Subject']=mail_subject
mimemsg.attach(MIMEText(mail_body, 'plain'))

connection = smtplib.SMTP(host='smtp.office365.com', port=587)
connection.starttls()
connection.login(username,password)
connection.send_message(mimemsg)
connection.quit()

(221, b'2.0.0 Service closing transmission channel')