# **Title: 7.2 Exercise**
# **Author: Michael J. Montana**
# **Date: 30 April 2023**
# **Modified By: N/A**
# **Description: Performs Text Summation on the Movie Fight Club**

In [74]:
import pandas as pd
from myclassesv4 import Normalize_Corpus
from myclassesv4 import Summarizer
import gensim
import nltk
import os
import numpy as np
from gensim.models import LsiModel
from scipy.sparse.linalg import svds
from tqdm import tqdm
import matplotlib.pyplot as plt
from gensim.summarization import summarize
import re
import spacy
stop_words = nltk.corpus.stopwords.words('english')

In [75]:
# Read the input text from a file
with open('data/fc_script.txt', 'r') as f:  #https://docs.python.org/3/library/functions.html#open
    fc = f.read()
corpus = pd.Series(fc)
corpus

0    FIGHT CLUB\n\nBy Jim Uhls\n\n\nPG 1\n\nSCREEN ...
dtype: object

In [76]:
norm = Normalize_Corpus() #instantiating normalizer class
# cleanfc = fc.copy() #creating copy of data
text= norm.normalize(corpus,html_stripping=True, contraction_expansion=True, #cleaning data
                                             accented_char_removal=True, text_lower_case=True,
                                             text_lemmatization=False, special_char_removal=True,
                                             stopword_removal=True, digits_removal=True)

Stripping HTML...
Expanding Contratcions...
Removing Accent Markings...
Changing Letter Case to Lower...
Removing Special Characters...
Removing Stopwords...
Removing Numbers...
Your Data is Clean


In [77]:
summarizer = Summarizer() #instantiating summarizer class

# <font color=2d5db5>**Using either spaCy or Gensim, create your own text summarization model using a very specific corpus (e.g., dialog from Star Wars). Be careful not to choose too big a corpus or too complex a model because it could take a very long time to build. If you want to go a step further, create a front end for it (using something like Flask).**

# <font color=2d5db5>**Keywords:**

### Collocations:

In [78]:
fight_club=summarizer.flatten_corpus(text) #flattens the corpus
n_gram=summarizer.get_top_ngrams(corpus=fight_club, ngram_val=1, limit=10) #calling function returning unigrams
bigram=summarizer.get_top_ngrams(corpus=fight_club, ngram_val=2, limit=10) #calling function returning bigrams
trigram=summarizer.get_top_ngrams(corpus=fight_club, ngram_val=3, limit=10) #calling function returning trigrams

print('Top N-grams:')
for item in n_gram:
    print('\t',item)
print('\nTop Bigrams:')
for item in bigram:
    print('\t',item)
print('\nTop Trigrams:')
for item in trigram:
    print('\t',item)

Top N-grams:
	 ('jack', 1010)
	 ('tyler', 468)
	 ('marla', 220)
	 ('int', 177)
	 ('vo', 164)
	 ('pg', 128)
	 ('jacks', 115)
	 ('back', 107)
	 ('night', 98)
	 ('room', 80)

Top Bigrams:
	 ('jack vo', 163)
	 ('pg jack', 41)
	 ('jack tyler', 41)
	 ('fight club', 33)
	 ('night jack', 32)
	 ('pg int', 31)
	 ('tyler jack', 24)
	 ('can not', 22)
	 ('later jack', 21)
	 ('jack walks', 20)

Top Trigrams:
	 ('pg jack vo', 15)
	 ('moments later jack', 12)
	 ('int airplane cabin', 10)
	 ('int jacks bedroom', 7)
	 ('night jack walks', 7)
	 ('int hotel room', 7)
	 ('paper st house', 7)
	 ('int living room', 7)
	 ('rule fight club', 7)
	 ('int high school', 6)


In [79]:
summarizer.collocation_finder(text) #returns the top 10 collocation pairs and triplets

Collocation Finder:
	Bigram Association Measures:
		 [('jack', 'vo'), ('jack', 'tyler'), ('pg', 'jack'), ('fight', 'club'), ('night', 'jack'), ('pg', 'int'), ('tyler', 'jack'), ('later', 'jack'), ('jack', 'looks'), ('jack', 'walks')]
		 [('absenteeism', 'unpresentable'), ('accept', 'fate'), ('accident', 'statistics'), ('according', 'ancient'), ('accounts', 'receipt'), ('accurate', 'description'), ('acts', 'vandalism'), ('actuary', 'twice'), ('age', 'email'), ('allsinging', 'alldancing')]
	Trigram Association Measures:
		 [('pg', 'jack', 'vo'), ('moments', 'later', 'jack'), ('int', 'airplane', 'cabin'), ('int', 'hotel', 'room'), ('int', 'jacks', 'bedroom'), ('int', 'living', 'room'), ('night', 'jack', 'walks'), ('paper', 'st', 'house'), ('rule', 'fight', 'club'), ('int', 'hallway', 'jack')]
		 [('absenteeism', 'unpresentable', 'appearance'), ('according', 'ancient', 'chinese'), ('accounts', 'receipt', 'required'), ('ammonium', 'oxalate', 'potassium'), ('ancient', 'chinese', 'custom'), (

### Weighted Tag-Based Phrase Extraction

In [80]:
# creating precleaned data with different parameters
sent_tokens = nltk.sent_tokenize(fc)
text_chunks=pd.Series(sent_tokens)
text_chunks= norm.normalize(text_chunks,html_stripping=True, contraction_expansion=True,
                                             accented_char_removal=True, text_lower_case=False,
                                             text_lemmatization=False, special_char_removal=True,
                                             stopword_removal=False, digits_removal=True)
text_chunks.head(5)

Stripping HTML...
Expanding Contratcions...




Removing Accent Markings...
Removing Special Characters...
Removing Numbers...
Your Data is Clean


0       FIGHT CLUB By Jim Uhls PG SCREEN BLACK JACK VO
1    People were always asking me did I know Tyler ...
2                                          FADE IN INT
3    SOCIAL ROOM TOP FLOOR OF HIGHRISE NIGHT TYLER ...
4                              They struggle intensely
dtype: object

In [81]:
chunks = summarizer.get_chunks(text_chunks)
print('Chunks:\n', chunks, '\n')

Chunks:



In [82]:
tfidf=summarizer.get_tfidf_weighted_keyphrases(text=text_chunks,top_n=10)
print('Top 10 TF-IDF keyphrases:')
for item in tfidf:
    print('\t',item)

Top 10 TF-IDF keyphrases:
	 ('fade int', 1.0)
	 ('disheveled jack', 1.0)
	 ('jack vo', 1.0)
	 ('pg jack vo', 1.0)
	 ('dropbydrop', 1.0)
	 ('move jacks face', 1.0)
	 ('jacks face', 1.0)
	 ('pg pull back wide int', 1.0)
	 ('product endorsements', 1.0)
	 ('big cheesebread', 1.0)


In [83]:
summarizer.keyword(fight_club)

Keywords:
 [('marlas', 0.247), ('screen black jack', 0.239), ('barrel handgun lodged jacks mouth struggle intensely', 0.106), ('facing', 0.096), ('asking know tyler durden fade int social room', 0.085), ('liked', 0.077), ('turns', 0.072), ('turn', 0.072), ('turning', 0.072), ('eye', 0.068), ('looked weeping', 0.062), ('hand', 0.062), ('doors', 0.06), ('heading', 0.058), ('noise gunshot makes expanding gases totally forgot tylers', 0.058), ('face slowly pull', 0.057), ('smiles', 0.057), ('pulls', 0.056), ('pulled', 0.056), ('continuous looking exactly', 0.052), ('takes', 0.052), ('guys', 0.049), ('guy', 0.049), ('gets', 0.048), ('open', 0.048)]


# <font color=2d5db5>**Topic Modeling:**

In [84]:
# page 365
DATA_PATH = 'data/nipstxt/'
print(os.listdir(DATA_PATH))

# page 366
folders = ['nips{0:02}'.format(i) for i in range(0, 13)]
# Read all texts into a list.
papers = []
for folder in folders:
    file_names = os.listdir(DATA_PATH + folder)
    for file_name in file_names:
        with open(DATA_PATH + folder + '/' + file_name, encoding='utf-8',
                  errors='ignore', mode='r+') as f:
            data = f.read()
        papers.append(data)
# save the papers list, you'll need this a bit later on

print('Length of papers:\n', len(papers), '\n')
print('Paper fragment:\n', papers[0][:165], '\n')

['idx', 'MATLAB_NOTES', 'nips00', 'nips01', 'nips02', 'nips03', 'nips04', 'nips05', 'nips06', 'nips07', 'nips08', 'nips09', 'nips10', 'nips11', 'nips12', 'nips16', 'orig', 'RAW_DATA_NOTES', 'README_yann']
Length of papers:
 1740 

Paper fragment:
 1 
CONNECTIVITY VERSUS ENTROPY 
Yaser S. Abu-Mostafa 
California Institute of Technology 
Pasadena, CA 91125 
ABSTRACT 
How does the connectivity of a neural network 



In [85]:
#Text Wrangling
stop_words = nltk.corpus.stopwords.words('english')
wtk = nltk.tokenize.RegexpTokenizer(r'\w+')
wnl = nltk.stem.wordnet.WordNetLemmatizer()

def normalize_corpus(papers):
    norm_papers = []
    for paper in papers:
        paper = paper.lower()
        paper_tokens = [token.strip() for token in wtk.tokenize(paper)]
        paper_tokens = [wnl.lemmatize(token) for token in paper_tokens
                        if not token.isnumeric()]
        paper_tokens = [token for token in paper_tokens if len(token) > 1]
        paper_tokens = [token for token in paper_tokens if token not in stop_words]
        paper_tokens = list(filter(None, paper_tokens))
        if paper_tokens:
            norm_papers.append(paper_tokens)
    return norm_papers

norm_papers = normalize_corpus(papers)
print('Length of normalized papers:', len(norm_papers), '\n')

Length of normalized papers: 1740 



### Text Representation with Feature Engineering

In [86]:
# sample demonstration
bigram = gensim.models.Phrases(norm_papers, min_count=20, threshold=20, delimiter=b'_')
bigram_model = gensim.models.phrases.Phraser(bigram)
print('Bigram model: \n\t', bigram_model[norm_papers[0]][:50])

Bigram model: 
	 ['connectivity', 'versus', 'entropy', 'yaser', 'abu_mostafa', 'california_institute', 'technology_pasadena', 'ca_abstract', 'doe', 'connectivity', 'neural_network', 'number', 'synapsis', 'per', 'neuron', 'relate', 'complexity', 'problem', 'handle', 'measured', 'entropy', 'switching', 'theory', 'would', 'suggest', 'relation', 'since', 'boolean_function', 'implemented', 'using', 'circuit', 'low', 'connectivity', 'using', 'two', 'input', 'nand', 'gate', 'however', 'network', 'learns', 'problem', 'example', 'using', 'local', 'learning', 'rule', 'prove', 'entropy', 'problem']


In [87]:
# Create a dictionary representation of the documents.
norm_corpus_bigrams = [bigram_model[doc]
                       for doc in norm_papers]
dictionary = gensim.corpora.Dictionary(norm_corpus_bigrams)
print('Sample word to number mappings:\n', list(dictionary.items())[:15], '\n')
print('Total vocabulary size:', len(dictionary), '\n')

Sample word to number mappings:
 [(0, '0a'), (1, '2h'), (2, '2h2'), (3, '2he'), (4, '2n'), (5, '__c'), (6, '_c'), (7, '_k'), (8, 'a2'), (9, 'ability'), (10, 'abu_mostafa'), (11, 'access'), (12, 'accommodate'), (13, 'according'), (14, 'accumulated')] 

Total vocabulary size: 78892 



In [88]:
# Filter out words that occur in fewer than 20 documents, or more than 50% of the documents.
dictionary.filter_extremes(no_below=20, no_above=0.6)
print('Total vocabulary size:', len(dictionary), '\n')

Total vocabulary size: 7756 



In [89]:
# Transforming corpus into bag of words vectors
bow_corpus = [dictionary.doc2bow(text) for text in norm_corpus_bigrams]
print('Bag of words:\n', bow_corpus[1][:50], '\n')

Bag of words:
 [(3, 1), (12, 3), (14, 1), (15, 1), (16, 1), (17, 16), (20, 1), (24, 1), (26, 1), (31, 3), (35, 1), (36, 1), (40, 3), (41, 5), (42, 1), (48, 1), (53, 3), (55, 1), (56, 2), (58, 1), (60, 3), (63, 5), (64, 4), (65, 2), (73, 1), (74, 1), (75, 1), (76, 1), (77, 3), (82, 1), (83, 4), (84, 1), (85, 1), (86, 2), (94, 1), (96, 2), (97, 3), (106, 1), (110, 1), (119, 2), (120, 4), (121, 2), (124, 2), (127, 1), (128, 1), (132, 1), (133, 1), (135, 6), (136, 1), (144, 1)] 



In [90]:
# viewing actual terms and their counts
print('Terms and counts:\n', [(dictionary[idx], freq) for idx, freq in bow_corpus[1][:50]], '\n')

Terms and counts:
 [('ability', 1), ('aip', 3), ('although', 1), ('american_institute', 1), ('amount', 1), ('analog', 16), ('appears', 1), ('architecture', 1), ('aspect', 1), ('available', 3), ('become', 1), ('becomes', 1), ('binary', 3), ('biological', 5), ('bit', 1), ('cannot', 1), ('circuit', 3), ('collective', 1), ('compare', 2), ('complex', 1), ('computing', 3), ('conference', 5), ('connected', 4), ('connectivity', 2), ('define', 1), ('defined', 1), ('defines', 1), ('definition', 1), ('denker', 3), ('designed', 1), ('desired', 4), ('diagonal', 1), ('difference', 1), ('directly', 2), ('ed', 1), ('el', 2), ('element', 3), ('equivalent', 1), ('eventually', 1), ('feature', 2), ('final', 4), ('find', 2), ('fixed', 2), ('frequency', 1), ('furthermore', 1), ('generating', 1), ('get', 1), ('global', 6), ('go', 1), ('hence', 1)] 



In [91]:
# total papers in the corpus
print('Total number of papers:', len(bow_corpus), '\n')

Total number of papers: 1740 



### Latent Semantic Indexing

In [92]:
TOTAL_TOPICS = 10
lsi_bow = LsiModel(bow_corpus, id2word=dictionary, num_topics=TOTAL_TOPICS,
                   onepass=True, chunksize=1740, power_iters=1000)

for topic_id, topic in lsi_bow.print_topics(num_topics=10, num_words=20):
    print('Topic #' + str(topic_id+1)+':')
    print(topic, '\n')

for n in range(TOTAL_TOPICS):
    print('Topic #' + str(n+1)+':')
    print('='*50)
    d1 = []
    d2 = []
    for term, wt in lsi_bow.show_topic(n, topn=20):
        if wt >= 0:
            d1.append((term, round(wt, 3)))
        else:
            d2.append((term, round(wt, 3)))

    print('Direction 1:', d1)
    print('-'*50)
    print('Direction 2:', d2)
    print('-'*50, '\n')

# page 379
term_topic = lsi_bow.projection.u
singular_values = lsi_bow.projection.s
topic_document = (gensim.matutils.corpus2dense(lsi_bow[bow_corpus],
                                               len(singular_values)).T / singular_values).T
print(term_topic.shape, singular_values.shape, topic_document.shape)

document_topics = pd.DataFrame(np.round(topic_document.T, 3),
                               columns=['T' + str(i) for i in range(1, TOTAL_TOPICS+1)])
print(document_topics.head(5))

# page 380
document_numbers = [13, 250, 500]

for document_number in document_numbers:
    top_topics = list(document_topics.columns[np.argsort(
        -np.absolute(document_topics.iloc[document_number].values))[:3]])
    print('Document #' + str(document_number)+':')
    print('Dominant Topics (top 3):', top_topics)
    print('Paper Summary:')
    print(papers[document_number][:500], '\n')

Topic #1:
0.215*"unit" + 0.212*"state" + 0.187*"training" + 0.177*"neuron" + 0.162*"pattern" + 0.145*"image" + 0.140*"vector" + 0.125*"feature" + 0.122*"cell" + 0.110*"layer" + 0.101*"task" + 0.097*"class" + 0.091*"probability" + 0.089*"signal" + 0.087*"step" + 0.086*"response" + 0.085*"representation" + 0.083*"noise" + 0.082*"rule" + 0.081*"distribution" 

Topic #2:
0.487*"neuron" + 0.396*"cell" + -0.257*"state" + 0.191*"response" + -0.187*"training" + 0.170*"stimulus" + 0.117*"activity" + -0.109*"class" + 0.099*"spike" + 0.097*"pattern" + 0.096*"circuit" + 0.096*"synaptic" + -0.095*"vector" + 0.090*"signal" + 0.090*"firing" + 0.088*"visual" + -0.084*"classifier" + -0.083*"action" + -0.078*"word" + 0.078*"cortical" 

Topic #3:
-0.627*"state" + 0.395*"image" + -0.219*"neuron" + 0.209*"feature" + -0.188*"action" + 0.137*"unit" + 0.131*"object" + -0.130*"control" + 0.129*"training" + -0.109*"policy" + 0.103*"classifier" + 0.090*"class" + -0.081*"step" + -0.081*"dynamic" + 0.080*"classifi

# <font color=2d5db5>**Automated Document Summarization:**

### Gensim

In [93]:

#Wikipedia.(2023). Fight Club https://en.wikipedia.org/wiki/Fight_Club
reception = """
Fight Club is a 1999 American film directed by David Fincher and starring Brad Pitt, Edward Norton, and Helena Bonham Carter. It is based on the 1996 novel of the same name by Chuck Palahniuk. Norton plays the unnamed narrator, who is discontented with his white-collar job. He forms a "fight club" with soap salesman Tyler Durden (Pitt), and becomes embroiled in a relationship with a mysterious woman, Marla Singer (Bonham Carter).[5][6]

Palahniuk's novel was optioned by Fox 2000 Pictures producer Laura Ziskin, who hired Jim Uhls to write the film adaptation. Fincher was selected because of his enthusiasm for the story. He developed the script with Uhls and sought screenwriting advice from the cast and others in the film industry. It was filmed in and around Los Angeles from July to December 1998. He and the cast compared the film to Rebel Without a Cause (1955) and The Graduate (1967), with a theme of conflict between Generation X and the value system of advertising.[7][8]

Studio executives did not like the film, and they restructured Fincher's intended marketing campaign to try to reduce anticipated losses. Fight Club failed to meet the studio's expectations at the box office, and received polarized reactions from critics. It was ranked as one of the most controversial and talked-about films of the 1990s. The film later found commercial success with its home video release, establishing Fight Club as a cult classic and causing media to revisit the film. In 2009, on the tenth anniversary of the film's release, The New York Times dubbed it the "defining cult movie of our time."[9]
"""
plot="""
The Narrator (who is not named in the movie) is a chronic insomniac who is unfulfilled both by his job as an automobile recall specialist and the material wealth it affords him. As a substitute for therapy, he attends support groups for problems he doesn't really have, such as alcoholism and cancer. Another impostor, Marla Singer, begins attending the same groups. Her presence is taken by the Narrator as a constant reminder of his dishonesty, interfering with the therapeutic effect he's after. He confronts Marla, and proposes they divide group attendance, to which she grudgingly agrees.

On a flight home from a business trip, the Narrator meets Tyler Durden, a soap salesman, who tells him he is trapped by consumerism. The Narrator's apartment and all of his belongings are destroyed by an explosion, so he moves into Tyler's dilapidated house in an industrial area. The two start having consensual fistfights in the parking lot of a bar, which attracts other men and eventually leads to the formation of Fight Club, which meets in the bar's basement. Marla overdoses on pills while the Narrator ignores her phone call for help, but Tyler saves her and they begin a sexual relationship.

The Narrator quits his job and blackmails his boss for the company's assets to support Fight Club. More members join Fight Club, one of them being Robert "Bob" Paulson, a man the Narrator had befriended at a cancer support group. Tyler then recruits the men to his new organization, Project Mayhem, which engages in acts of vandalism. When the Narrator complains about being excluded, Tyler reveals that he was the one who caused the explosion at the Narrator's condo. Tyler disappears, and when Bob is killed by the police during a sabotage operation, the Narrator tries to stop Project Mayhem. He follows a paper trail to cities Tyler had visited and finds that Project Mayhem has spread throughout the country. Marla and the Project members address the Narrator as "Mr. Durden," and he realizes that he and Tyler are the same person.

The Narrator learns that Tyler plans to erase debt by destroying buildings containing credit card records. He tries to warn Marla, but she does not believe him. He goes to the police and is threatened by officers who reveal they are members of Project Mayhem; he then escapes to try to disarm the explosives in one building but is subdued by Tyler and held at gunpoint on the top floor. The Narrator realizes that it is actually himself who is holding the gun, and he fires the weapon into his own mouth, blowing a hole through his cheek. Tyler stands motionless, smoke coiling from his head, and then collapses and vanishes. Marla arrives, being brought by Project members and finds the Narrator badly wounded but alive. He tells her that she met him "at a very strange time" in his life, and they hold hands and watch as the buildings around them explode.
"""

DOCUMENT = re.sub(r'\n|\r', ' ', reception)
DOCUMENT = re.sub(r' +', ' ', reception)
DOCUMENT = reception.strip()
print('Fight Club Production and Reception Summary:\n', summarize(reception, split=False), '\n')
print('Limited Fight Club Production and Reception Summary\n', summarize(reception, word_count=75, split=False), '\n')

Fight Club Production and Reception Summary:
 Fight Club is a 1999 American film directed by David Fincher and starring Brad Pitt, Edward Norton, and Helena Bonham Carter.
Palahniuk's novel was optioned by Fox 2000 Pictures producer Laura Ziskin, who hired Jim Uhls to write the film adaptation. 

Limited Fight Club Production and Reception Summary
 Fight Club is a 1999 American film directed by David Fincher and starring Brad Pitt, Edward Norton, and Helena Bonham Carter.
Palahniuk's novel was optioned by Fox 2000 Pictures producer Laura Ziskin, who hired Jim Uhls to write the film adaptation.
The film later found commercial success with its home video release, establishing Fight Club as a cult classic and causing media to revisit the film. 



### SpaCy

In [94]:
nlp = spacy.load('en_core_web_sm')
def summarize(text):
    doc = nlp(text) # Parse the text with spaCy
    sentences = [sent.text for sent in doc.sents]# Create a list of sentences from the parsed text
    total_words = len([token.text for token in doc if not token.is_stop]) # Calculate the total number of words in the document
    target_words = int(total_words * 0.5)# Calculate the target number of words for the summary (20% of the total words)
    running_words = 0 # running total of words added to the summary
    summary = ''
    for sentence in sentences:
        sent = nlp(sentence)
        sent_words = len([token.text for token in sent if not token.is_stop]) # Calculate the number of words in the sentence (excluding stop words)
        if running_words + sent_words <= target_words: # If adding this sentence to the summary would not exceed the target number of words
            summary += sentence.strip() + ' '
            running_words += sent_words
        else:
            break # breaks the loop if word count coes over
    return summary.strip()
summarize(plot)

'The Narrator (who is not named in the movie) is a chronic insomniac who is unfulfilled both by his job as an automobile recall specialist and the material wealth it affords him. As a substitute for therapy, he attends support groups for problems he doesn\'t really have, such as alcoholism and cancer. Another impostor, Marla Singer, begins attending the same groups. Her presence is taken by the Narrator as a constant reminder of his dishonesty, interfering with the therapeutic effect he\'s after. He confronts Marla, and proposes they divide group attendance, to which she grudgingly agrees. On a flight home from a business trip, the Narrator meets Tyler Durden, a soap salesman, who tells him he is trapped by consumerism. The Narrator\'s apartment and all of his belongings are destroyed by an explosion, so he moves into Tyler\'s dilapidated house in an industrial area. The two start having consensual fistfights in the parking lot of a bar, which attracts other men and eventually leads to

In [95]:
#I thought this was more interesting for the scoring aspect and the potential I think it has but i didn't have time to work with it: https://github.com/kamal2230/text-summarization/blob/master/Summarisation_using_spaCy.ipynb

import spacy
from spacy.lang.en.stop_words import STOP_WORDS
from string import punctuation
from collections import Counter
from heapq import nlargest

doc=plot
nlp = spacy.load('en_core_web_sm')
doc = nlp(doc)
keyword = []
stopwords = list(STOP_WORDS)
pos_tag = ['PROPN', 'ADJ', 'NOUN', 'VERB']
for token in doc:
    if(token.text in stopwords or token.text in punctuation):
        continue
    if(token.pos_ in pos_tag):
        keyword.append(token.text)
freq_word = Counter(keyword)
max_freq = Counter(keyword).most_common(1)[0][1]
for word in freq_word.keys():
        freq_word[word] = (freq_word[word]/max_freq)
freq_word.most_common(5)
sent_strength={}
for sent in doc.sents:
    for word in sent:
        if word.text in freq_word.keys():
            if sent in sent_strength.keys():
                sent_strength[sent]+=freq_word[word.text]
            else:
                sent_strength[sent]=freq_word[word.text]
print(sent_strength)
summarized_sentences = nlargest(3, sent_strength, key=sent_strength.get)
final_sentences = [ w.text for w in summarized_sentences ]
summary = ' '.join(final_sentences)
print('\n Final Summary:\n',summary)#lol, so the final summary gives away the ending.

{
The Narrator (who is not named in the movie) is a chronic insomniac who is unfulfilled both by his job as an automobile recall specialist and the material wealth it affords him.: 1.9285714285714282, As a substitute for therapy, he attends support groups for problems he doesn't really have, such as alcoholism and cancer.: 0.857142857142857, Another impostor, Marla Singer, begins attending the same groups.: 0.857142857142857, Her presence is taken by the Narrator as a constant reminder of his dishonesty, interfering with the therapeutic effect he's after.: 1.5714285714285712, He confronts Marla, and proposes they divide group attendance, to which she grudgingly agrees.

: 0.9285714285714284, On a flight home from a business trip, the Narrator meets Tyler Durden, a soap salesman, who tells him he is trapped by consumerism.: 2.785714285714286, The Narrator's apartment and all of his belongings are destroyed by an explosion, so he moves into Tyler's dilapidated house in an industrial area