# Extracting key words and key sentences from text of Beauty and the Beast with SVD and Rank k approximation

## import libraries

In [727]:
import re
from sklearn.feature_extraction.text import CountVectorizer, TfidfTransformer
from sklearn.pipeline import make_pipeline
import pandas as pd
import numpy as np
from sklearn.decomposition import NMF

## reading the data

In [735]:
# Read the whole story into one string.
# errors='ignore' silently drops any bytes that are not valid UTF-8 instead of raising.
with open('beauty.txt', encoding="utf8", errors='ignore') as f:
    text=f.read()

## split sentences of data

In [736]:
# Naive sentence split: every '.' is treated as the end of a sentence.
# NOTE: `text` is rebound from a str to a list here, so this cell cannot be
# re-run on its own without re-reading the file first.
text=text.split('.')
text

['There was once a very rich merchant, who had six children, three sons, and three daughters; being a man of sense, he spared no cost for their education, but gave them all kinds of masters',
 ' His daughters were extremely handsome, especially the youngest',
 ' When she was little everybody admired her, and called her "The little Beauty;" so that, as she grew up, she still went by the name of Beauty, which made her sisters very jealous',
 '\nThe youngest, as she was handsomer, was also better than her sisters',
 ' The two eldest had a great deal of pride, because they were rich',
 " They gave themselves ridiculous airs, and would not visit other merchants' daughters, nor keep company with any but persons of quality",
 ' They went out every day to parties of pleasure, balls, plays, concerts, and so forth, and they laughed at their youngest sister, because she spent the greatest part of her time in reading good books',
 '\nAs it was known that they were great fortunes, several eminent m

## remove '\n' in text

In [737]:
# Replace every newline inside a sentence with a single space.
text2=[re.sub("\n",' ',line) for line in text]
text2

['There was once a very rich merchant, who had six children, three sons, and three daughters; being a man of sense, he spared no cost for their education, but gave them all kinds of masters',
 ' His daughters were extremely handsome, especially the youngest',
 ' When she was little everybody admired her, and called her "The little Beauty;" so that, as she grew up, she still went by the name of Beauty, which made her sisters very jealous',
 ' The youngest, as she was handsomer, was also better than her sisters',
 ' The two eldest had a great deal of pride, because they were rich',
 " They gave themselves ridiculous airs, and would not visit other merchants' daughters, nor keep company with any but persons of quality",
 ' They went out every day to parties of pleasure, balls, plays, concerts, and so forth, and they laughed at their youngest sister, because she spent the greatest part of her time in reading good books',
 ' As it was known that they were great fortunes, several eminent mer

In [738]:
text=text2

In [739]:
len(text)

173

## create term-sentence matrix

In [740]:
# Raw term counts (English stop words removed) are re-weighted with TF-IDF.
pipeline = make_pipeline(CountVectorizer(stop_words="english"),TfidfTransformer())

In [741]:
p=pipeline.fit_transform(text)
type(p)

scipy.sparse.csr.csr_matrix

In [742]:
pipeline.get_feature_names_out()

array(['absence', 'accept', 'accord', 'account', 'accustomed',
       'acquaintance', 'act', 'action', 'added', 'addresses', 'admired',
       'admiring', 'adventure', 'affable', 'affection', 'affectionate',
       'affectionately', 'afflicted', 'afraid', 'agreeably', 'airs',
       'alas', 'aloud', 'amazement', 'amiable', 'amuse', 'amusement',
       'anger', 'angry', 'answer', 'answered', 'anybody', 'apartment',
       'apartments', 'appear', 'appeared', 'appears', 'applied',
       'apprehend', 'apprehensions', 'arbor', 'arbors', 'arms', 'army',
       'arrived', 'ask', 'asked', 'asking', 'asleep', 'astonished',
       'astonishment', 'ate', 'attempt', 'attend', 'attention', 'away',
       'balls', 'banish', 'beast', 'beat', 'beautiful', 'beautifullest',
       'beauty', 'bed', 'bedchamber', 'bedside', 'began', 'begged',
       'beginning', 'behaved', 'behavior', 'beheld', 'behold', 'believe',
       'believed', 'bell', 'belongs', 'beloved', 'beseech', 'best',
       'better', 'bid'

In [743]:
p_array=p.toarray()
p_array

array([[0.        , 0.        , 0.        , ..., 0.        , 0.        ,
        0.        ],
       [0.        , 0.        , 0.        , ..., 0.        , 0.        ,
        0.43192275],
       [0.        , 0.        , 0.        , ..., 0.        , 0.        ,
        0.        ],
       ...,
       [0.        , 0.        , 0.        , ..., 0.        , 0.        ,
        0.        ],
       [0.        , 0.        , 0.        , ..., 0.        , 0.        ,
        0.        ],
       [0.        , 0.        , 0.        , ..., 0.        , 0.        ,
        0.        ]])

In [744]:
# Build the transposed (term x sentence) matrix straight from the sparse result `p`.
# Transposing `p_array` in place would flip it back every time this cell is re-run.
p_array=p.toarray().T

In [745]:
p_array

array([[0.        , 0.        , 0.        , ..., 0.        , 0.        ,
        0.        ],
       [0.        , 0.        , 0.        , ..., 0.        , 0.        ,
        0.        ],
       [0.        , 0.        , 0.        , ..., 0.        , 0.        ,
        0.        ],
       ...,
       [0.        , 0.        , 0.        , ..., 0.        , 0.        ,
        0.        ],
       [0.        , 0.        , 0.        , ..., 0.        , 0.        ,
        0.        ],
       [0.        , 0.43192275, 0.        , ..., 0.        , 0.        ,
        0.        ]])

In [746]:
# Label the rows of the matrix with the vocabulary terms; columns keep the default
# integer labels (the sentence positions).
data=pd.DataFrame(data=p_array,index=pipeline.get_feature_names_out())

In [747]:
data

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,163,164,165,166,167,168,169,170,171,172
absence,0.0,0.000000,0.0,0.000000,0.0,0.0,0.000000,0.0,0.000000,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.00000
accept,0.0,0.000000,0.0,0.000000,0.0,0.0,0.000000,0.0,0.000000,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.00000
accord,0.0,0.000000,0.0,0.000000,0.0,0.0,0.000000,0.0,0.000000,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.00000
account,0.0,0.000000,0.0,0.000000,0.0,0.0,0.000000,0.0,0.000000,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.00000
accustomed,0.0,0.000000,0.0,0.000000,0.0,0.0,0.000000,0.0,0.000000,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.00000
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
years,0.0,0.000000,0.0,0.000000,0.0,0.0,0.000000,0.0,0.301202,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.49131
yes,0.0,0.000000,0.0,0.000000,0.0,0.0,0.000000,0.0,0.000000,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.00000
yield,0.0,0.000000,0.0,0.000000,0.0,0.0,0.000000,0.0,0.000000,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.00000
young,0.0,0.000000,0.0,0.000000,0.0,0.0,0.000000,0.0,0.344946,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.00000


## SVD

In [748]:
# Full SVD of the term-sentence matrix.
# np.linalg.svd returns the right singular vectors already transposed, so the
# ROWS of `V` (e.g. V[0]) are the singular vectors, not its columns.
U,S,V=np.linalg.svd(p_array)

In [749]:
# U : term
U.shape

(964, 964)

In [750]:
# V : sentence
V.shape

(173, 173)

In [751]:
U

array([[-5.86661855e-03,  1.10185848e-02,  3.24513012e-03, ...,
         1.73672894e-03, -4.13317546e-03,  1.81269651e-02],
       [-1.22525314e-02, -1.46278067e-02,  8.84421986e-03, ...,
         3.86366634e-04,  2.63995751e-03, -3.59918715e-01],
       [-6.11068492e-03,  2.89858464e-02, -1.42121189e-02, ...,
        -2.04967481e-02, -6.06288371e-03,  1.66687054e-03],
       ...,
       [-8.56735978e-03, -5.23956141e-03,  2.64131090e-02, ...,
         7.60858328e-01, -6.47570807e-03, -2.35151828e-03],
       [-1.02816029e-02, -8.68569976e-03,  2.40273137e-02, ...,
         1.13391117e-03,  8.57231627e-01,  8.34610763e-04],
       [-1.83993956e-02, -9.83115251e-04,  9.47104953e-03, ...,
        -3.92227581e-04, -3.20037985e-03,  5.31005202e-01]])

In [752]:
enum_U=list(enumerate(U.T[0]))
enum_U

[(0, -0.005866618549424984),
 (1, -0.012252531356775006),
 (2, -0.006110684923396457),
 (3, -0.015718398913686545),
 (4, -0.006340962549265889),
 (5, -0.004847991258305885),
 (6, -0.012508193768173009),
 (7, -0.014918537177836776),
 (8, -0.01358434107366359),
 (9, -0.006331672663096156),
 (10, -0.01930633214014001),
 (11, -0.005522013401897899),
 (12, -0.017174395433778664),
 (13, -0.005731459018453555),
 (14, -0.017089203759012),
 (15, -0.006578630213010852),
 (16, -0.011052333608068298),
 (17, -0.03283793790629643),
 (18, -0.018778748958963472),
 (19, -0.010916696322737797),
 (20, -0.005710088605738574),
 (21, -0.044972601453078964),
 (22, -0.006841871913859508),
 (23, -0.010551466423077175),
 (24, -0.008396291796907195),
 (25, -0.023982461674669532),
 (26, -0.008381156648787542),
 (27, -0.0019687457260047147),
 (28, -0.009891920041434585),
 (29, -0.009891920041434646),
 (30, -0.08035263021374313),
 (31, -0.0027899795983213843),
 (32, -0.0055220134018979464),
 (33, -0.003286791232581

In [753]:
# The sign of a singular vector is arbitrary (u and -u are equally valid), so rank
# the terms by the magnitude of their loading instead of relying on the entries
# happening to be negative.
sorted_U=sorted(enum_U,key = lambda x: abs(x[1]),reverse=True)

In [754]:
sorted_U

[(62, -0.41732957571314494),
 (727, -0.4094446571813477),
 (58, -0.28752643401569505),
 (287, -0.24384576395930105),
 (553, -0.17282910026371595),
 (206, -0.1471676893991851),
 (753, -0.14530263663130313),
 (629, -0.14226392682861752),
 (362, -0.1400964068885248),
 (355, -0.13537421500896737),
 (930, -0.11216586251850677),
 (283, -0.11141556310474159),
 (599, -0.10658267196724054),
 (877, -0.10019895000014516),
 (188, -0.09379615874116677),
 (490, -0.08660320131241858),
 (767, -0.08472086527879337),
 (30, -0.08035263021374313),
 (472, -0.07839448731328275),
 (536, -0.07416381691588707),
 (555, -0.07356723166697099),
 (100, -0.07018687325000916),
 (387, -0.06898139814690547),
 (433, -0.06871457615608555),
 (568, -0.06734582613950647),
 (520, -0.06517095544144391),
 (189, -0.06498282042003396),
 (461, -0.06317428847619025),
 (738, -0.06234600237265885),
 (645, -0.06161505023106619),
 (63, -0.06148149703981574),
 (735, -0.060947122296731854),
 (851, -0.05645560759037172),
 (864, -0.056160

### 10 keywords with SVD

In [755]:
pipeline.get_feature_names_out()[62]

'beauty'

In [756]:
pipeline.get_feature_names_out()[727]

'said'

In [757]:
pipeline.get_feature_names_out()[58]

'beast'

In [758]:
pipeline.get_feature_names_out()[287]

'father'

In [759]:
pipeline.get_feature_names_out()[553]

'monster'

In [760]:
pipeline.get_feature_names_out()[206]

'die'

In [761]:
pipeline.get_feature_names_out()[753]

'shall'

In [762]:
pipeline.get_feature_names_out()[629]

'poor'

In [763]:
pipeline.get_feature_names_out()[362]

'great'

In [764]:
pipeline.get_feature_names_out()[355]

'good'

In [765]:
V[0]

array([-0.03954608, -0.01473029, -0.09757226, -0.02604881, -0.04449107,
       -0.01423601, -0.0348768 , -0.04811231, -0.07056903, -0.05237529,
       -0.03573383, -0.0367928 , -0.04676167, -0.06141135, -0.08547499,
       -0.05947873, -0.00700705, -0.00288822, -0.04008976, -0.05870293,
       -0.05977968, -0.01419591, -0.08947715, -0.24742784, -0.03111155,
       -0.08023986, -0.07724823, -0.02904222, -0.04299501, -0.02474019,
       -0.07315028, -0.00458493, -0.03876991, -0.02117863, -0.03090683,
       -0.03668548, -0.07614954, -0.00943329, -0.06218018, -0.04945671,
       -0.09556765, -0.0813023 , -0.04725551, -0.06651922, -0.01726458,
       -0.01617871, -0.01135788, -0.04296864, -0.10171305, -0.13731426,
       -0.06530251, -0.03687737, -0.02473954, -0.11587379, -0.08734861,
       -0.08386532, -0.1118856 , -0.08705602, -0.09312082, -0.04345428,
       -0.02659612, -0.15801055, -0.00947225, -0.0587756 , -0.08565458,
       -0.02279596, -0.06083066, -0.02739412, -0.07321487, -0.05

In [766]:
enum_V=list(enumerate(V[0]))
enum_V

[(0, -0.03954608390684885),
 (1, -0.014730287072940884),
 (2, -0.0975722580962135),
 (3, -0.026048814361915973),
 (4, -0.04449107011040933),
 (5, -0.01423600536366522),
 (6, -0.03487679579133233),
 (7, -0.04811230853973891),
 (8, -0.0705690286027164),
 (9, -0.052375290850585114),
 (10, -0.035733827794839954),
 (11, -0.03679279795058559),
 (12, -0.046761668470898014),
 (13, -0.06141134553034111),
 (14, -0.08547498916728724),
 (15, -0.05947873495127003),
 (16, -0.007007047989978027),
 (17, -0.002888224962074221),
 (18, -0.04008976455986919),
 (19, -0.058702933588340285),
 (20, -0.05977967527405458),
 (21, -0.01419591388666823),
 (22, -0.08947715252957511),
 (23, -0.24742783698040074),
 (24, -0.031111549582612058),
 (25, -0.08023986337012579),
 (26, -0.07724822721831069),
 (27, -0.02904221704467022),
 (28, -0.04299500822958626),
 (29, -0.024740187100521494),
 (30, -0.07315027634249716),
 (31, -0.004584929042927142),
 (32, -0.03876990886345643),
 (33, -0.02117862945778709),
 (34, -0.030906

In [767]:
# The sign of a singular vector is arbitrary (v and -v are equally valid), so rank
# the sentences by the magnitude of their loading instead of relying on the entries
# happening to be negative.
sorted_V=sorted(enum_V,key = lambda x: abs(x[1]),reverse=True)
sorted_V

[(23, -0.24742783698040074),
 (152, -0.21008606794914958),
 (92, -0.1766857425808229),
 (61, -0.15801055323885005),
 (103, -0.15652490382433454),
 (77, -0.1498838314089087),
 (105, -0.13800651352472718),
 (49, -0.1373142598673027),
 (146, -0.1357867786630932),
 (116, -0.1341945120527907),
 (117, -0.1316468305965242),
 (74, -0.12926197657390048),
 (97, -0.1258288009129602),
 (121, -0.12366632332666236),
 (76, -0.12256298262331233),
 (106, -0.12162546915819046),
 (151, -0.12065079830948545),
 (53, -0.11587378625871003),
 (123, -0.11484919832848887),
 (56, -0.11188560419035698),
 (118, -0.11104893080548493),
 (86, -0.10663950302247543),
 (78, -0.10388184543150898),
 (72, -0.10299757592163161),
 (48, -0.10171305028325413),
 (99, -0.10074952128532602),
 (124, -0.1006728707943361),
 (162, -0.1004856147916407),
 (71, -0.09957055811570743),
 (110, -0.09881919513507213),
 (2, -0.0975722580962135),
 (79, -0.09752962663569803),
 (40, -0.09556764674988863),
 (136, -0.09485926060161476),
 (95, -0.0

### 5 key sentences with SVD

In [768]:
text[23]

' "What will you have, Beauty?" said her father'

In [769]:
text[152]

'" "No, dear Beast," said Beauty, "you must not die'

In [770]:
text[92]

' "Beauty," said the monster, "will you give me leave to see you sup?" "That is as you please," answered Beauty trembling'

In [771]:
text[61]

'" "Indeed father," said Beauty, "you shall not go to the palace without me, you cannot hinder me from following you'

In [772]:
text[103]

' At last, however, she said trembling, "no Beast'

## Rank k approximation

### using NMF for decomposition

In [773]:
# Non-negative factorisation of the term-sentence matrix into 5 components.
# random_state=0 fixes the random initialisation so the factors are reproducible.
model = NMF(n_components=5, init='random',max_iter=500, random_state=0)
C = model.fit_transform(p_array)  # one row per row of p_array (term), one column per component
D = model.components_  # one row per component, one column per column of p_array (sentence)

In [774]:
C

array([[0.        , 0.0249053 , 0.01311429, 0.02281287, 0.        ],
       [0.        , 0.12568674, 0.        , 0.        , 0.        ],
       [0.        , 0.        , 0.13140181, 0.        , 0.        ],
       ...,
       [0.17151575, 0.        , 0.        , 0.        , 0.        ],
       [0.        , 0.14133088, 0.        , 0.        , 0.        ],
       [0.10281944, 0.        , 0.05875812, 0.        , 0.02705876]])

In [775]:
C.shape

(964, 5)

In [776]:
D

array([[0.00000000e+00, 3.16219288e-03, 7.51252994e-02, 2.72316555e-02,
        0.00000000e+00, 0.00000000e+00, 2.29255631e-03, 0.00000000e+00,
        4.30690676e-03, 9.31914087e-04, 6.02478585e-03, 1.79760381e-04,
        2.86010239e-02, 0.00000000e+00, 2.02499344e-02, 1.40633561e-02,
        1.05112777e-02, 1.52968834e-03, 3.40686676e-02, 0.00000000e+00,
        4.53405259e-02, 1.77789167e-03, 1.20757981e-02, 3.53020687e-02,
        2.23930189e-02, 5.61677602e-02, 0.00000000e+00, 0.00000000e+00,
        0.00000000e+00, 7.34243164e-03, 0.00000000e+00, 0.00000000e+00,
        0.00000000e+00, 0.00000000e+00, 0.00000000e+00, 0.00000000e+00,
        3.29836969e-03, 0.00000000e+00, 0.00000000e+00, 0.00000000e+00,
        1.61330745e-02, 0.00000000e+00, 0.00000000e+00, 0.00000000e+00,
        8.58013007e-04, 0.00000000e+00, 0.00000000e+00, 0.00000000e+00,
        4.03349319e-03, 0.00000000e+00, 0.00000000e+00, 0.00000000e+00,
        8.13813406e-04, 7.87928440e-03, 3.70347411e-02, 2.910754

In [777]:
D.shape

(5, 173)

### function that calculates the 2-norm of each column of a matrix and returns the index of the column with the maximum norm

In [778]:
def arg_max(D,roundd):
    """Return the index of the column of ``D`` with the largest 2-norm.

    Only rows ``roundd`` and below take part in the norm, so rows that were
    already handled in earlier rounds are ignored.

    Parameters
    ----------
    D : 2-D array-like
        Matrix whose columns are compared.
    roundd : int
        Index of the first row included in each column norm.

    Returns
    -------
    numpy integer
        Index of the first column whose norm is maximal.
    """
    D = np.asarray(D)
    # One vectorised call over every column; the column count comes from D
    # itself instead of being fixed at 173.
    column_norms = np.linalg.norm(D[roundd:, :], axis=0)
    return np.argmax(column_norms)

### a key sentence with rank k approximation

In [779]:
arg_max_D=arg_max(D,0)
arg_max_D

152

In [780]:
text[152]

'" "No, dear Beast," said Beauty, "you must not die'

### function that swaps the key-sentence column with the column at the current round's position

In [781]:
def swap(top_sentence,roundd,D):
    """Return a copy of ``D`` with columns ``roundd`` and ``top_sentence`` exchanged.

    Equivalent to multiplying ``D`` on the right by the permutation matrix that
    swaps those two columns, but without building that matrix, and valid for
    any number of columns.

    Parameters
    ----------
    top_sentence : int
        Index of the column to bring forward.
    roundd : int
        Index of the column it trades places with.
    D : 2-D array-like
        Matrix to permute; it is not modified.

    Returns
    -------
    numpy.ndarray
        Float array with the same shape as ``D``.
    """
    # Fresh float copy so the caller's matrix is left untouched.
    DP = np.array(D, dtype=float)
    # Fancy indexing on the right-hand side copies first, so this is a true swap.
    DP[:, [roundd, top_sentence]] = DP[:, [top_sentence, roundd]]
    return DP

In [782]:
DP1=swap(152,0,D)

In [783]:
DP1.shape

(5, 173)

### function for sign

In [784]:
def sign(x):
    """Return 1 when ``x`` is non-negative (zero included) and -1 otherwise."""
    return 1 if x>=0 else -1

### function that builds the Householder matrix

In [785]:
def house_holder(roundd,DP):
    """Build the Householder reflection for column ``roundd`` of ``DP``.

    The returned matrix ``Q`` is the identity in its first ``roundd`` rows and
    columns; its trailing block reflects ``DP[roundd:, roundd]`` onto a multiple
    of the first unit vector, so ``(Q.T @ DP)[roundd+1:, roundd]`` is zero.

    Parameters
    ----------
    roundd : int
        Index of the pivot row/column.
    DP : 2-D array-like
        Matrix whose column ``roundd`` is to be reduced. Its row count sets the
        size of ``Q`` (previously fixed at 5).

    Returns
    -------
    numpy.ndarray
        Symmetric orthogonal matrix of shape ``(DP.shape[0], DP.shape[0])``.

    Raises
    ------
    IndexError
        If ``roundd`` is not a valid row/column index of ``DP``.
    """
    DP = np.asarray(DP, dtype=float)
    k = DP.shape[0]
    X = DP[roundd:, roundd]
    if X.size == 0:
        raise IndexError("roundd is out of range for the rows of DP")

    Q = np.identity(k)
    norm_X = np.linalg.norm(X)
    if norm_X == 0:
        # The column is already zero: nothing to reflect, and the formula
        # below would divide 0 by 0 and fill Q with NaN.
        return Q

    # v = x + sign(x0) * ||x|| * e1; matching the sign of x0 avoids cancellation.
    V = X.copy()
    V[0] += norm_X if X[0] >= 0 else -norm_X
    H = np.identity(k - roundd) - 2.0 * np.outer(V, V) / np.dot(V, V)
    Q[roundd:, roundd:] = H
    return Q

In [786]:
Q1=house_holder(0,DP1)

In [787]:
Q1

array([[-0.10025434, -0.0023913 ,  0.        ,  0.        , -0.99495897],
       [-0.0023913 ,  0.9999948 ,  0.        ,  0.        , -0.00216245],
       [ 0.        ,  0.        ,  1.        ,  0.        ,  0.        ],
       [ 0.        ,  0.        ,  0.        ,  1.        ,  0.        ],
       [-0.99495897, -0.00216245,  0.        ,  0.        ,  0.10025953]])

### function that calculates the new C and D

In [788]:
def CD_new(Q,DP,C,roundd):
    """Apply the reflection ``Q`` to both factors and normalise the pivot to 1.

    With ``Q`` orthogonal the product is preserved: ``C_new @ D_new`` equals
    ``C @ DP``. Row ``roundd`` of ``Q.T @ DP`` is divided by its diagonal entry
    and column ``roundd`` of ``C @ Q`` is multiplied by it.

    Parameters
    ----------
    Q : numpy.ndarray, shape (k, k)
        Reflection for this round.
    DP : numpy.ndarray, shape (k, n)
        Right factor with the chosen column already swapped into place.
    C : numpy.ndarray, shape (m, k)
        Left factor.
    roundd : int
        Index of the pivot row/column.

    Returns
    -------
    (C_new, D_new) : tuple of numpy.ndarray
        Updated factors with shapes ``(m, k)`` and ``(k, n)``.

    Raises
    ------
    numpy.linalg.LinAlgError
        If the pivot is exactly zero, so the scaling cannot be undone.
    """
    Q_DP = np.matmul(Q.T, DP)
    C_Q = np.matmul(C, Q)
    pivot = Q_DP[roundd][roundd]
    if pivot == 0:
        raise np.linalg.LinAlgError("Singular matrix: pivot entry is zero")

    # The scaling matrix is the identity except for one diagonal entry, so
    # scale the single affected column/row instead of building and inverting it.
    C_new = np.array(C_Q, dtype=float)
    C_new[:, roundd] *= pivot
    D_new = np.array(Q_DP, dtype=float)
    D_new[roundd, :] /= pivot
    return C_new,D_new

In [789]:
C1,D1=CD_new(Q1,DP1,C,0)

In [790]:
C1.shape

(964, 5)

### function that gives the 10 most important words of the key sentence

In [791]:
def top_terms(C,roundd,n=10):
    """Return the ``n`` largest entries of column ``roundd`` of ``C``.

    Parameters
    ----------
    C : numpy.ndarray, 2-D
        Matrix whose column is ranked.
    roundd : int
        Index of the column to rank.
    n : int, optional
        How many entries to return (default 10).

    Returns
    -------
    list of (int, float)
        ``(row_index, value)`` pairs in descending order of value; rows with
        equal values keep their original relative order.
    """
    ranked = sorted(enumerate(C.T[roundd]), key=lambda pair: pair[1], reverse=True)
    return ranked[:n]

### 2 keywords with rank k approximation

In [792]:
sorted_C1=top_terms(C1,0)
sorted_C1

[(727, 0.3624408018413213),
 (58, 0.23429468005326806),
 (206, 0.19101695669544505),
 (553, 0.16863436762337894),
 (877, 0.12594039578844077),
 (62, 0.12467970342940478),
 (629, 0.08280979238565978),
 (891, 0.0777366281310048),
 (461, 0.07355423288174213),
 (188, 0.07342271997236728)]

In [793]:
pipeline.get_feature_names_out()[727]

'said'

In [794]:
pipeline.get_feature_names_out()[58]

'beast'

In [795]:
D1.shape

(5, 173)

In [796]:
D1[:,0]

array([ 1.00000000e+00, -6.88507910e-20,  0.00000000e+00,  0.00000000e+00,
       -3.46944695e-18])

### a key sentence with rank k approximation

In [797]:
arg_max_D1=arg_max(D1,1)
arg_max_D1

68

In [798]:
text[68]

' The horse went of himself into the stable, and the good man and his daughter came into the great hall, where they found a table splendidly served up, and two covers'

In [799]:
DP2=swap(68,1,D1)

In [800]:
DP2.shape

(5, 173)

In [801]:
Q2=house_holder(1,DP2)
Q2

array([[ 1.        ,  0.        ,  0.        ,  0.        ,  0.        ],
       [ 0.        ,  0.        , -0.98406243, -0.17782332,  0.        ],
       [ 0.        , -0.98406243,  0.03162113, -0.17498925,  0.        ],
       [ 0.        , -0.17782332, -0.17498925,  0.96837887,  0.        ],
       [ 0.        ,  0.        ,  0.        ,  0.        ,  1.        ]])

In [802]:
C2,D2=CD_new(Q2,DP2,C1,1)

### 2 keywords with rank k approximation

In [803]:
sorted_C2=top_terms(C2,1)
sorted_C2

[(355, 0.2012551227038237),
 (930, 0.18778169915320694),
 (100, 0.14902226917345482),
 (520, 0.1439600535567786),
 (63, 0.13768283895304417),
 (830, 0.11245129044895769),
 (414, 0.09835914443996492),
 (372, 0.08749904224516704),
 (568, 0.08056199875264478),
 (362, 0.0701659929596902)]

In [804]:
pipeline.get_feature_names_out()[355]

'good'

In [805]:
pipeline.get_feature_names_out()[930]

'went'

In [806]:
D2[:,0]

array([ 1.00000000e+00,  0.00000000e+00,  6.77534768e-20,  1.22432761e-20,
       -3.46944695e-18])

In [807]:
D2[:,1]

array([ 0.00000000e+00,  1.00000000e+00,  1.22136544e-19, -5.06629220e-18,
        0.00000000e+00])

### a key sentence with rank k approximation

In [808]:
arg_max_D2=arg_max(D2,2)
arg_max_D2

4

In [809]:
text[4]

' The two eldest had a great deal of pride, because they were rich'

In [810]:
DP3=swap(4,2,D2)

In [811]:
Q3=house_holder(2,DP3)

In [812]:
Q3

array([[ 1.        ,  0.        ,  0.        ,  0.        ,  0.        ],
       [ 0.        ,  1.        ,  0.        ,  0.        ,  0.        ],
       [ 0.        ,  0.        , -0.17782332,  0.98406243,  0.        ],
       [ 0.        ,  0.        ,  0.98406243,  0.17782332,  0.        ],
       [ 0.        ,  0.        ,  0.        ,  0.        ,  1.        ]])

In [813]:
C3,D3=CD_new(Q3,DP3,C2,2)

### 2 keywords with rank k approximation

In [814]:
sorted_C3=top_terms(C3,2)
sorted_C3

[(362, 0.3047528914278769),
 (187, 0.12439956665420085),
 (58, 0.10517610797226831),
 (352, 0.0912763321926017),
 (661, 0.07842265360746081),
 (536, 0.07058187722062384),
 (243, 0.06948821513620902),
 (707, 0.0655162063766241),
 (819, 0.06451343101789972),
 (641, 0.06317802140659928)]

In [815]:
pipeline.get_feature_names_out()[362]

'great'

In [816]:
pipeline.get_feature_names_out()[187]

'deal'

In [817]:
D3[:,2]

array([ 0.00000000e+00,  1.76377230e-01,  1.00000000e+00, -1.78379922e-17,
        0.00000000e+00])

### a key sentence with rank k approximation

In [818]:
arg_max_D3=arg_max(D3,3)
arg_max_D3

23

In [819]:
text[23]

' "What will you have, Beauty?" said her father'

In [820]:
DP4=swap(23,3,D3)

In [821]:
Q4=house_holder(3,DP4)

In [822]:
C4,D4=CD_new(Q4,DP4,C3,3)

### 2 keywords from rank k approximation

In [823]:
sorted_C4=top_terms(C4,3)
sorted_C4

[(287, 0.32276035185680446),
 (62, 0.19301730162308972),
 (753, 0.13398727155316553),
 (645, 0.10919394400081575),
 (472, 0.1072078164621924),
 (800, 0.1037130461204368),
 (727, 0.08985762617414975),
 (927, 0.07801076777299529),
 (189, 0.07598910215111816),
 (864, 0.07424738481957198)]

In [824]:
pipeline.get_feature_names_out()[287]

'father'

In [825]:
pipeline.get_feature_names_out()[62]

'beauty'

### a key sentence with rank k approximation

In [826]:
arg_max_D4=arg_max(D4,4)
arg_max_D4

123

In [827]:
text[123]

' Farewell Beauty'

In [828]:
# Fifth round: move sentence 123 into column 4, build the Householder matrix
# for that column, then update both factors.
DP5=swap(123,4,D4)
Q5=house_holder(4,DP5)
C5,D5=CD_new(Q5,DP5,C4,4)

### 2 keywords from rank k approximation

In [829]:
sorted_C5=top_terms(C5,4)
sorted_C5

[(62, 0.40545859509274296),
 (283, 0.309735990600124),
 (767, 0.14544994671907685),
 (30, 0.10289617075217625),
 (205, 0.08649299086998936),
 (553, 0.0634286928742829),
 (433, 0.06277888087065962),
 (945, 0.05997904614870898),
 (498, 0.05289981152817064),
 (447, 0.052517171302930314)]

In [830]:
pipeline.get_feature_names_out()[283]

'farewell'

In [831]:
pipeline.get_feature_names_out()[767]

'sisters'