# Topic Modeling (with LSA) & Sentiment Analysis (with NB and LR)

## 1. Libraries

In [13]:
import numpy as np
import pandas as pd
import regex as re
from nltk.tokenize import word_tokenize
from nltk.corpus import stopwords
from nltk.stem.snowball import SnowballStemmer
import treetaggerwrapper
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.feature_extraction.text import TfidfTransformer
from sklearn.preprocessing import Normalizer
from sklearn.decomposition import TruncatedSVD
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.model_selection import train_test_split
from sklearn.naive_bayes import MultinomialNB
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import confusion_matrix
from tqdm import tqdm_notebook

## 2. Data Loading & Pre-Processing

In [14]:
# Loading the Data
csv = pd.read_csv('IMDB.csv') # 50,000 labeled {positive, negative} movie reviews

# Collection of reviews <- 'Review' column from the csv
collection = [review for review in csv.iloc[:,0]]

# 'y' into SKLearn (without OneHotEncoding) <- 'Sentiment' column from the csv
y = csv.iloc[:, 1]

## 3. Functions

### 3.1 Generating the Corpus & the Vocabulary

In [15]:
def generate_corpus_vocabulary(collection):
    
    """
    Description: Normalizes each document in a collection to generate a corpus & the vocabulary.
    
    - Input: 
        (1) A list of raw documents [[doc1],[doc2],...,[docN]] where each doc is a raw string.
        
    - Output:
        (1) A list of processed documents [[doc1], [doc2],..., [docN]] where on each doc
        the following operations have been applied:
            - Removal of non-alphabetic characters
            - Case folding of all words
            - Tokenization for each '\w+'
            - Removal of stopwords and punctuation
            - Lemmatization & stemming
        (2) A list [[word1],[word2],...,[wordN]] of all the unique words in the corpus.
    """
    
    # --------------------OBJECTS--------------------
    
    # Stopwords (NLTK)
    stopWords = stopwords.words('english')
    # Punctuation (Custom)
    punct = list(r"!`\"»«',(-....:;<>?)")
    # Lemmatizer (TreeTagger)
    tagger = treetaggerwrapper.TreeTagger(TAGLANG='en')
    # Stemmer (PorterStemmer)
    stemmer = SnowballStemmer("english")
    
    # --------------------LOOP--------------------
    
    corpus = []
    for doc in tqdm_notebook(collection):
        # Alphabetization
        _doc = re.sub('[^A-Za-z]', ' ', doc)
        # Case Folding
        _doc = _doc.lower()
        # Tokenization
        _doc = word_tokenize(_doc)
        # StopWords
        _doc = [word for word in _doc if not word in stopWords]
        # Punctuation
        _doc = [word for word in _doc if not word in punct]
        # Lemmatization
        _doc = [re.split(r'\t', word)[2] for word in tagger.tag_text(_doc)]
        # Stemming
        # _doc = [stemmer.stem(word) for word in _doc] # Better without
        # Joining
        _doc = ' '.join(_doc)
        # Appending
        if _doc != '':
            corpus.append(_doc)
    
    # --------------------VOCABULARY--------------------
    
    vocabulary = sorted(list(set(' '.join(corpus).split(" "))))
    
    # --------------------PRINT--------------------
    
    print("No. of Documents in the Corpus:", len(corpus))
    print("No. of Unique Words in the Corpus:", len(vocabulary))
    
    return corpus, vocabulary

### 3.2 Generating a Bag-Of-Words (BOW) Matrix

In [25]:
def generate_bow_matrix(corpus, min_df, vocabulary):
    
    """
    Description: Generates a Bag-Of-Words Matrix.
    
    - Input: A corpus of processed documents [doc1, doc2,..., docN].
    
    - Output: A (m x n) bag-of-words matrix, where m is the number of documents 
    in the corpus and n is the number of specified maximum features.
    """
    
    # Model
    bow_vectorizer = CountVectorizer(min_df=min_df,
                                     vocabulary=vocabulary)
    # Fit & Transform
    bow_matrix = bow_vectorizer.transform(collection)

    print("A", bow_matrix.shape, "BOW Matrix has been generated.\n")
    
    return bow_matrix

### 3.3 Transforming BOW Matrix into TF-IDF Matrix

In [17]:
def transform_into_tfidf(bow_matrix):
    
    """
    Description: Transforms a BOW Matrix into a TF-IDF Matrix.
    
    - Input: A BOW Matrix & a specified number N of maximum
    features(i.e., top N most frequent unique words).
    
    - Output: A (m x n) TF-IDF matrix, where m is the number of documents
    in the corpus and n is the number of specified maximum features.
    """

    # Model
    tfidf_transformer = TfidfTransformer()
    # Fit & Transform
    tfidf_matrix = tfidf_transformer.fit_transform(bow_matrix).toarray()
    
    print("A", bow_matrix.shape, "BOW Matrix has been transformed into a", tfidf_matrix.shape, "TF-IDF Matrix.\n")
    
    return tfidf_matrix

### 3.4 Transforming TF-IDF Matrix into (Truncated) SVD Matrix

In [18]:
def transform_into_svd(tfidf_matrix, n_components, n_iter):
    
    """
    Description: Transforms a TF-IDF Matrix into a (TruncateD) SVD Matrix.
    
    - Input: A TF-IDF Matrix, a specified number of maximum components
    (latent topics), & a specified number of iterations of the algorithm.
    
    - Output: A (m x n) SVD matrix, where m is the number of documents in the 
    corpus and n is the number of specified maximum components.
    """
    
    # Normalization
    normalizer = Normalizer(copy=False)    
    tfidf_matrix = normalizer.fit_transform(tfidf_matrix)
    # Model
    svd = TruncatedSVD(n_components=n_components, n_iter=n_iter)
    # Fit 
    svd.fit(tfidf_matrix)
    # Topics
    topics = svd.components_
    # Index
    index = ["document{}".format(i) for i in range(len(tfidf_matrix))]
    # Columns
    columns = ["topic{}".format(i) for i in range(n_components)]
    # DataFrame
    svd_matrix = pd.DataFrame(svd.transform(tfidf_matrix),
                              index=index,
                              columns=columns)
    
    print("A", tfidf_matrix.shape, "TF-IDF Matrix has been transformed into a", svd_matrix.shape, "SVD Matrix.\n")
    
    return svd_matrix, topics

### 3.5 Extracting Latent Topics from (Truncated) SVD Matrix 

In [19]:
def latent_topics(terms, topics):
    for i, topic in enumerate(topics):
        terms_topic = zip(terms, topic)
        sorted_terms = sorted(terms_topic, key= lambda x:x[1], reverse=True)[:10]
        print("Topic " + str(i) + ": ")
        for t in sorted_terms:
            print(t[0])

### 3.6 Computing Most (Cosine-Wise) Similar Terms

In [20]:
def compute_cos_sim(term):
    """
    Computes the cosine similarity of one TF-IDF-vectorized term with respect to
    the vocabulary and shows the top 10 most similar ones.
    
    - Input: Term.
    - Output: Real-valued number [0,1].
        
    """
    
    TF_IDF = pd.DataFrame(tfidf_matrix, columns=vocabulary)
    cos_sim = {}
    for i in tqdm_notebook(vocabulary):
        cos_sim[i] = cosine_similarity([TF_IDF[term].values], [TF_IDF[i].values]).round(3)
    _cos_sim = pd.Series(cos_sim).sort_values(ascending=False)
        
    print(f"Top 20 terms most (cosine-wise) similar to '{term}':")

    return pd.DataFrame(_cos_sim, columns=['Cosine']).head(21)

### 3.7 (Multinomial) Naïve Bayes Classifier

In [49]:
def multinomial_NB_classifier(matrix):
    
    """
    (Multinomial) Naïve Bayes Classifier.
    """
    
    # Splitting Data into Train & Test Set
    X_train, X_test, y_train, y_test = train_test_split(matrix, # BOW or TF-IDF
                                                        y,
                                                        test_size=0.20,
                                                        random_state=0)
    # Model
    naiveBayes = MultinomialNB()
    # Fit
    naiveBayes.fit(X_train, y_train)
    # Predictions
    y_pred = naiveBayes.predict(X_test)
    # Confusion Matrix
    cm = confusion_matrix(y_test, y_pred)

    accuracy = ((cm[0][0] + cm[1][1]) / (cm[0][0] + cm[0][1] + cm[1][0] + cm[1][1])) * 100
    
    print("(Multinomial) Naïve Bayes has been successfully applied to the data.\n")
    print("Confusion Matrix:\n\n", pd.DataFrame(cm))
    print("\nAccuracy:", accuracy, "%")

### 3.8 Logistic Regression Classifier

In [37]:
def logistic_regression_classifier(matrix):
    
    """
    Logistic Regression Classifier.
    """
    
    # Splitting Data into Train & Test Set
    X_train, X_test, y_train, y_test = train_test_split(matrix, # SVD 
                                                        y,
                                                        test_size=0.20,
                                                        random_state=0)
    # Model
    logisticRegression = LogisticRegression(solver='lbfgs',
                                            random_state = 0)
    # Fit
    logisticRegression.fit(X_train, y_train)
    # Predictions
    y_pred = logisticRegression.predict(X_test)
    # Confusion Matrix
    cm = confusion_matrix(y_test, y_pred)
    # Accuracy
    accuracy = ((cm[0][0] + cm[1][1]) / (cm[0][0] + cm[0][1] + cm[1][0] + cm[1][1])) * 100
    
    print("Logistic Regression has been successfully applied to the data.\n")
    print("Confusion Matrix:\n\n", pd.DataFrame(cm))
    print("\nAccuracy:", accuracy, "%")

## 4. Execution & Evaluation

Using the functions above...

First, I generate:
    - Corpus <- IMBD.csv
    - BOW Matrix <- Corpus
    - TF-IDF Matrix <- Bow Matrix
    - (Truncated) SVD Matrix <- TF-IDF Matrix

Then I extract the 1000 Latent Topics from the SVD Matrix, and compute
the top 20 terms cosine-wise most similar terms for 'bad' and 'good'. 

Finally, I apply the following algorithms to the data:
    - (Multinomial) Naïve Bayes <- BOW Matrix
    - (Multinomial) Naïve Bayes <- TF-IDF Matrix
    - Logistic Regression <- (Truncated) SVD Matrix

And for each model I calculate the % of accuracy. 

In [23]:
# Corpus <- IMBD.csv
corpus, vocabulary = generate_corpus_vocabulary(collection)

HBox(children=(IntProgress(value=0, max=50000), HTML(value='')))


No. of Documents in the Corpus: 50000
No. of Unique Words in the Corpus: 86892


In [27]:
# BOW Matrix <- Corpus
bow_matrix = generate_bow_matrix(corpus,
                                 min_df=2,
                                 vocabulary=vocabulary)

A (50000, 86892) BOW Matrix has been generated.



In [28]:
# TF-IDF Matrix <- BOW Matrix
tfidf_matrix = transform_into_tfidf(bow_matrix)

A (50000, 86892) BOW Matrix has been transformed into a (50000, 86892) TF-IDF Matrix.



In [40]:
# SVD Matrix <- TF-IDF Matrix
svd_matrix, topics = transform_into_svd(tfidf_matrix, 
                                         n_components=1000,
                                         n_iter=10)

A (50000, 86892) TF-IDF Matrix has been transformed into a (50000, 1000) SVD Matrix.



In [30]:
# Latent Topics <- SVD Matrix
latent_topics(vocabulary, topics)

Topic 0: 
br
and
to
in
movie
film
you
but
not
on
Topic 1: 
br
spoiler
fiend
match
uk
govinda
team
humour
nuclear
post
Topic 2: 
movie
you
bad
if
don
watch
just
br
worst
good
Topic 3: 
film
you
if
don
to
bad
have
be
can
re
Topic 4: 
show
you
series
episode
to
and
season
tv
if
will
Topic 5: 
you
and
great
film
love
if
will
best
story
movie
Topic 6: 
show
bad
good
and
really
funny
just
acting
but
episode
Topic 7: 
and
bad
you
horror
plot
no
up
guy
re
off
Topic 8: 
in
one
you
bad
series
seen
ever
on
scene
best
Topic 9: 
ever
and
worst
seen
have
film
on
time
movie
waste
Topic 10: 
horror
to
great
movie
film
show
on
series
out
best
Topic 11: 
have
series
book
story
seen
one
would
better
read
could
Topic 12: 
bad
to
acting
story
good
show
you
great
script
cast
Topic 13: 
show
book
movie
not
film
series
horror
and
you
plot
Topic 14: 
not
funny
comedy
be
ever
seen
one
worst
horror
show
Topic 15: 
horror
story
show
love
people
life
ever
one
great
seen
Topic 16: 
series
not
all
time
one
people
wi

Topic 127: 
got
part
being
maybe
beautiful
stupid
lot
wonderful
excellent
times
Topic 128: 
us
got
anyone
give
work
recommend
being
pretty
may
off
Topic 129: 
look
interesting
being
house
want
seeing
end
worth
documentary
actually
Topic 130: 
got
interesting
excellent
part
though
stupid
world
video
cast
terrible
Topic 131: 
awful
re
new
lot
want
girl
man
batman
watched
real
Topic 132: 
house
world
enjoy
quite
wife
kind
off
enough
guy
video
Topic 133: 
want
zombie
got
few
everyone
anyone
part
awful
re
real
Topic 134: 
work
ending
script
sex
play
few
world
gay
guy
horrible
Topic 135: 
interesting
world
horrible
work
awful
find
re
give
cast
back
Topic 136: 
john
pretty
money
being
role
always
feel
terrible
boring
excellent
Topic 137: 
times
give
watched
awful
director
john
house
girl
few
thought
Topic 138: 
being
batman
michael
director
quite
feel
ending
interesting
night
musical
Topic 139: 
feel
batman
performance
house
felt
look
stupid
something
poor
few
Topic 140: 
actually
short
enjoy

Topic 235: 
three
since
gore
hollywood
idea
point
everything
scary
actor
slasher
Topic 236: 
worse
entertaining
however
least
tom
classic
long
cartoon
god
mother
Topic 237: 
eddie
shot
van
please
murphy
especially
evil
definitely
true
hollywood
Topic 238: 
right
worse
far
own
waste
scary
especially
actor
french
dialogue
Topic 239: 
worse
season
making
point
scary
god
true
rather
oh
wonderful
Topic 240: 
three
production
day
last
take
david
hollywood
beautiful
making
job
Topic 241: 
although
mr
sure
come
entertaining
waste
actor
anything
idea
own
Topic 242: 
someone
line
columbo
around
mother
different
death
tom
jack
season
Topic 243: 
might
king
idea
sound
especially
come
last
space
earth
point
Topic 244: 
woman
come
columbo
dialogue
absolutely
last
fact
hard
mr
amazing
Topic 245: 
totally
shot
since
van
season
columbo
flick
making
actor
king
Topic 246: 
mr
come
far
three
wife
brilliant
anything
please
space
almost
Topic 247: 
dialogue
anything
eddie
definitely
nice
point
amazing
king


Topic 344: 
quality
care
opera
cool
place
person
either
bill
overall
dog
Topic 345: 
wish
lee
shot
care
rating
put
quality
charlie
sense
perfect
Topic 346: 
bill
human
robin
name
course
etc
expect
trying
opera
williams
Topic 347: 
second
bill
dance
seem
dancing
came
picture
try
guess
human
Topic 348: 
yes
home
ok
jim
allen
read
bruce
cool
used
bill
Topic 349: 
used
said
ray
sequel
allen
woody
opera
course
classic
sound
Topic 350: 
favorite
year
cool
wrong
home
keep
absolutely
bill
lead
used
Topic 351: 
cartoon
guess
second
used
yes
jackie
etc
chan
care
novel
Topic 352: 
island
quality
try
allen
person
writing
group
direction
woody
second
Topic 353: 
instead
thriller
charlie
brother
second
opera
yes
friend
violence
human
Topic 354: 
home
used
wrong
seem
brother
second
quality
english
decent
mind
Topic 355: 
instead
picture
jackie
chan
romantic
place
rest
heart
cartoon
history
Topic 356: 
tell
seem
history
ray
dark
used
etc
bruce
laurel
hardy
Topic 357: 
keep
lost
dead
full
simply
city
d

Topic 451: 
soundtrack
type
bruce
storyline
overall
south
george
park
dialog
wonder
Topic 452: 
age
run
wonder
coming
serious
perhaps
stuff
ridiculous
past
talk
Topic 453: 
expect
feeling
sort
peter
sad
de
entertainment
slasher
serious
past
Topic 454: 
paul
often
actress
help
rest
perhaps
feeling
final
beginning
realistic
Topic 455: 
several
overall
earth
wonder
sad
ago
along
werewolf
footage
surprised
Topic 456: 
run
romantic
days
lame
kill
several
given
miss
gone
scarlett
Topic 457: 
stop
certainly
opinion
avoid
doing
able
side
actress
non
writing
Topic 458: 
alien
type
wonder
problem
face
copy
island
school
comic
daughter
Topic 459: 
often
dull
use
past
case
sad
run
opinion
fight
ford
Topic 460: 
dull
brother
wonder
final
direction
band
paul
team
camp
extremely
Topic 461: 
class
rest
reality
seriously
experience
wonder
predictable
fight
feeling
realistic
Topic 462: 
check
act
often
dull
talk
predictable
daughter
group
evil
viewer
Topic 463: 
rest
dull
type
save
except
age
doing
brit

Topic 555: 
except
dialog
anyway
buy
shame
past
eye
poorly
happen
comic
Topic 556: 
cheap
four
scarlett
coming
hour
finally
weak
usual
extremely
turn
Topic 557: 
norris
chuck
close
hit
buy
attempt
save
given
cheesy
water
Topic 558: 
ninja
writer
suspense
often
simple
serious
beyond
jones
enjoyable
blah
Topic 559: 
turn
number
sorry
side
eye
ridiculous
awesome
davis
except
message
Topic 560: 
case
badly
prison
happy
hit
cheesy
comment
richard
talking
chuck
Topic 561: 
power
viewer
jones
ninja
badly
sorry
peter
joke
problem
local
Topic 562: 
become
told
cheesy
beyond
except
obviously
scooby
storyline
cage
case
Topic 563: 
superb
viewer
four
self
weak
slasher
save
keaton
comic
fulci
Topic 564: 
obviously
sit
haven
william
husband
often
saying
mystery
ninja
entire
Topic 565: 
able
serious
despite
complete
hot
review
example
check
holly
number
Topic 566: 
awesome
cop
sorry
release
joke
oscar
viewing
happy
cut
unfortunately
Topic 567: 
later
hour
flynn
hot
believable
happy
despite
porn
roman

Topic 659: 
interest
blah
important
number
light
baby
basically
cheap
add
list
Topic 660: 
strange
hand
blah
relationship
ha
coming
small
typical
imagine
pointless
Topic 661: 
usual
expected
ryan
directed
local
imdb
view
channel
five
lack
Topic 662: 
level
talking
late
past
rate
comment
beyond
behind
romance
box
Topic 663: 
welles
room
saying
garbage
complete
plus
stay
anyway
number
obvious
Topic 664: 
release
victoria
tony
bed
prince
important
spent
party
ghost
sister
Topic 665: 
believable
ha
obvious
glad
call
cinderella
tried
sick
prince
twist
Topic 666: 
none
premise
lady
hot
effort
sexual
pathetic
cover
talking
woods
Topic 667: 
obvious
usual
adventure
stay
peter
change
expecting
unless
despite
leave
Topic 668: 
basically
important
score
scooby
already
change
doo
incredible
forget
khan
Topic 669: 
involved
burt
blah
laughing
crazy
score
reynolds
order
moving
dan
Topic 670: 
none
strange
okay
incredibly
viewing
hand
change
forget
plus
talking
Topic 671: 
master
easily
word
cute
tal

Topic 764: 
culture
needs
party
die
laughing
clever
running
plus
plain
surprise
Topic 765: 
bunch
effort
known
che
bourne
running
adventure
credits
masterpiece
cut
Topic 766: 
pointless
low
rate
fighting
scenery
powerful
effect
fake
potential
clever
Topic 767: 
important
america
sam
master
prison
unless
modern
adult
mess
apparently
Topic 768: 
plus
important
somewhat
alone
plain
across
expecting
pointless
cause
drug
Topic 769: 
madonna
dan
clever
bunch
bought
ha
bourne
super
interest
particularly
Topic 770: 
edge
non
surprise
typical
local
saying
twist
single
agree
seat
Topic 771: 
alice
karen
expecting
spoiler
alone
brain
sub
body
red
low
Topic 772: 
madonna
bother
spoiler
theatre
hear
showing
material
needs
spirit
johnny
Topic 773: 
rate
casting
edge
fall
mad
seat
doubt
incredibly
books
showing
Topic 774: 
fox
moment
add
christopher
taking
clearly
bring
running
easy
plain
Topic 775: 
wow
bed
masterpiece
somewhat
fake
level
change
upon
glad
ha
Topic 776: 
effort
mostly
fox
pointless
i

Topic 865: 
plenty
exactly
fake
near
pointless
america
spent
sandra
garbage
open
Topic 866: 
interested
adult
running
clear
wow
easily
possibly
agree
club
disturbing
Topic 867: 
unbelievable
adult
scarecrow
free
emotional
powerful
pointless
supporting
edge
bunch
Topic 868: 
bother
football
apparently
streisand
pure
potential
failed
drug
london
west
Topic 869: 
within
certain
bring
fairly
imagine
mess
similar
needs
admit
somehow
Topic 870: 
move
certain
adventure
elvis
waiting
dan
torture
anime
perfectly
books
Topic 871: 
cause
forced
follow
screenplay
die
talented
supporting
johnny
add
bother
Topic 872: 
plus
showing
rich
animated
painful
middle
possible
development
possibly
bed
Topic 873: 
clearly
total
america
general
touching
andy
waiting
paulie
stone
needed
Topic 874: 
within
thats
apparently
upon
mention
unbelievable
realize
potential
hear
waiting
Topic 875: 
battle
girlfriend
language
value
general
spirit
unique
otherwise
possibly
screenplay
Topic 876: 
books
crew
anime
teen
peri

Topic 963: 
exciting
hair
bronson
program
genius
charles
among
general
theatre
walking
Topic 964: 
personally
famous
lucy
exciting
sub
situation
particular
justice
nancy
ride
Topic 965: 
note
plenty
elvira
break
language
boyfriend
recently
standard
effect
truth
Topic 966: 
near
shark
concept
terrific
disturbing
recently
clearly
taking
speak
perfectly
Topic 967: 
added
difficult
plane
telling
basic
likes
spanish
ask
hurt
missing
Topic 968: 
taste
consider
post
lion
forced
thomas
crappy
result
quickly
opening
Topic 969: 
appreciate
west
soul
dramatic
sadly
animated
supporting
likes
suggest
convincing
Topic 970: 
nick
yeah
betty
basic
judge
favourite
needed
zero
pick
billy
Topic 971: 
ernest
disappointing
club
somehow
exciting
silent
laughable
barney
recently
ghoulies
Topic 972: 
hardly
nobody
exciting
program
kinda
century
anti
potential
return
telling
Topic 973: 
available
somehow
filmmakers
villain
teenage
deal
disappointing
likes
memorable
rare
Topic 974: 
third
form
credit
track
sugg

In [31]:
# Term: 'bad'
compute_cos_sim('bad')

HBox(children=(IntProgress(value=0, max=86892), HTML(value='')))


Top 20 terms most (cosine-wise) similar to 'bad':


Unnamed: 0,Cosine
bad,[[1.0]]
movie,[[0.345]]
to,[[0.336]]
acting,[[0.324]]
but,[[0.319]]
and,[[0.317]]
so,[[0.315]]
not,[[0.298]]
in,[[0.296]]
just,[[0.289]]


In [32]:
# Term: 'good'
compute_cos_sim('good')

HBox(children=(IntProgress(value=0, max=86892), HTML(value='')))


Top 20 terms most (cosine-wise) similar to 'good':


Unnamed: 0,Cosine
good,[[1.0]]
and,[[0.461]]
but,[[0.459]]
to,[[0.437]]
in,[[0.41]]
movie,[[0.41]]
not,[[0.385]]
have,[[0.351]]
br,[[0.35]]
be,[[0.349]]


In [50]:
# (Multinomial) Naïve Bayes <- BOW Matrix
multinomial_NB_classifier(bow_matrix)

(Multinomial) Naïve Bayes has been successfully applied to the data.

Confusion Matrix:

       0     1
0  4434   601
1   899  4066

Accuracy: 85.0 %


In [44]:
# (Multinomial) Naïve Bayes <- TF-IDF Matrix
multinomial_NB_classifier(tfidf_matrix)

(Multinomial) Naïve Bayes has been successfully applied to the data.

Confusion Matrix:

       0     1
0  4293   742
1   965  4000

Accuracy: 82.93 %


In [46]:
# Logistic Regression <- (Truncated) SVD Matrix
logistic_regression_classifier(svd_matrix)

Logistic Regression has been successfully applied to the data.

Confusion Matrix:

       0     1
0  4302   733
1   518  4447

Accuracy: 87.49 %
