In [3]:
# Imports
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

#NLP Imports
from nltk.tokenize import sent_tokenize, word_tokenize, RegexpTokenizer
from nltk.stem import WordNetLemmatizer

from sklearn.feature_extraction.text import CountVectorizer, TfidfVectorizer

from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.pipeline import Pipeline
from sklearn.metrics import ConfusionMatrixDisplay, RocCurveDisplay, confusion_matrix, classification_report

from sklearn.linear_model import LogisticRegression
from sklearn.naive_bayes import MultinomialNB

In [2]:
df = pd.read_csv('../project-5/clean_df.csv')
df

Unnamed: 0,review,sentiment,review_clean
0,One of the other reviewers has mentioned that ...,positive,one of the other reviewers has mentioned that ...
1,A wonderful little production. <br /><br />The...,positive,a wonderful little production the filming tech...
2,I thought this was a wonderful way to spend ti...,positive,i thought this was a wonderful way to spend ti...
3,Basically there's a family where a little boy ...,negative,basically there s a family where a little boy ...
4,"Petter Mattei's ""Love in the Time of Money"" is...",positive,petter mattei s love in the time of money is a...
...,...,...,...
49995,I thought this movie did a down right good job...,positive,i thought this movie did a down right good job...
49996,"Bad plot, bad dialogue, bad acting, idiotic di...",negative,bad plot bad dialogue bad acting idiotic direc...
49997,I am a Catholic taught in parochial elementary...,negative,i am a catholic taught in parochial elementary...
49998,I'm going to have to disagree with the previou...,negative,i m going to have to disagree with the previou...


In [4]:
lemmatizer = WordNetLemmatizer()

In [9]:
tokenizer = RegexpTokenizer('\s+', gaps=True)

In [10]:
df['review_clean_list'] = df['review_clean'].apply(tokenizer.tokenize)
df.head()

Unnamed: 0,review,sentiment,review_clean,review_clean_list
0,One of the other reviewers has mentioned that ...,positive,one of the other reviewers has mentioned that ...,"[one, of, the, other, reviewers, has, mentione..."
1,A wonderful little production. <br /><br />The...,positive,a wonderful little production the filming tech...,"[a, wonderful, little, production, the, filmin..."
2,I thought this was a wonderful way to spend ti...,positive,i thought this was a wonderful way to spend ti...,"[i, thought, this, was, a, wonderful, way, to,..."
3,Basically there's a family where a little boy ...,negative,basically there s a family where a little boy ...,"[basically, there, s, a, family, where, a, lit..."
4,"Petter Mattei's ""Love in the Time of Money"" is...",positive,petter mattei s love in the time of money is a...,"[petter, mattei, s, love, in, the, time, of, m..."


In [15]:
df['review_lem'] = df['review_clean_list'].apply(lambda x: [lemmatizer.lemmatize(y) for y in x])
df.head()

Unnamed: 0,review,sentiment,review_clean,review_clean_list,review_lem
0,One of the other reviewers has mentioned that ...,positive,one of the other reviewers has mentioned that ...,"[one, of, the, other, reviewers, has, mentione...","[one, of, the, other, reviewer, ha, mentioned,..."
1,A wonderful little production. <br /><br />The...,positive,a wonderful little production the filming tech...,"[a, wonderful, little, production, the, filmin...","[a, wonderful, little, production, the, filmin..."
2,I thought this was a wonderful way to spend ti...,positive,i thought this was a wonderful way to spend ti...,"[i, thought, this, was, a, wonderful, way, to,...","[i, thought, this, wa, a, wonderful, way, to, ..."
3,Basically there's a family where a little boy ...,negative,basically there s a family where a little boy ...,"[basically, there, s, a, family, where, a, lit...","[basically, there, s, a, family, where, a, lit..."
4,"Petter Mattei's ""Love in the Time of Money"" is...",positive,petter mattei s love in the time of money is a...,"[petter, mattei, s, love, in, the, time, of, m...","[petter, mattei, s, love, in, the, time, of, m..."


In [16]:
pd.set_option('max_colwidth', 800)
df[['review_clean_list', 'review_lem']]

Unnamed: 0,review_clean_list,review_lem
0,"[one, of, the, other, reviewers, has, mentioned, that, after, watching, just, 1, oz, episode, you, ll, be, hooked, they, are, right, as, this, is, exactly, what, happened, with, me, the, first, thing, that, struck, me, about, oz, was, its, brutality, and, unflinching, scenes, of, violence, which, set, in, right, from, the, word, go, trust, me, this, is, not, a, show, for, the, faint, hearted, or, timid, this, show, pulls, no, punches, with, regards, to, drugs, sex, or, violence, its, is, hardcore, in, the, classic, use, of, the, word, it, is, called, oz, as, that, is, the, nickname, given, to, the, ...]","[one, of, the, other, reviewer, ha, mentioned, that, after, watching, just, 1, oz, episode, you, ll, be, hooked, they, are, right, a, this, is, exactly, what, happened, with, me, the, first, thing, that, struck, me, about, oz, wa, it, brutality, and, unflinching, scene, of, violence, which, set, in, right, from, the, word, go, trust, me, this, is, not, a, show, for, the, faint, hearted, or, timid, this, show, pull, no, punch, with, regard, to, drug, sex, or, violence, it, is, hardcore, in, the, classic, use, of, the, word, it, is, called, oz, a, that, is, the, nickname, given, to, the, ...]"
1,"[a, wonderful, little, production, the, filming, technique, is, very, unassuming, very, old, time, bbc, fashion, and, gives, a, comforting, and, sometimes, discomforting, sense, of, realism, to, the, entire, piece, the, actors, are, extremely, well, chosen, michael, sheen, not, only, has, got, all, the, polari, but, he, has, all, the, voices, down, pat, too, you, can, truly, see, the, seamless, editing, guided, by, the, references, to, williams, diary, entries, not, only, is, it, well, worth, the, watching, but, it, is, a, terrificly, written, and, performed, piece, a, masterful, production, about, one, of, the, great, master, s, of, comedy, and, his, life, ...]","[a, wonderful, little, production, the, filming, technique, is, very, unassuming, very, old, time, bbc, fashion, and, give, a, comforting, and, sometimes, discomforting, sense, of, realism, to, the, entire, piece, the, actor, are, extremely, well, chosen, michael, sheen, not, only, ha, got, all, the, polari, but, he, ha, all, the, voice, down, pat, too, you, can, truly, see, the, seamless, editing, guided, by, the, reference, to, williams, diary, entry, not, only, is, it, well, worth, the, watching, but, it, is, a, terrificly, written, and, performed, piece, a, masterful, production, about, one, of, the, great, master, s, of, comedy, and, his, life, ...]"
2,"[i, thought, this, was, a, wonderful, way, to, spend, time, on, a, too, hot, summer, weekend, sitting, in, the, air, conditioned, theater, and, watching, a, light, hearted, comedy, the, plot, is, simplistic, but, the, dialogue, is, witty, and, the, characters, are, likable, even, the, well, bread, suspected, serial, killer, while, some, may, be, disappointed, when, they, realize, this, is, not, match, point, 2, risk, addiction, i, thought, it, was, proof, that, woody, allen, is, still, fully, in, control, of, the, style, many, of, us, have, grown, to, love, this, was, the, most, i, d, laughed, at, one, of, woody, s, ...]","[i, thought, this, wa, a, wonderful, way, to, spend, time, on, a, too, hot, summer, weekend, sitting, in, the, air, conditioned, theater, and, watching, a, light, hearted, comedy, the, plot, is, simplistic, but, the, dialogue, is, witty, and, the, character, are, likable, even, the, well, bread, suspected, serial, killer, while, some, may, be, disappointed, when, they, realize, this, is, not, match, point, 2, risk, addiction, i, thought, it, wa, proof, that, woody, allen, is, still, fully, in, control, of, the, style, many, of, u, have, grown, to, love, this, wa, the, most, i, d, laughed, at, one, of, woody, s, ...]"
3,"[basically, there, s, a, family, where, a, little, boy, jake, thinks, there, s, a, zombie, in, his, closet, his, parents, are, fighting, all, the, time, this, movie, is, slower, than, a, soap, opera, and, suddenly, jake, decides, to, become, rambo, and, kill, the, zombie, ok, first, of, all, when, you, re, going, to, make, a, film, you, must, decide, if, its, a, thriller, or, a, drama, as, a, drama, the, movie, is, watchable, parents, are, divorcing, arguing, like, in, real, life, and, then, we, have, jake, with, his, closet, which, totally, ruins, all, the, film, i, expected, to, see, a, ...]","[basically, there, s, a, family, where, a, little, boy, jake, think, there, s, a, zombie, in, his, closet, his, parent, are, fighting, all, the, time, this, movie, is, slower, than, a, soap, opera, and, suddenly, jake, decides, to, become, rambo, and, kill, the, zombie, ok, first, of, all, when, you, re, going, to, make, a, film, you, must, decide, if, it, a, thriller, or, a, drama, a, a, drama, the, movie, is, watchable, parent, are, divorcing, arguing, like, in, real, life, and, then, we, have, jake, with, his, closet, which, totally, ruin, all, the, film, i, expected, to, see, a, ...]"
4,"[petter, mattei, s, love, in, the, time, of, money, is, a, visually, stunning, film, to, watch, mr, mattei, offers, us, a, vivid, portrait, about, human, relations, this, is, a, movie, that, seems, to, be, telling, us, what, money, power, and, success, do, to, people, in, the, different, situations, we, encounter, this, being, a, variation, on, the, arthur, schnitzler, s, play, about, the, same, theme, the, director, transfers, the, action, to, the, present, time, new, york, where, all, these, different, characters, meet, and, connect, each, one, is, connected, in, one, way, or, another, to, the, next, person, but, no, one, seems, ...]","[petter, mattei, s, love, in, the, time, of, money, is, a, visually, stunning, film, to, watch, mr, mattei, offer, u, a, vivid, portrait, about, human, relation, this, is, a, movie, that, seems, to, be, telling, u, what, money, power, and, success, do, to, people, in, the, different, situation, we, encounter, this, being, a, variation, on, the, arthur, schnitzler, s, play, about, the, same, theme, the, director, transfer, the, action, to, the, present, time, new, york, where, all, these, different, character, meet, and, connect, each, one, is, connected, in, one, way, or, another, to, the, next, person, but, no, one, seems, ...]"
...,...,...
49995,"[i, thought, this, movie, did, a, down, right, good, job, it, wasn, t, as, creative, or, original, as, the, first, but, who, was, expecting, it, to, be, it, was, a, whole, lotta, fun, the, more, i, think, about, it, the, more, i, like, it, and, when, it, comes, out, on, dvd, i, m, going, to, pay, the, money, for, it, very, proudly, every, last, cent, sharon, stone, is, great, she, always, is, even, if, her, movie, is, horrible, catwoman, but, this, movie, isn, t, this, is, one, of, those, movies, that, will, be, underrated, for, its, lifetime, and, it, will, ...]","[i, thought, this, movie, did, a, down, right, good, job, it, wasn, t, a, creative, or, original, a, the, first, but, who, wa, expecting, it, to, be, it, wa, a, whole, lotta, fun, the, more, i, think, about, it, the, more, i, like, it, and, when, it, come, out, on, dvd, i, m, going, to, pay, the, money, for, it, very, proudly, every, last, cent, sharon, stone, is, great, she, always, is, even, if, her, movie, is, horrible, catwoman, but, this, movie, isn, t, this, is, one, of, those, movie, that, will, be, underrated, for, it, lifetime, and, it, will, ...]"
49996,"[bad, plot, bad, dialogue, bad, acting, idiotic, directing, the, annoying, porn, groove, soundtrack, that, ran, continually, over, the, overacted, script, and, a, crappy, copy, of, the, vhs, cannot, be, redeemed, by, consuming, liquor, trust, me, because, i, stuck, this, turkey, out, to, the, end, it, was, so, pathetically, bad, all, over, that, i, had, to, figure, it, was, a, fourth, rate, spoof, of, springtime, for, hitler, the, girl, who, played, janis, joplin, was, the, only, faint, spark, of, interest, and, that, was, only, because, she, could, sing, better, than, the, original, if, you, want, to, watch, something, similar, but, a, ...]","[bad, plot, bad, dialogue, bad, acting, idiotic, directing, the, annoying, porn, groove, soundtrack, that, ran, continually, over, the, overacted, script, and, a, crappy, copy, of, the, vhs, cannot, be, redeemed, by, consuming, liquor, trust, me, because, i, stuck, this, turkey, out, to, the, end, it, wa, so, pathetically, bad, all, over, that, i, had, to, figure, it, wa, a, fourth, rate, spoof, of, springtime, for, hitler, the, girl, who, played, janis, joplin, wa, the, only, faint, spark, of, interest, and, that, wa, only, because, she, could, sing, better, than, the, original, if, you, want, to, watch, something, similar, but, a, ...]"
49997,"[i, am, a, catholic, taught, in, parochial, elementary, schools, by, nuns, taught, by, jesuit, priests, in, high, school, college, i, am, still, a, practicing, catholic, but, would, not, be, considered, a, good, catholic, in, the, church, s, eyes, because, i, don, t, believe, certain, things, or, act, certain, ways, just, because, the, church, tells, me, to, so, back, to, the, movie, its, bad, because, two, people, are, killed, by, this, nun, who, is, supposed, to, be, a, satire, as, the, embodiment, of, a, female, religious, figurehead, there, is, no, comedy, in, that, and, the, satire, is, not, done, well, by, ...]","[i, am, a, catholic, taught, in, parochial, elementary, school, by, nun, taught, by, jesuit, priest, in, high, school, college, i, am, still, a, practicing, catholic, but, would, not, be, considered, a, good, catholic, in, the, church, s, eye, because, i, don, t, believe, certain, thing, or, act, certain, way, just, because, the, church, tell, me, to, so, back, to, the, movie, it, bad, because, two, people, are, killed, by, this, nun, who, is, supposed, to, be, a, satire, a, the, embodiment, of, a, female, religious, figurehead, there, is, no, comedy, in, that, and, the, satire, is, not, done, well, by, ...]"
49998,"[i, m, going, to, have, to, disagree, with, the, previous, comment, and, side, with, maltin, on, this, one, this, is, a, second, rate, excessively, vicious, western, that, creaks, and, groans, trying, to, put, across, its, central, theme, of, the, wild, west, being, tamed, and, kicked, aside, by, the, steady, march, of, time, it, would, like, to, be, in, the, tradition, of, butch, cassidy, and, the, sundance, kid, but, lacks, that, film, s, poignancy, and, charm, andrew, mclaglen, s, direction, is, limp, and, the, final, 30, minutes, or, so, are, a, real, botch, with, some, incomprehensible, strategy, on, the, part, of, ...]","[i, m, going, to, have, to, disagree, with, the, previous, comment, and, side, with, maltin, on, this, one, this, is, a, second, rate, excessively, vicious, western, that, creak, and, groan, trying, to, put, across, it, central, theme, of, the, wild, west, being, tamed, and, kicked, aside, by, the, steady, march, of, time, it, would, like, to, be, in, the, tradition, of, butch, cassidy, and, the, sundance, kid, but, lack, that, film, s, poignancy, and, charm, andrew, mclaglen, s, direction, is, limp, and, the, final, 30, minute, or, so, are, a, real, botch, with, some, incomprehensible, strategy, on, the, part, of, ...]"
