### Assignment 2

In [15]:
import pandas as pd
from sklearn.model_selection import train_test_split
import numpy as np
import matplotlib.pyplot as plt
import re
from nltk.stem import PorterStemmer
import nltk
from sklearn.preprocessing import StandardScaler
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout
from tensorflow.keras.callbacks import EarlyStopping
from gensim.models import Word2Vec



nltk.download('stopwords')
nltk.download('opinion_lexicon')
from nltk.corpus import opinion_lexicon
from nltk.corpus import stopwords


[nltk_data] Downloading package stopwords to
[nltk_data]     C:\Users\kkasp\AppData\Roaming\nltk_data...
[nltk_data]   Package stopwords is already up-to-date!
[nltk_data] Downloading package opinion_lexicon to
[nltk_data]     C:\Users\kkasp\AppData\Roaming\nltk_data...
[nltk_data]   Package opinion_lexicon is already up-to-date!


In [16]:
file_path = r'IMDB Dataset.csv'
data = pd.read_csv(file_path)
print(data.head(20))


                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                        

In [17]:
X = data['review']

def transform_bin(x):
  if x == "positive":
    return 1
  else:
    return 0
data['sentiment'] = data['sentiment'].apply(lambda x : transform_bin(x))

y = data['sentiment']


In [18]:
# Data split (80% training, 20% validation)
train_data, X_test, train_label, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
X_train, X_val, y_train, y_val = train_test_split(train_data, train_label, test_size=0.2, random_state=42)

X_train = X_train.to_frame()
X_test = X_test.to_frame()
X_val = X_val.to_frame()

# Display the sizes of the datasets
print("Training Data Size:", len(X_train))
print("Validation Data Size:", len(X_val))
print("Test Data Size:", len(X_test))

Training Data Size: 32000
Validation Data Size: 8000
Test Data Size: 10000


##### Text pre-processing

In [19]:
X_train['review']

11794                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                   

In [20]:
def clean_text(text):
    # Remove unusual or non-ASCII characters
    text = re.sub(r'[^\x00-\x7F]+', '', text)

    # Remove punctuation
    text = re.sub(r'[^\w\s]', '', text)

    # Remove extra spaces
    text = re.sub(r'\s+', ' ', text).strip()

    # Tokenize the text into words for processing
    words = text.split()

    # Remove stopwords
    stop_words = set(stopwords.words('english'))
    filtered_words = [word for word in words if word.lower() not in stop_words]


    return filtered_words

X_train['cleaned_review'] = X_train['review'].apply(clean_text)
X_val['cleaned_review']  = X_val['review'].apply(clean_text)
X_test['cleaned_review']  = X_test['review'].apply(clean_text)


# Display the first few cleaned reviews
print("Cleaned Reviews with Stopwords Removed:")
print(X_train['cleaned_review'].head(20))

Cleaned Reviews with Stopwords Removed:
11794                                                                                                                                                                                                                                                           [fault, actors, put, great, performances, overall, story, well, executed, movie, opens, great, zinger, crazy, old, guy, forces, young, Aborigine, girls, car, road, forced, endure, 40, minutes, character, development, entirely, new, group, characters, dont, know, 40, minutes, turns, ones, eventually, discover, girls, body, story, progresses, therebr, br, story, pick, point, really, goes, nowhere, 2, hours, asked, point, see, characters, struggle, accusations, racism, stupidity, handled, discovery, story, ultimately, unsatisfying, felt, unfinished, well, acted, theres, strong, enough, backbone, film, warrant, recommending]
24925                                                                      

Further preprocessing: Features and Embeddings

Features

Here we create: Number of Positive lexicon words, Number of Negative lexicon words, Number of nos, and Number of words. After that the values were standardized

In [25]:
# 1. Lexicon Creation
positive_lexicon = set(opinion_lexicon.positive()) #list of positive words from the NLTK package
negative_lexicon = set(opinion_lexicon.negative()) #list of negative words from the NLTK package

# 2. Defining Functions
def counts_lex(review, type = 'neg'):
    count_neg = 0
    count_pos = 0
    for word in review:
      if word.lower() in positive_lexicon:
        count_pos += 1
      if word.lower() in negative_lexicon:
        count_neg += 1
    if type == 'neg':
      return count_neg
    elif type == 'pos':
      return count_pos

# Ensure reviews are tokenized, we use the review column as 'no' and 'not' gets removed by stopwords
def count_nos(review):
    count = 0
    words = review.split()  # Tokenize if review is a single string
    for word in words:
        if word.lower() == "no" or word.lower() == "not":
            count += 1
    return count

# 3. Feature Extraction for Training, Validation, and Test Sets
X_train['NrPos'] = X_train['cleaned_review'].apply(lambda x : counts_lex(x, type = "pos"))
X_train['NrNeg'] = X_train['cleaned_review'].apply(lambda x : counts_lex(x, type = "neg"))
X_train['NrWords'] = X_train['cleaned_review'].apply(lambda x: len(x))
X_train['Nos'] = X_train['review'].apply(lambda x : count_nos(x))

X_val['NrPos'] = X_val['cleaned_review'].apply(lambda x : counts_lex(x, type = "pos"))
X_val['NrNeg'] = X_val['cleaned_review'].apply(lambda x : counts_lex(x, type = "neg"))
X_val['NrWords'] = X_val['cleaned_review'].apply(lambda x: len(x))
X_val['Nos'] = X_val['review'].apply(lambda x : count_nos(x))

X_test['NrPos'] = X_test['cleaned_review'].apply(lambda x : counts_lex(x, type = "pos"))
X_test['NrNeg'] = X_test['cleaned_review'].apply(lambda x : counts_lex(x, type = "neg"))
X_test['NrWords'] = X_test['cleaned_review'].apply(lambda x: len(x))
X_test['Nos'] = X_test['review'].apply(lambda x : count_nos(x))

# 4. Standardizing Features
cols = ['NrPos', 'NrNeg', 'NrWords', 'Nos']
train_features_to_transform = X_train[cols]
val_features_to_transform = X_val[cols]
test_features_to_transform = X_test[cols]

scaler = StandardScaler()
X_train_std = scaler.fit_transform(train_features_to_transform)  # X is your feature matrix
X_val_std = scaler.transform(val_features_to_transform)
X_test_std = scaler.transform(test_features_to_transform)

X_train[cols] = X_train_std
X_val[cols] = X_val_std
X_test[cols] = X_test_std

# 5. Stemming Words
# Apply stemming
stemmer = PorterStemmer()

X_train['Stem Words'] = X_train['cleaned_review'].apply(lambda x : [stemmer.stem(word.lower()) for word in x])
X_val['Stem Words'] = X_val['cleaned_review'].apply(lambda x : [stemmer.stem(word.lower()) for word in x])
X_test['Stem Words'] = X_test['cleaned_review'].apply(lambda x : [stemmer.stem(word.lower()) for word in x])

# 6. Output for debugging
# Display the first few cleaned reviews
print("Training Data with cleaned reviews with Stemming and New Features:")
print(X_train['cleaned_review'].head(20))

Training Data with cleaned reviews with Stemming and New Features:
11794                                                                                                                                                                                                                                                           [fault, actors, put, great, performances, overall, story, well, executed, movie, opens, great, zinger, crazy, old, guy, forces, young, Aborigine, girls, car, road, forced, endure, 40, minutes, character, development, entirely, new, group, characters, dont, know, 40, minutes, turns, ones, eventually, discover, girls, body, story, progresses, therebr, br, story, pick, point, really, goes, nowhere, 2, hours, asked, point, see, characters, struggle, accusations, racism, stupidity, handled, discovery, story, ultimately, unsatisfying, felt, unfinished, well, acted, theres, strong, enough, backbone, film, warrant, recommending]
24925                                           

In [26]:
X_train

Unnamed: 0,review,cleaned_review,NrPos,NrNeg,NrWords,Nos,Stem Words
11794,"With no fault to the actors (they all put on great performances), the overall story was not very well executed. The movie opens with a great zinger: a crazy old guy forces a young Aborigine girl's car off the road. But then, we're forced to endure 40 minutes of character development with an entirely new group of characters ... and we don't know why until the 40 minutes are up. It turns out that they are the ones who eventually discover the girl's body ... and the story progresses from there.<br /><br />While the story does pick up at that point, it really goes nowhere. After 2 hours, I asked myself: was there a point to this, or was it just to see the characters struggle with accusations of racism and stupidity of how they handled the discovery? The story was ultimately unsatisfying and felt unfinished. While it is well acted, there's not a strong enough backbone in the film to warrant recommending it.","[fault, actors, put, great, performances, overall, story, well, executed, movie, opens, great, zinger, crazy, old, guy, forces, young, Aborigine, girls, car, road, forced, endure, 40, minutes, character, development, entirely, new, group, characters, dont, know, 40, minutes, turns, ones, eventually, discover, girls, body, story, progresses, therebr, br, story, pick, point, really, goes, nowhere, 2, hours, asked, point, see, characters, struggle, accusations, racism, stupidity, handled, discovery, story, ultimately, unsatisfying, felt, unfinished, well, acted, theres, strong, enough, backbone, film, warrant, recommending]",-0.349058,-0.255423,-0.481820,0.761786,"[fault, actor, put, great, perform, overal, stori, well, execut, movi, open, great, zinger, crazi, old, guy, forc, young, aborigin, girl, car, road, forc, endur, 40, minut, charact, develop, entir, new, group, charact, dont, know, 40, minut, turn, one, eventu, discov, girl, bodi, stori, progress, therebr, br, stori, pick, point, realli, goe, nowher, 2, hour, ask, point, see, charact, struggl, accus, racism, stupid, handl, discoveri, stori, ultim, unsatisfi, felt, unfinish, well, act, there, strong, enough, backbon, film, warrant, recommend]"
24925,"The first thing I thought when I saw this films was: It is not really a film, at least it is not what we imagine spontaneously when we hear the word ""film"". it is entirely symbolic, everything in it has a figurative meaning. So if you are not used to express thing in a symbolic way, you will find it strange, if you are not acquainted with philosophy, religion, spiritual life, you will think it's just a fairy-tale... and even a weird one, chaotic. For me ""The legend of Zu"" is perfectly transparent. And I do like it. It tells us in images the story about the fight between light and darkness, the fight that is as old as humanity, and every one who is in search of the sens in this life is confronted with it. The film is obviously made by Buddhists. I am not a Buddhist. My religion and the vision of the world and human is different. But as far as we are all humans and have the same human nature we necessarily have common experiences and can understand each other. It is a really beautiful film! And I which we had more films like this - films that have a meaning. There are too many empty stories which are good only to make time pass more quickly.","[first, thing, thought, saw, films, really, film, least, imagine, spontaneously, hear, word, film, entirely, symbolic, everything, figurative, meaning, used, express, thing, symbolic, way, find, strange, acquainted, philosophy, religion, spiritual, life, think, fairytale, even, weird, one, chaotic, legend, Zu, perfectly, transparent, like, tells, us, images, story, fight, light, darkness, fight, old, humanity, every, one, search, sens, life, confronted, film, obviously, made, Buddhists, Buddhist, religion, vision, world, human, different, far, humans, human, nature, necessarily, common, experiences, understand, really, beautiful, film, films, like, films, meaning, many, empty, stories, good, make, time, pass, quickly]",-0.349058,-0.623733,-0.351669,1.839063,"[first, thing, thought, saw, film, realli, film, least, imagin, spontan, hear, word, film, entir, symbol, everyth, figur, mean, use, express, thing, symbol, way, find, strang, acquaint, philosophi, religion, spiritu, life, think, fairytal, even, weird, one, chaotic, legend, zu, perfectli, transpar, like, tell, us, imag, stori, fight, light, dark, fight, old, human, everi, one, search, sen, life, confront, film, obvious, made, buddhist, buddhist, religion, vision, world, human, differ, far, human, human, natur, necessarili, common, experi, understand, realli, beauti, film, film, like, film, mean, mani, empti, stori, good, make, time, pass, quickli]"
28578,"Post-feminist depiction of cruelty and sadism.<br /><br />Spoiler alert! <br /><br />This underrated gem of a film tells the story of Flavia, a Fifteenth Century girl of Noble birth walled up in a convent after defining her father and indeed the whole of Medieval Christian society by viewing a fallen Islamic warrior as a human rather than demonic figure.<br /><br />Unable to accept the patriarchal rule of the convent (explicitly stated in a scene where the Bishop arrives flanked by soldiers and monks) Flavia begins to explicitly question the society in which she finds herself and, through butting up against a whole system of subjugation, repression and violence, inevitably brings a tragic end not only to herself but all those around her.<br /><br />Billed as a piece of nunsploitation this is far from the truth. This is a film depiction the consequences of violence, the effects of patriarchal dominance, the nature of rebellion and the corruption of the human spirit.<br /><br />I described it in the title of this piece as 'post-feminist' and in the end Flavia's triumphs must always be corrupted, compromised and perverted by men. Even Flavia's gruesome end is perpetrated by men for men (the women turn away and only the monks look on without horror.<br /><br />As to the much discussed violence: this is a depiction of the effects of violence and the horrors of a world driven mad by religious excess. To have shied away from the violence would have limited the film's impact, would have cheapened the film and allowed it to be assimilated within the Patriarchal discourse it is exposing. In addition it is a realistic portrait of medieval society.<br /><br />Beautifully filmed, brilliantly acted (notably by Florinda Bolkin and Maria Casares), containing a wonderful score by piovani and still challenging after all these years Flavia is a classic of European Cinema.","[Postfeminist, depiction, cruelty, sadismbr, br, Spoiler, alert, br, br, underrated, gem, film, tells, story, Flavia, Fifteenth, Century, girl, Noble, birth, walled, convent, defining, father, indeed, whole, Medieval, Christian, society, viewing, fallen, Islamic, warrior, human, rather, demonic, figurebr, br, Unable, accept, patriarchal, rule, convent, explicitly, stated, scene, Bishop, arrives, flanked, soldiers, monks, Flavia, begins, explicitly, question, society, finds, butting, whole, system, subjugation, repression, violence, inevitably, brings, tragic, end, around, herbr, br, Billed, piece, nunsploitation, far, truth, film, depiction, consequences, violence, effects, patriarchal, dominance, nature, rebellion, corruption, human, spiritbr, br, described, title, piece, postfeminist, end, Flavias, triumphs, must, always, corrupted, compromised, perverted, ...]",-0.216966,0.726737,0.516006,-0.315491,"[postfeminist, depict, cruelti, sadismbr, br, spoiler, alert, br, br, underr, gem, film, tell, stori, flavia, fifteenth, centuri, girl, nobl, birth, wall, convent, defin, father, inde, whole, mediev, christian, societi, view, fallen, islam, warrior, human, rather, demon, figurebr, br, unabl, accept, patriarch, rule, convent, explicitli, state, scene, bishop, arriv, flank, soldier, monk, flavia, begin, explicitli, question, societi, find, but, whole, system, subjug, repress, violenc, inevit, bring, tragic, end, around, herbr, br, bill, piec, nunsploit, far, truth, film, depict, consequ, violenc, effect, patriarch, domin, natur, rebellion, corrupt, human, spiritbr, br, describ, titl, piec, postfeminist, end, flavia, triumph, must, alway, corrupt, compromis, pervert, ...]"
13987,"OMG this is one of the worst films iv ever seen and iv seen a lot I'm a Film student. I don't understand why Angelina Jolie would be in this movie? Did she need the money that badly? I love AJ and have seen almost everything shes ever been in so i watched this 2 tick another one off. It was SOO bad! not even good bad, just bad bad. It had 1 or 2 funny little moments but all in all it was bad n a waste of 101 minutes. I cant even say AJ looked good in it because well she didn't. The plot is predictable unless you r expecting a re-telling of Romeo and Juliet then its not. All round disappointing. Maybe if your 12 this could be a good film otherwise I really don't recommend it.","[OMG, one, worst, films, iv, ever, seen, iv, seen, lot, Im, Film, student, dont, understand, Angelina, Jolie, would, movie, need, money, badly, love, AJ, seen, almost, everything, shes, ever, watched, 2, tick, another, one, SOO, bad, even, good, bad, bad, bad, 1, 2, funny, little, moments, bad, n, waste, 101, minutes, cant, even, say, AJ, looked, good, well, didnt, plot, predictable, unless, r, expecting, retelling, Romeo, Juliet, round, disappointing, Maybe, 12, could, good, film, otherwise, really, dont, recommend]",-0.481151,0.235657,-0.481820,-0.315491,"[omg, one, worst, film, iv, ever, seen, iv, seen, lot, im, film, student, dont, understand, angelina, joli, would, movi, need, money, badli, love, aj, seen, almost, everyth, she, ever, watch, 2, tick, anoth, one, soo, bad, even, good, bad, bad, bad, 1, 2, funni, littl, moment, bad, n, wast, 101, minut, cant, even, say, aj, look, good, well, didnt, plot, predict, unless, r, expect, retel, romeo, juliet, round, disappoint, mayb, 12, could, good, film, otherwis, realli, dont, recommend]"
7693,"The Box is a film with great potential, but the makers totally misused that potential. The film seemed to take for ever, because of the boring family dinners and scenes about school and job-dialogs between the action. Those scenes could and must be deleted in my opinion to keep up the tensity and thrill. The philosophy of human free will has potential and seems to referring to the philosophy of Thomas Hobbes (1588-1679), but we find ourselves regretfully struck with magic and nosebleeds, were even Harry Potter would flunked his class with!<br /><br />Probably the best part was that moment when Norma Lewis (Cameron Diaz)has been shot to death, by her loving and caring husband as an act of human free will. I wonder how Hobbes would react if he could...","[Box, film, great, potential, makers, totally, misused, potential, film, seemed, take, ever, boring, family, dinners, scenes, school, jobdialogs, action, scenes, could, must, deleted, opinion, keep, tensity, thrill, philosophy, human, free, potential, seems, referring, philosophy, Thomas, Hobbes, 15881679, find, regretfully, struck, magic, nosebleeds, even, Harry, Potter, would, flunked, class, withbr, br, Probably, best, part, moment, Norma, Lewis, Cameron, Diazhas, shot, death, loving, caring, husband, act, human, free, wonder, Hobbes, would, react, could]",-0.216966,-0.623733,-0.557742,-0.854129,"[box, film, great, potenti, maker, total, misus, potenti, film, seem, take, ever, bore, famili, dinner, scene, school, jobdialog, action, scene, could, must, delet, opinion, keep, tensiti, thrill, philosophi, human, free, potenti, seem, refer, philosophi, thoma, hobb, 15881679, find, regret, struck, magic, noseble, even, harri, potter, would, flunk, class, withbr, br, probabl, best, part, moment, norma, lewi, cameron, diazha, shot, death, love, care, husband, act, human, free, wonder, hobb, would, react, could]"
...,...,...,...,...,...,...,...
27517,"This is one creepy underrated Gem with chilling performances and a fantastic finale!. All the characters are great, and the story was awesome, plus i thought the ending was really cool!. The plot was great, and it never bored me, plus while the child actors were bad, they gave me the creeps!. This happened to be on the space channel a while ago, so i decided to check it out and tape it, i read some good reviews from fellow horror fans, i must say i agree with them, it's very creepy, and suspenseful, plus Strother Martin, was fantastic in his role, as the Satan worshiper. It has tons of creepy atmosphere, and it keeps you guessing throughout, plus all the characters were very likable, and you really start to root for Ben and his family!. It has plenty of disturbing moments, and the film really shocked me at times, plus, it's extremely well made on a low budget!. This is one creepy underrated gem, with chilling performances and a fantastic finale!, i highly recommend this one!. The Direction is very good!. Bernard McEveety does a very good job here, with great camera work, creating a lot of creepy atmosphere, and keeping the film at a very fast pace!. Ther is a little bit of blood and gore. We get a severed leg,lots of bloody corpses,bloody slit throat, slicing and dicing,decapitation, and an impaling. The Acting is excellent!. Strother Martin is fantastic here! as the Satan worshiper, he is extremely creepy, very convincing, was quite chilling, was extremely intense, seemed to be enjoying himself, and just did a fantastic job overall!. Charles Bateman is great as the Dad, he was very caring, very likable, and gave a good show!, i liked him lots. L.Q. Jones is awesome as the Sheriff, he was funny, on top of things, looked very young, had a cool character, and just did an awesome job overall!. Ahna Capri is good as the girlfriend and did what she had to do pretty well. Charles Robinson overacted to the extreme as the Priest and didn't convince me one bit!, and that laugh of his was especially bad. Geri Reischl is actually decent as the daughter, she was somewhat likable, and only got on my nerves a couple times, i rather liked her. Alvy Moore was goofy, but very likable in his role as Tobey i dug him!. Rest of the cast do good. Overall i highly recommend it!. ***1/2 out of 5","[one, creepy, underrated, Gem, chilling, performances, fantastic, finale, characters, great, story, awesome, plus, thought, ending, really, cool, plot, great, never, bored, plus, child, actors, bad, gave, creeps, happened, space, channel, ago, decided, check, tape, read, good, reviews, fellow, horror, fans, must, say, agree, creepy, suspenseful, plus, Strother, Martin, fantastic, role, Satan, worshiper, tons, creepy, atmosphere, keeps, guessing, throughout, plus, characters, likable, really, start, root, Ben, family, plenty, disturbing, moments, film, really, shocked, times, plus, extremely, well, made, low, budget, one, creepy, underrated, gem, chilling, performances, fantastic, finale, highly, recommend, one, Direction, good, Bernard, McEveety, good, job, great, camera, work, creating, ...]",4.142082,1.095047,0.971536,-0.854129,"[one, creepi, underr, gem, chill, perform, fantast, final, charact, great, stori, awesom, plu, thought, end, realli, cool, plot, great, never, bore, plu, child, actor, bad, gave, creep, happen, space, channel, ago, decid, check, tape, read, good, review, fellow, horror, fan, must, say, agre, creepi, suspens, plu, strother, martin, fantast, role, satan, worship, ton, creepi, atmospher, keep, guess, throughout, plu, charact, likabl, realli, start, root, ben, famili, plenti, disturb, moment, film, realli, shock, time, plu, extrem, well, made, low, budget, one, creepi, underr, gem, chill, perform, fantast, final, highli, recommend, one, direct, good, bernard, mceveeti, good, job, great, camera, work, creat, ...]"
28392,"The final chapter in the Hanzo the Razor trilogy provides fitting closure for this entertaining series of samuraisploitation. Inoue replaces Yasuzu Masumura (Blind Beast, Red Angel, Manji) in the director's chair, but the style is pretty much the same, perhaps due to Shintaro Katsu serving as the producer, apart from the titular antihero.<br /><br />Hanzo uncovers a female ghost who is guarding treasure hidden in the bottom of the lake. Of course, Hanzo being Hanzo, he's not put off by the fact she's a ghost, so he proceeds to rape... ahem, interrogate her, using the now familiar revolving net device. The plot takes through a series of blind monks who also doubletime as loansharks, corrupt officials, promiscuous wives and the necessary hack and slash. Hanzo's superior officer, Onishi, and his two servants, provide the typical comedic notes, and generally, it's business as usual.<br /><br />Significantly less convoluted and easier to follow than the first (which is all over the place and a bit of a mess), less stylish, dramatic and bloody than the second (arguably the finest in the Hanzo series), but still entertaining and worthwhile on its own merits. Complete with trademark training sequences, the obligatory rape, swordfights, and a mystery Hanzo is called upon to investigate, this will ultimately satisfy the fans.","[final, chapter, Hanzo, Razor, trilogy, provides, fitting, closure, entertaining, series, samuraisploitation, Inoue, replaces, Yasuzu, Masumura, Blind, Beast, Red, Angel, Manji, directors, chair, style, pretty, much, perhaps, due, Shintaro, Katsu, serving, producer, apart, titular, antiherobr, br, Hanzo, uncovers, female, ghost, guarding, treasure, hidden, bottom, lake, course, Hanzo, Hanzo, hes, put, fact, shes, ghost, proceeds, rape, ahem, interrogate, using, familiar, revolving, net, device, plot, takes, series, blind, monks, also, doubletime, loansharks, corrupt, officials, promiscuous, wives, necessary, hack, slash, Hanzos, superior, officer, Onishi, two, servants, provide, typical, comedic, notes, generally, business, usualbr, br, Significantly, less, convoluted, easier, follow, first, place, bit, mess, less, ...]",0.311404,0.235657,0.049631,-0.315491,"[final, chapter, hanzo, razor, trilog, provid, fit, closur, entertain, seri, samuraisploit, inou, replac, yasuzu, masumura, blind, beast, red, angel, manji, director, chair, style, pretti, much, perhap, due, shintaro, katsu, serv, produc, apart, titular, antiherobr, br, hanzo, uncov, femal, ghost, guard, treasur, hidden, bottom, lake, cours, hanzo, hanzo, he, put, fact, she, ghost, proce, rape, ahem, interrog, use, familiar, revolv, net, devic, plot, take, seri, blind, monk, also, doubletim, loanshark, corrupt, offici, promiscu, wive, necessari, hack, slash, hanzo, superior, offic, onishi, two, servant, provid, typic, comed, note, gener, busi, usualbr, br, significantli, less, convolut, easier, follow, first, place, bit, mess, less, ...]"
5776,"I just saw this movie and all I can say is, where are the drive in's these days. This seems like it would have been a great 2nd feature at a drive in in 1977 (maybe playing with one of those Joan Collins movies), but it's only worth watching now if you're feeling nostalgic for the 70's. Silly plot that is full of holes, but it does remind one of the era it was made in. Interesting to see Melanie Griffith so young and Anne Lockhart is quite attractive, though not much of an actress. In fact, there is not much acting going on in this movie at all. It's sort of a Dukes of Hazzard adventure without a twang or a 1969 Dodge charger jumping over stuff in the Woods. But there is a Mecrury Comet jumping over a garbage dump in this one!","[saw, movie, say, drive, ins, days, seems, like, would, great, 2nd, feature, drive, 1977, maybe, playing, one, Joan, Collins, movies, worth, watching, youre, feeling, nostalgic, 70s, Silly, plot, full, holes, remind, one, era, made, Interesting, see, Melanie, Griffith, young, Anne, Lockhart, quite, attractive, though, much, actress, fact, much, acting, going, movie, sort, Dukes, Hazzard, adventure, without, twang, 1969, Dodge, charger, jumping, stuff, Woods, Mecrury, Comet, jumping, garbage, dump, one]",-0.613243,-0.623733,-0.579433,0.223148,"[saw, movi, say, drive, in, day, seem, like, would, great, 2nd, featur, drive, 1977, mayb, play, one, joan, collin, movi, worth, watch, your, feel, nostalg, 70, silli, plot, full, hole, remind, one, era, made, interest, see, melani, griffith, young, ann, lockhart, quit, attract, though, much, actress, fact, much, act, go, movi, sort, duke, hazzard, adventur, without, twang, 1969, dodg, charger, jump, stuff, wood, mecruri, comet, jump, garbag, dump, one]"
24864,"Cameron Diaz is a woman who is married to a judge, played by Harvey Keitel, whose life is fine until an ex shows up and things get a little complicated.. While I was watching this movie there were several times i asked myself why I was doing so..because the movie is so ridiculous and blah and poorly scripted without any believability. Nor does the audience really car what happens..Even the lovely Cameron can't save this one on a scale of one to ten..2","[Cameron, Diaz, woman, married, judge, played, Harvey, Keitel, whose, life, fine, ex, shows, things, get, little, complicated, watching, movie, several, times, asked, sobecause, movie, ridiculous, blah, poorly, scripted, without, believability, audience, really, car, happensEven, lovely, Cameron, cant, save, one, scale, one, ten2]",-1.009520,-0.623733,-0.872274,-0.854129,"[cameron, diaz, woman, marri, judg, play, harvey, keitel, whose, life, fine, ex, show, thing, get, littl, complic, watch, movi, sever, time, ask, sobecaus, movi, ridicul, blah, poorli, script, without, believ, audienc, realli, car, happenseven, love, cameron, cant, save, one, scale, one, ten2]"


In [23]:
rows_with_not = X_train[X_train['review'].str.contains(r'\bnot\b', case=False, na=False)]
pd.set_option('display.max_colwidth', None)  # Allows columns to display their full content
pd.set_option('display.width', 200) 
rows_with_not

Unnamed: 0,review,cleaned_review,NrPos,NrNeg,NrWords,Nos,Stem Words
11794,"With no fault to the actors (they all put on great performances), the overall story was not very well executed. The movie opens with a great zinger: a crazy old guy forces a young Aborigine girl's car off the road. But then, we're forced to endure 40 minutes of character development with an entirely new group of characters ... and we don't know why until the 40 minutes are up. It turns out that they are the ones who eventually discover the girl's body ... and the story progresses from there.<br /><br />While the story does pick up at that point, it really goes nowhere. After 2 hours, I asked myself: was there a point to this, or was it just to see the characters struggle with accusations of racism and stupidity of how they handled the discovery? The story was ultimately unsatisfying and felt unfinished. While it is well acted, there's not a strong enough backbone in the film to warrant recommending it.","[fault, actors, put, great, performances, overall, story, well, executed, movie, opens, great, zinger, crazy, old, guy, forces, young, Aborigine, girls, car, road, forced, endure, 40, minutes, character, development, entirely, new, group, characters, dont, know, 40, minutes, turns, ones, eventually, discover, girls, body, story, progresses, therebr, br, story, pick, point, really, goes, nowhere, 2, hours, asked, point, see, characters, struggle, accusations, racism, stupidity, handled, discovery, story, ultimately, unsatisfying, felt, unfinished, well, acted, theres, strong, enough, backbone, film, warrant, recommending]",-0.349058,-0.255423,-0.481820,0.0,"[fault, actor, put, great, perform, overal, stori, well, execut, movi, open, great, zinger, crazi, old, guy, forc, young, aborigin, girl, car, road, forc, endur, 40, minut, charact, develop, entir, new, group, charact, dont, know, 40, minut, turn, one, eventu, discov, girl, bodi, stori, progress, therebr, br, stori, pick, point, realli, goe, nowher, 2, hour, ask, point, see, charact, struggl, accus, racism, stupid, handl, discoveri, stori, ultim, unsatisfi, felt, unfinish, well, act, there, strong, enough, backbon, film, warrant, recommend]"
24925,"The first thing I thought when I saw this films was: It is not really a film, at least it is not what we imagine spontaneously when we hear the word ""film"". it is entirely symbolic, everything in it has a figurative meaning. So if you are not used to express thing in a symbolic way, you will find it strange, if you are not acquainted with philosophy, religion, spiritual life, you will think it's just a fairy-tale... and even a weird one, chaotic. For me ""The legend of Zu"" is perfectly transparent. And I do like it. It tells us in images the story about the fight between light and darkness, the fight that is as old as humanity, and every one who is in search of the sens in this life is confronted with it. The film is obviously made by Buddhists. I am not a Buddhist. My religion and the vision of the world and human is different. But as far as we are all humans and have the same human nature we necessarily have common experiences and can understand each other. It is a really beautiful film! And I which we had more films like this - films that have a meaning. There are too many empty stories which are good only to make time pass more quickly.","[first, thing, thought, saw, films, really, film, least, imagine, spontaneously, hear, word, film, entirely, symbolic, everything, figurative, meaning, used, express, thing, symbolic, way, find, strange, acquainted, philosophy, religion, spiritual, life, think, fairytale, even, weird, one, chaotic, legend, Zu, perfectly, transparent, like, tells, us, images, story, fight, light, darkness, fight, old, humanity, every, one, search, sens, life, confronted, film, obviously, made, Buddhists, Buddhist, religion, vision, world, human, different, far, humans, human, nature, necessarily, common, experiences, understand, really, beautiful, film, films, like, films, meaning, many, empty, stories, good, make, time, pass, quickly]",-0.349058,-0.623733,-0.351669,0.0,"[first, thing, thought, saw, film, realli, film, least, imagin, spontan, hear, word, film, entir, symbol, everyth, figur, mean, use, express, thing, symbol, way, find, strang, acquaint, philosophi, religion, spiritu, life, think, fairytal, even, weird, one, chaotic, legend, zu, perfectli, transpar, like, tell, us, imag, stori, fight, light, dark, fight, old, human, everi, one, search, sen, life, confront, film, obvious, made, buddhist, buddhist, religion, vision, world, human, differ, far, human, human, natur, necessarili, common, experi, understand, realli, beauti, film, film, like, film, mean, mani, empti, stori, good, make, time, pass, quickli]"
28578,"Post-feminist depiction of cruelty and sadism.<br /><br />Spoiler alert! <br /><br />This underrated gem of a film tells the story of Flavia, a Fifteenth Century girl of Noble birth walled up in a convent after defining her father and indeed the whole of Medieval Christian society by viewing a fallen Islamic warrior as a human rather than demonic figure.<br /><br />Unable to accept the patriarchal rule of the convent (explicitly stated in a scene where the Bishop arrives flanked by soldiers and monks) Flavia begins to explicitly question the society in which she finds herself and, through butting up against a whole system of subjugation, repression and violence, inevitably brings a tragic end not only to herself but all those around her.<br /><br />Billed as a piece of nunsploitation this is far from the truth. This is a film depiction the consequences of violence, the effects of patriarchal dominance, the nature of rebellion and the corruption of the human spirit.<br /><br />I described it in the title of this piece as 'post-feminist' and in the end Flavia's triumphs must always be corrupted, compromised and perverted by men. Even Flavia's gruesome end is perpetrated by men for men (the women turn away and only the monks look on without horror.<br /><br />As to the much discussed violence: this is a depiction of the effects of violence and the horrors of a world driven mad by religious excess. To have shied away from the violence would have limited the film's impact, would have cheapened the film and allowed it to be assimilated within the Patriarchal discourse it is exposing. In addition it is a realistic portrait of medieval society.<br /><br />Beautifully filmed, brilliantly acted (notably by Florinda Bolkin and Maria Casares), containing a wonderful score by piovani and still challenging after all these years Flavia is a classic of European Cinema.","[Postfeminist, depiction, cruelty, sadismbr, br, Spoiler, alert, br, br, underrated, gem, film, tells, story, Flavia, Fifteenth, Century, girl, Noble, birth, walled, convent, defining, father, indeed, whole, Medieval, Christian, society, viewing, fallen, Islamic, warrior, human, rather, demonic, figurebr, br, Unable, accept, patriarchal, rule, convent, explicitly, stated, scene, Bishop, arrives, flanked, soldiers, monks, Flavia, begins, explicitly, question, society, finds, butting, whole, system, subjugation, repression, violence, inevitably, brings, tragic, end, around, herbr, br, Billed, piece, nunsploitation, far, truth, film, depiction, consequences, violence, effects, patriarchal, dominance, nature, rebellion, corruption, human, spiritbr, br, described, title, piece, postfeminist, end, Flavias, triumphs, must, always, corrupted, compromised, perverted, ...]",-0.216966,0.726737,0.516006,0.0,"[postfeminist, depict, cruelti, sadismbr, br, spoiler, alert, br, br, underr, gem, film, tell, stori, flavia, fifteenth, centuri, girl, nobl, birth, wall, convent, defin, father, inde, whole, mediev, christian, societi, view, fallen, islam, warrior, human, rather, demon, figurebr, br, unabl, accept, patriarch, rule, convent, explicitli, state, scene, bishop, arriv, flank, soldier, monk, flavia, begin, explicitli, question, societi, find, but, whole, system, subjug, repress, violenc, inevit, bring, tragic, end, around, herbr, br, bill, piec, nunsploit, far, truth, film, depict, consequ, violenc, effect, patriarch, domin, natur, rebellion, corrupt, human, spiritbr, br, describ, titl, piec, postfeminist, end, flavia, triumph, must, alway, corrupt, compromis, pervert, ...]"
13987,"OMG this is one of the worst films iv ever seen and iv seen a lot I'm a Film student. I don't understand why Angelina Jolie would be in this movie? Did she need the money that badly? I love AJ and have seen almost everything shes ever been in so i watched this 2 tick another one off. It was SOO bad! not even good bad, just bad bad. It had 1 or 2 funny little moments but all in all it was bad n a waste of 101 minutes. I cant even say AJ looked good in it because well she didn't. The plot is predictable unless you r expecting a re-telling of Romeo and Juliet then its not. All round disappointing. Maybe if your 12 this could be a good film otherwise I really don't recommend it.","[OMG, one, worst, films, iv, ever, seen, iv, seen, lot, Im, Film, student, dont, understand, Angelina, Jolie, would, movie, need, money, badly, love, AJ, seen, almost, everything, shes, ever, watched, 2, tick, another, one, SOO, bad, even, good, bad, bad, bad, 1, 2, funny, little, moments, bad, n, waste, 101, minutes, cant, even, say, AJ, looked, good, well, didnt, plot, predictable, unless, r, expecting, retelling, Romeo, Juliet, round, disappointing, Maybe, 12, could, good, film, otherwise, really, dont, recommend]",-0.481151,0.235657,-0.481820,0.0,"[omg, one, worst, film, iv, ever, seen, iv, seen, lot, im, film, student, dont, understand, angelina, joli, would, movi, need, money, badli, love, aj, seen, almost, everyth, she, ever, watch, 2, tick, anoth, one, soo, bad, even, good, bad, bad, bad, 1, 2, funni, littl, moment, bad, n, wast, 101, minut, cant, even, say, aj, look, good, well, didnt, plot, predict, unless, r, expect, retel, romeo, juliet, round, disappoint, mayb, 12, could, good, film, otherwis, realli, dont, recommend]"
8561,"I have been most fortunate this year to have seen several films at my university's art museum. On occasion, well, more like half of the time, I am unable to watch the films there. I have systematically attempted to view each of the films that I have missed. So far Plagues and Pleasures on the Salton Sea and Who Killed the Electric Car? are the other films that I have had to watch this way. The film covers an intriguing subject matter and is well-theorized (emphasis on this later) but not as successful as Plagues and Pleasures, but far superior to Electric Car. <br /><br />The film's thesis concern's the future of the American concept of suburban living. It questions the feasibility of such a practice as oil prices rise. So, the film discusses the origin of the suburb, and it's evolution until the early 2000s. One theme the film discusses at length is the alienation the suburb creates among its inhabitants. While several people may live together, they do not ""know"" each other as we define the word. This, to me, represents the strength of the film: its appeal to actual human emotion. We are able to understand the filmmakers' argument so much easier because they do not have to convince us of their argument's legitimacy. This is also one of the reasons Salton Sea is such a wonderful documentary. <br /><br />Unfortunately, Suburbia loses its message in firebrand explanation in support of its central argument. As those interviewed speak, their arguments become progressively more akin to those made by militant environmentalists. We are told that oil production will hit its peak in this decade, but are given no scientific evidence (professional reports, statistics, graphs, etc) in support of this claim. We are given little information as to how this date was calculated. Fortunately, this was the only significant flaw that I was able to detect in the film's argument yet it's a glaring one nevertheless. Another less-important discrepancy I noticed was the liberal (political) bias which could polarize some viewers. However, this bias is revealed thorough clips of various events and not the filmmakers themselves. The clips, especially those from the 1950's, seemed a tad unnecessary to me. The film was no better with their presence, and would have been more concise in their absence. <br /><br />As I thought more of this film before composing this review, I thought about why I found its argument more convincing than other documentaries that I'd recently viewed. Finally, I realized that the filmmakers actually offered analysis to the suburban problem. They propose a decentralized village-system where pockets of people would live together. They posit this practice would lower the necessity for fossil fuels and reduce wasted space. They define wasted space as the long stretches of parking lots between shopping areas, for instance. What is incredible about this supposition is that it's actually conceivable. Most documentaries vaguely state that some problem should be ended but offer no method of doing so. Thinking more about the film, I decided that this analysis is what saved the film for me and why I give it a favorable review. <br /><br />While neither perfectly convincing nor fluid in presentation, The End of Suburbia is a worthwhile investment of one's time. It not only addresses the contemporary problem of sprawl, but it also provides realistic insight on how to amend it. The audience can also enjoy the high production value with various clips from the 1950's spliced with the modern arguers. People living in Atlanta, Georgia or the Triad region of North Carolina will particularly enjoy this documentary as sprawl is the most established there.","[fortunate, year, seen, several, films, universitys, art, museum, occasion, well, like, half, time, unable, watch, films, systematically, attempted, view, films, missed, far, Plagues, Pleasures, Salton, Sea, Killed, Electric, Car, films, watch, way, film, covers, intriguing, subject, matter, welltheorized, emphasis, later, successful, Plagues, Pleasures, far, superior, Electric, Car, br, br, films, thesis, concerns, future, American, concept, suburban, living, questions, feasibility, practice, oil, prices, rise, film, discusses, origin, suburb, evolution, early, 2000s, One, theme, film, discusses, length, alienation, suburb, creates, among, inhabitants, several, people, may, live, together, know, define, word, represents, strength, film, appeal, actual, human, emotion, able, understand, filmmakers, argument, much, ...]",2.028604,0.972277,2.045284,0.0,"[fortun, year, seen, sever, film, univers, art, museum, occas, well, like, half, time, unabl, watch, film, systemat, attempt, view, film, miss, far, plagu, pleasur, salton, sea, kill, electr, car, film, watch, way, film, cover, intrigu, subject, matter, welltheor, emphasi, later, success, plagu, pleasur, far, superior, electr, car, br, br, film, thesi, concern, futur, american, concept, suburban, live, question, feasibl, practic, oil, price, rise, film, discuss, origin, suburb, evolut, earli, 2000, one, theme, film, discuss, length, alien, suburb, creat, among, inhabit, sever, peopl, may, live, togeth, know, defin, word, repres, strength, film, appeal, actual, human, emot, abl, understand, filmmak, argument, much, ...]"
...,...,...,...,...,...,...,...
19598,"People don't seem to agree with me that movies can be bad and good at the same time. The same type of people that see a movie with Carrot Top on the cover, surfing through a frickin office and continue to watch the movie with serious expectations. Is Carrot Top funny? Of course not. Was this movie anything special? Of course not. It was a dumb movie and everyone assumed so simply based on what they know about Mr. Top. Movies like this, or Kazaam, or Killer Klowns From Outer Space and pretty much any movie Pauly Shore has ever been in are not meant to be taken seriously and because of this, they really shouldn't be considered some of the worst movies ever made. You watch them either expecting a dumb movie that's hilariously bad or you are like six years old and genuinely think Carrot Top is hilarious.<br /><br />Please people if you ready know Carrot Top is retarded and you want to watch a serious movie and bother writing a serious review....don't watch this. Picking on this movie is like picking on a 5 year old for not knowing the alphabet.","[People, dont, seem, agree, movies, bad, good, time, type, people, see, movie, Carrot, Top, cover, surfing, frickin, office, continue, watch, movie, serious, expectations, Carrot, Top, funny, course, movie, anything, special, course, dumb, movie, everyone, assumed, simply, based, know, Mr, Top, Movies, like, Kazaam, Killer, Klowns, Outer, Space, pretty, much, movie, Pauly, Shore, ever, meant, taken, seriously, really, shouldnt, considered, worst, movies, ever, made, watch, either, expecting, dumb, movie, thats, hilariously, bad, like, six, years, old, genuinely, think, Carrot, Top, hilariousbr, br, Please, people, ready, know, Carrot, Top, retarded, want, watch, serious, movie, bother, writing, serious, reviewdont, watch, Picking, movie, like, ...]",0.179311,-0.009883,-0.178134,0.0,"[peopl, dont, seem, agre, movi, bad, good, time, type, peopl, see, movi, carrot, top, cover, surf, frickin, offic, continu, watch, movi, seriou, expect, carrot, top, funni, cours, movi, anyth, special, cours, dumb, movi, everyon, assum, simpli, base, know, mr, top, movi, like, kazaam, killer, klown, outer, space, pretti, much, movi, pauli, shore, ever, meant, taken, serious, realli, shouldnt, consid, worst, movi, ever, made, watch, either, expect, dumb, movi, that, hilari, bad, like, six, year, old, genuin, think, carrot, top, hilariousbr, br, pleas, peopl, readi, know, carrot, top, retard, want, watch, seriou, movi, bother, write, seriou, reviewdont, watch, pick, movi, like, ...]"
1013,"This movie is a real low budget production, yet I will not say anything more on that as it already has been covered. I give this movie a low rating for the story alone, but I met the director the night I saw the film and he gave me an additional reason to dislike the movie. He asked me how I enjoyed it and I told him that it was not easy to like. My main objection was the lack of foundation for the relationship between the two main characters, I was never convinced that they were close. I also told him that the scene where the main characters were presented as children becoming friends was too late in the film.<br /><br />He told me that the flashback scenes were not in the original script. That they were added because he felt like I did that the two main characters did not appear close. He went on to explain that these scenes were not filmed to his satisfaction as they were out of money. I agree that they did not do much for the film.<br /><br />Another fact about the movie, that I was not aware of, is the actor who had the lead wrote the script based on his own personal experience. This is usually a bad move as some writers do not take into consideration the emotional reaction the viewer. The story is so close to home that the writer make too many assumption as to the audience's reaction to his own tragedy. And the story is tragic. However, it did not work for me as I never cared for any of the characters, least of all the lead. What was presented were two evil people out to make a buck by any means, regardless who gets hurt. When Ms. Young's character decides to give up he evil ways, it appears that she does so because she is ineffective, not because she knows she is doing wrong. If the movie has a message then I suspect that only the writer is aware of it.","[movie, real, low, budget, production, yet, say, anything, already, covered, give, movie, low, rating, story, alone, met, director, night, saw, film, gave, additional, reason, dislike, movie, asked, enjoyed, told, easy, like, main, objection, lack, foundation, relationship, two, main, characters, never, convinced, close, also, told, scene, main, characters, presented, children, becoming, friends, late, filmbr, br, told, flashback, scenes, original, script, added, felt, like, two, main, characters, appear, close, went, explain, scenes, filmed, satisfaction, money, agree, much, filmbr, br, Another, fact, movie, aware, actor, lead, wrote, script, based, personal, experience, usually, bad, move, writers, take, consideration, emotional, reaction, viewer, story, close, home, ...]",-0.349058,0.358427,0.212320,0.0,"[movi, real, low, budget, product, yet, say, anyth, alreadi, cover, give, movi, low, rate, stori, alon, met, director, night, saw, film, gave, addit, reason, dislik, movi, ask, enjoy, told, easi, like, main, object, lack, foundat, relationship, two, main, charact, never, convinc, close, also, told, scene, main, charact, present, children, becom, friend, late, filmbr, br, told, flashback, scene, origin, script, ad, felt, like, two, main, charact, appear, close, went, explain, scene, film, satisfact, money, agre, much, filmbr, br, anoth, fact, movi, awar, actor, lead, wrote, script, base, person, experi, usual, bad, move, writer, take, consider, emot, reaction, viewer, stori, close, home, ...]"
35830,"That's the question you have to ask yourself when you watch this movie ""What was the point?"" This movie was nothing but an hour and a half of confusion with completely unlikable people (not going to use the word actors) and a script that you could tell didn't exist.<br /><br />One of the things that made me laugh the most about this movie was how it said ""Victorian story written by"" which means that there was actually a script to that part of the story. The entire victorian section had no dialogue, and was just comprised of shots of a guy staring at a girl and vice versa. Making that part of the movie as scripted as a camera left on at a train station.<br /><br />OK, time for the story. It starts out with a guy sitting in a chair never once getting out of it. Oh blocking, who needs you? These newspeople come to his house and practically beg him to tell this story about these dead girls. So he starts off the story in Victorian times. and here's how the scene goes (Guy and girl are in a field. pretty music starts to play) (guy stares at girl) (girl stares at guy) (guy stares at girl) cut back to movie. That's pretty much all that happens for about half the movie.<br /><br />The rest of the film is incredibly awkward dialogue about a bunch of models wanting to buy an apartment. So this real estate agent shows them one and when i say the dialogue is awkward i mean, if it were a dancer it would trip during the MACARENA. None of the characters in this movie are likable. The models are incredibly irritating, the victorian people don't talk, and the guy telling the story has the personality of a sack of onions. So eventually all the girls get killed off. and by killed off, i mean drug offscreen. ooh. you showed ONE death? and by death i mean holding her face till they put the blood makeup on? awesome.<br /><br />HOW this guy even knows this story baffles me. He says it's because he saw it. but how? there was no guy in that apartment! the door was locked shut with no way out, the windows were attached to a fire escape that was too rotten to work, how the HELL did he see all that? Oh plot holes. we DO love you. So the movie finishes up with the newswoman saying ""i think you made it up. you're wasting our time"" despite the fact that she begged him for the interview in the first place. Whatever. This movie was stupid, pointless, and made no sense with a lot of plot holes. I could go on and on about this movie, but i don't see the need. i'd much rather spend my time doing something uselful. Like widdle something. ""Hell's Threshold"" more belongs in purgatory with 2 dumb models. out of 10.","[Thats, question, ask, watch, movie, point, movie, nothing, hour, half, confusion, completely, unlikable, people, going, use, word, actors, script, could, tell, didnt, existbr, br, One, things, made, laugh, movie, said, Victorian, story, written, means, actually, script, part, story, entire, victorian, section, dialogue, comprised, shots, guy, staring, girl, vice, versa, Making, part, movie, scripted, camera, left, train, stationbr, br, OK, time, story, starts, guy, sitting, chair, never, getting, Oh, blocking, needs, newspeople, come, house, practically, beg, tell, story, dead, girls, starts, story, Victorian, times, heres, scene, goes, Guy, girl, field, pretty, music, starts, play, guy, stares, girl, girl, stares, guy, guy, ...]",-0.216966,1.463357,1.351144,0.0,"[that, question, ask, watch, movi, point, movi, noth, hour, half, confus, complet, unlik, peopl, go, use, word, actor, script, could, tell, didnt, existbr, br, one, thing, made, laugh, movi, said, victorian, stori, written, mean, actual, script, part, stori, entir, victorian, section, dialogu, compris, shot, guy, stare, girl, vice, versa, make, part, movi, script, camera, left, train, stationbr, br, ok, time, stori, start, guy, sit, chair, never, get, oh, block, need, newspeopl, come, hous, practic, beg, tell, stori, dead, girl, start, stori, victorian, time, here, scene, goe, guy, girl, field, pretti, music, start, play, guy, stare, girl, girl, stare, guy, guy, ...]"
28392,"The final chapter in the Hanzo the Razor trilogy provides fitting closure for this entertaining series of samuraisploitation. Inoue replaces Yasuzu Masumura (Blind Beast, Red Angel, Manji) in the director's chair, but the style is pretty much the same, perhaps due to Shintaro Katsu serving as the producer, apart from the titular antihero.<br /><br />Hanzo uncovers a female ghost who is guarding treasure hidden in the bottom of the lake. Of course, Hanzo being Hanzo, he's not put off by the fact she's a ghost, so he proceeds to rape... ahem, interrogate her, using the now familiar revolving net device. The plot takes through a series of blind monks who also doubletime as loansharks, corrupt officials, promiscuous wives and the necessary hack and slash. Hanzo's superior officer, Onishi, and his two servants, provide the typical comedic notes, and generally, it's business as usual.<br /><br />Significantly less convoluted and easier to follow than the first (which is all over the place and a bit of a mess), less stylish, dramatic and bloody than the second (arguably the finest in the Hanzo series), but still entertaining and worthwhile on its own merits. Complete with trademark training sequences, the obligatory rape, swordfights, and a mystery Hanzo is called upon to investigate, this will ultimately satisfy the fans.","[final, chapter, Hanzo, Razor, trilogy, provides, fitting, closure, entertaining, series, samuraisploitation, Inoue, replaces, Yasuzu, Masumura, Blind, Beast, Red, Angel, Manji, directors, chair, style, pretty, much, perhaps, due, Shintaro, Katsu, serving, producer, apart, titular, antiherobr, br, Hanzo, uncovers, female, ghost, guarding, treasure, hidden, bottom, lake, course, Hanzo, Hanzo, hes, put, fact, shes, ghost, proceeds, rape, ahem, interrogate, using, familiar, revolving, net, device, plot, takes, series, blind, monks, also, doubletime, loansharks, corrupt, officials, promiscuous, wives, necessary, hack, slash, Hanzos, superior, officer, Onishi, two, servants, provide, typical, comedic, notes, generally, business, usualbr, br, Significantly, less, convoluted, easier, follow, first, place, bit, mess, less, ...]",0.311404,0.235657,0.049631,0.0,"[final, chapter, hanzo, razor, trilog, provid, fit, closur, entertain, seri, samuraisploit, inou, replac, yasuzu, masumura, blind, beast, red, angel, manji, director, chair, style, pretti, much, perhap, due, shintaro, katsu, serv, produc, apart, titular, antiherobr, br, hanzo, uncov, femal, ghost, guard, treasur, hidden, bottom, lake, cours, hanzo, hanzo, he, put, fact, she, ghost, proce, rape, ahem, interrog, use, familiar, revolv, net, devic, plot, take, seri, blind, monk, also, doubletim, loanshark, corrupt, offici, promiscu, wive, necessari, hack, slash, hanzo, superior, offic, onishi, two, servant, provid, typic, comed, note, gener, busi, usualbr, br, significantli, less, convolut, easier, follow, first, place, bit, mess, less, ...]"


In [27]:
nos_stats = X_train['Nos'].agg(['mean', 'max', 'min'])

# Display the results
print(nos_stats)

mean   -9.769963e-18
max     1.153455e+01
min    -8.541290e-01
Name: Nos, dtype: float64


Making Embeddings

When making the embeddings, we choose values of 4 for window and 1 for sg. 1 was chosen for sg to use SkipGram. The data has 40000 reviews, and even though this is sufficient to train a Word2Vec model, rare or domain-specific sentiment words may not be well captured by a CBOW. The window chosen was 4 because the sentiment is often localized in very few words.

In [28]:
reviews = list(X_train['Stem Words'])
model = Word2Vec(
    sentences=reviews,  # Tokenized data
    vector_size=100,             # Embedding dimensionality
    window=4,                    # Context window size
    min_count=2,                 # Minimum frequency for words to be included
    sg=1,                        # Use CBOW (set to 1 for Skip-Gram)
    workers=4,                   # Number of threads for faster training
    epochs=10                    # Number of passes over the data
)

# Save the model for future use
model.save("word2vec_cbow.model")


In [29]:
model.wv['film']

array([-0.07770786,  0.15269166, -0.00447225,  0.034062  ,  0.325359  ,
       -0.12248843,  0.02910477,  0.4072858 , -0.14709984, -0.12952596,
       -0.25982457, -0.3839486 , -0.05629438,  0.42851132,  0.35073417,
        0.10201956, -0.10908537,  0.33942097, -0.20338179, -0.19975029,
        0.29158154, -0.02966006,  0.32787427, -0.00136138,  0.32530898,
       -0.12784657, -0.11470099,  0.22793236, -0.01679601,  0.07158659,
        0.25591916, -0.17610694,  0.296032  , -0.42777047, -0.02839102,
        0.30722722,  0.10857434, -0.04826575, -0.36882648, -0.07117313,
        0.1052228 , -0.256852  ,  0.09142398,  0.2636197 ,  0.14035228,
       -0.00501746, -0.04419826, -0.37684727,  0.0161701 ,  0.07387528,
        0.21617617, -0.18350986, -0.3036567 , -0.2822288 , -0.12519188,
        0.14377035,  0.20223454, -0.21154587, -0.15528145,  0.27358457,
        0.06026944,  0.02316202, -0.09303118,  0.03925332, -0.2652876 ,
        0.14225906,  0.188505  ,  0.19473477, -0.11783864, -0.02

In [30]:
model.wv.most_similar('film')

[('movi', 0.9021115303039551),
 ('towelhead', 0.7714914083480835),
 ('shortfilm', 0.7663951516151428),
 ('nonmainstream', 0.7499780058860779),
 ('cassavettess', 0.7464396953582764),
 ('witnessedbr', 0.7415550351142883),
 ('directtodvd', 0.7393913865089417),
 ('criticsbr', 0.7382307648658752),
 ('fffc', 0.7371219396591187),
 ('tastesbr', 0.7359960079193115)]

'film' and 'movi' (movie) have similar vectors indicating this embedding seems to have worked

In [31]:
def transform_review_to_vector(review, model): #Create 1 vector of the text based on the Word2Vec vectors of the words in the sentence
    word_vectors = []
    for word in review:
        if word in model.wv:
            word_vectors.append(model.wv[word])
    if len(word_vectors) == 0:  # Handle case where no words are in vocabulary
        return np.zeros(model.vector_size)
    return np.mean(word_vectors, axis=0)

Developping models: Embeddings, Features, Embeddings + Features. The architecture chosen starts at a high number of neurons, and then goes down gradually over the layers, so that it extracts more and more high level features. The data is balanced (same ratio for positives and negatives), so we use accuracy for model selection. The number of neurons for the model with only features was reduced (8 -> 4 -> 2)

In [33]:
X_train.shape

(32000, 7)

In [34]:
def prepare_data(choice, data, cols, model):
    if choice["Embedding"] == 1:
      data["Embedded Data"] = data["Stem Words"].apply(lambda x : transform_review_to_vector(x, model))
      df_embeddings = pd.DataFrame(data['Embedded Data'].tolist(), index=data.index)
    else:
      df_embeddings = pd.DataFrame()

    if choice["Features"] == 1:
      df_features = data[cols]
    else:
      df_features = pd.DataFrame()

    final_df = pd.concat([df_embeddings, df_features], axis=1)
    return final_df.to_numpy()
#Feedforward Neural Network (FNN)
choice_params = [{"Embedding" : 1, "Features" : 0}, {"Embedding" : 0, "Features" : 1}, {"Embedding" : 1, "Features" : 1}]
network_sizes = {(0, 1): (8, 4, 2)}
results = {}
for option in choice_params:
  X_train_opt = prepare_data(option, X_train, cols, model)
  X_val_opt = prepare_data(option, X_val, cols, model)
  X_test_opt = prepare_data(option, X_test, cols, model)


  # Input size depends on your feature representation (e.g., TF-IDF size or embedding size)
  input_dim = X_train_opt.shape[1]
  tuple_option = (option["Embedding"], option["Features"])
  if tuple_option in network_sizes.keys():
      # Define the FNN
    fnn = Sequential([
      Dense(network_sizes[tuple_option][0], activation='relu', input_shape=(input_dim,)),  # Input layer
      Dropout(0.5),  # Dropout for regularization
      Dense(network_sizes[tuple_option][1], activation='relu'),  # Hidden layer
      Dropout(0.4),
      Dense(network_sizes[tuple_option][2], activation = 'relu'),
      Dropout(0.3),
      Dense(1, activation='sigmoid')  # Output layer for binary classification
      ])
  else:
    fnn = Sequential([
      Dense(128, activation='relu', input_shape=(input_dim,)),  # Input layer
      Dropout(0.5),  # Dropout for regularization
      Dense(64, activation='relu'),  # Hidden layer
      Dropout(0.4),
      Dense(32, activation = 'relu'),
      Dropout(0.3),
      Dense(1, activation='sigmoid')  # Output layer for binary classification
      ])



      # Compile the model
  fnn.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

  early_stopping = EarlyStopping(monitor='val_loss', patience=3, restore_best_weights=True)
  history = fnn.fit(X_train_opt, y_train, validation_data=(X_val_opt, y_val), batch_size = 64, epochs=50, callbacks=[early_stopping])
  last_val_accuracy = history.history['val_accuracy'][-1]
  results[tuple_option] = last_val_accuracy

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


Epoch 1/50
[1m500/500[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m11s[0m 12ms/step - accuracy: 0.6807 - loss: 0.5707 - val_accuracy: 0.8618 - val_loss: 0.3328
Epoch 2/50
[1m500/500[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 10ms/step - accuracy: 0.8432 - loss: 0.3715 - val_accuracy: 0.8702 - val_loss: 0.3091
Epoch 3/50
[1m500/500[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 11ms/step - accuracy: 0.8557 - loss: 0.3526 - val_accuracy: 0.8709 - val_loss: 0.3077
Epoch 4/50
[1m500/500[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 11ms/step - accuracy: 0.8522 - loss: 0.3537 - val_accuracy: 0.8687 - val_loss: 0.3118
Epoch 5/50
[1m500/500[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 12ms/step - accuracy: 0.8572 - loss: 0.3480 - val_accuracy: 0.8739 - val_loss: 0.3046
Epoch 6/50
[1m500/500[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 11ms/step - accuracy: 0.8582 - loss: 0.3401 - val_accuracy: 0.8726 - val_loss: 0.3032
Epoch 7/50
[1m500/50

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


Epoch 1/50
[1m500/500[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 11ms/step - accuracy: 0.5200 - loss: 0.7260 - val_accuracy: 0.6221 - val_loss: 0.6835
Epoch 2/50
[1m500/500[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 8ms/step - accuracy: 0.5369 - loss: 0.6792 - val_accuracy: 0.6816 - val_loss: 0.6509
Epoch 3/50
[1m500/500[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 10ms/step - accuracy: 0.5623 - loss: 0.6618 - val_accuracy: 0.7305 - val_loss: 0.6110
Epoch 4/50
[1m500/500[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 8ms/step - accuracy: 0.5924 - loss: 0.6547 - val_accuracy: 0.7354 - val_loss: 0.5938
Epoch 5/50
[1m500/500[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 9ms/step - accuracy: 0.6075 - loss: 0.6446 - val_accuracy: 0.7340 - val_loss: 0.5858
Epoch 6/50
[1m500/500[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 10ms/step - accuracy: 0.6210 - loss: 0.6345 - val_accuracy: 0.7355 - val_loss: 0.5719
Epoch 7/50
[1m500/500[

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


Epoch 1/50
[1m500/500[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 11ms/step - accuracy: 0.7178 - loss: 0.5448 - val_accuracy: 0.8509 - val_loss: 0.3520
Epoch 2/50
[1m500/500[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 10ms/step - accuracy: 0.8334 - loss: 0.3874 - val_accuracy: 0.8540 - val_loss: 0.3420
Epoch 3/50
[1m500/500[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 6ms/step - accuracy: 0.8503 - loss: 0.3569 - val_accuracy: 0.8687 - val_loss: 0.3126
Epoch 4/50
[1m500/500[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 13ms/step - accuracy: 0.8524 - loss: 0.3517 - val_accuracy: 0.8726 - val_loss: 0.3099
Epoch 5/50
[1m500/500[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 10ms/step - accuracy: 0.8582 - loss: 0.3437 - val_accuracy: 0.8706 - val_loss: 0.3024
Epoch 6/50
[1m500/500[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 11ms/step - accuracy: 0.8615 - loss: 0.3291 - val_accuracy: 0.8734 - val_loss: 0.3060
Epoch 7/50
[1m500/500

In [35]:
results

{(1, 0): 0.8712499737739563,
 (0, 1): 0.737375020980835,
 (1, 1): 0.8803750276565552}

(1, 0): The model is trained using only embeddings and achieves an accuracy of approximately 87.12%
(0, 1): The model uses only the predefined feature columns and achieves an accuracy of approximately 73.74%
(1, 1): The model combines both embeddings and feature columns and achieves an accuracy of approximately 88.04%

In [36]:
from sklearn.metrics import precision_score, recall_score, f1_score

In [40]:
y_pred_probs = fnn.predict(X_test_opt)  # Predicted probabilities
y_pred = (y_pred_probs > 0.5).astype(int)  # Convert probabilities to binary labels (0 or 1)

# Calculate precision, recall, and F1-score
precision = precision_score(y_test, y_pred)
recall = recall_score(y_test, y_pred)
f1 = f1_score(y_test, y_pred)

# Print evaluation metrics
print("Precision:", precision)
print("Recall:", recall)
print("F1 Score:", f1)

[1m313/313[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 4ms/step
Precision: 0.8636009353078722
Recall: 0.8795395911887279
F1 Score: 0.8714973945531412


We achieve a precision of 86.36%, a recall score of 87.95% and an F1 score of 87.15%. These high scores indicate our model is quite strong in predicting the sentiment of reviews based on the review text.