In [1]:
# get IMDB Dataset: http://ai.stanford.edu/~amaas/data/sentiment/aclImdb_v1.tar.gz
!wget http://ai.stanford.edu/~amaas/data/sentiment/aclImdb_v1.tar.gz

# extract downloaded files
!tar xvzf aclImdb_v1.tar.gz


[1;30;43mStreaming output truncated to the last 5000 lines.[0m
aclImdb/train/unsup/44983_0.txt
aclImdb/train/unsup/44982_0.txt
aclImdb/train/unsup/44981_0.txt
aclImdb/train/unsup/44980_0.txt
aclImdb/train/unsup/44979_0.txt
aclImdb/train/unsup/44978_0.txt
aclImdb/train/unsup/44977_0.txt
aclImdb/train/unsup/44976_0.txt
aclImdb/train/unsup/44975_0.txt
aclImdb/train/unsup/44974_0.txt
aclImdb/train/unsup/44973_0.txt
aclImdb/train/unsup/44972_0.txt
aclImdb/train/unsup/44971_0.txt
aclImdb/train/unsup/44970_0.txt
aclImdb/train/unsup/44969_0.txt
aclImdb/train/unsup/44968_0.txt
aclImdb/train/unsup/44967_0.txt
aclImdb/train/unsup/44966_0.txt
aclImdb/train/unsup/44965_0.txt
aclImdb/train/unsup/44964_0.txt
aclImdb/train/unsup/44963_0.txt
aclImdb/train/unsup/44962_0.txt
aclImdb/train/unsup/44961_0.txt
aclImdb/train/unsup/44960_0.txt
aclImdb/train/unsup/44959_0.txt
aclImdb/train/unsup/44958_0.txt
aclImdb/train/unsup/44957_0.txt
aclImdb/train/unsup/44956_0.txt
aclImdb/train/unsup/44955_0.txt
aclImdb

In [2]:
import os
import pandas as pd
pd.set_option('max_colwidth', 1500)
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import re
import random
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity
import nltk
nltk.download('punkt_tab')
nltk.download('stopwords')
nltk.download('wordnet')
from nltk.tokenize import word_tokenize
from nltk.corpus import stopwords
from nltk.stem import WordNetLemmatizer
import tensorflow as tf
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences

[nltk_data] Downloading package punkt_tab to /root/nltk_data...
[nltk_data]   Package punkt_tab is already up-to-date!
[nltk_data] Downloading package stopwords to /root/nltk_data...
[nltk_data]   Package stopwords is already up-to-date!
[nltk_data] Downloading package wordnet to /root/nltk_data...
[nltk_data]   Package wordnet is already up-to-date!


In [3]:
def read_imdb_data(data_dir):
    data = {
        'Review': [],
        'Sentiment': []
    }
    #iterating over folders in train/ test data
    for sent_type in ['pos', 'neg']:
      # open the folder(pos/neg)
      path = os.path.join(data_dir, sent_type)
      # iterate over files in each folder
      for file in os.listdir(path):
        fpath = os.path.join(path, file)
        # open the file
        with open(fpath, 'r', encoding='utf-8') as f:
          # checking file type in the folder
          if file.endswith(".txt"):
            data['Review'].append(f.read())
            data['Sentiment'].append(1 if sent_type == 'pos' else 0)
    return pd.DataFrame(data)

In [4]:
train_df = read_imdb_data('aclImdb/train')
test_df = read_imdb_data('aclImdb/test')

In [5]:
train_df.info()
test_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 25000 entries, 0 to 24999
Data columns (total 2 columns):
 #   Column     Non-Null Count  Dtype 
---  ------     --------------  ----- 
 0   Review     25000 non-null  object
 1   Sentiment  25000 non-null  int64 
dtypes: int64(1), object(1)
memory usage: 390.8+ KB
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 25000 entries, 0 to 24999
Data columns (total 2 columns):
 #   Column     Non-Null Count  Dtype 
---  ------     --------------  ----- 
 0   Review     25000 non-null  object
 1   Sentiment  25000 non-null  int64 
dtypes: int64(1), object(1)
memory usage: 390.8+ KB


In [6]:
train_df.head(20)

Unnamed: 0,Review,Sentiment
0,"Like others, I have seen and studied most of the books and films concerning the Clutter Killings, including a few dramatic works thematically based on the actions and psycho-mythology of the participants to the crime -- including Capote himself. As to Capote, I cannot forgive him for willfully withholding Perry Smith's confessions, intimacies and writings from even the defense counsels. I believe truths and facts Capote ""reserved"" for his ""book,"" which required for Capote two guilty verdicts and capital punishment, would almost certainly have sustained a successful insanity defense for Perry Smith even under the old McNaughton Rule. Capote himself could never write another major literary work after ""In Cold Blood."" Shame and guilt. In my opinion, he willingly encouraged and planned the brutal capital punishment to provide the spectacular ending he required for his book/drama. To him, both men HAD to die for his book to succeed. The book had to justify itself by pretending it was about the horror of capital punishment. His actions and silence assured that ice-cold conclusion.<br /><br />Capote's book is not truth. It is not factual or journalistic. It is drama and melodrama spiced with his own creatively psychotic imagination. What most people consider the virtues of the contemporaneous first movie are stark images of Capote's mind, which may have been the most cold-blooded aspect of all. No wonder viewers ironically but necessarily prefer Blake's performance. That actor I...",1
1,"I was pleasantly surprised to find that How to Lose Friends and Alienate People was nowhere near as 'gross-out' a comedy as the trailer had led me to expect. I rapidly became absorbed in the unfolding of the narrative and remained engrossed throughout. Pacing of the more visual humorous content was, I thought, spot on. (I mean I got the impression I was witnessing Pegg's attempts at restoring lost control very much 'in real time', so to speak.) At other moments there was time allowed to share the main protagonists' (i.e. Pegg's and Dunst's) reflection on how events were affecting them and what had led them to where they now found themselves. All the characters were well cast, to some extent interesting in and of themselves, and generally quite likable. (Any apparent ruthless ambition displayed tended to be tempered by a corresponding good natured resilience.) An entertaining, intelligently scripted, brilliantly directed and superbly acted film that I would thoroughly recommend.",1
2,"If the themes of The Girl From Missouri sound familiar it should. That's because Anita Loos who wrote the screenplay here also wrote the classic Gentlemen Prefer Blondes. Unlike Marilyn Monroe in that film, Jean Harlow will accept any kind of jewelry from men of means.<br /><br />And it's men of means that Jean Harlow is after. She leaves the road side hash house run by her mother and stepfather because she's decided that the best way to gain the easy life is to marry it. Her talents as a chorus girl are limited, but she'll be able to trade in on that beauty.<br /><br />Her odyssey starts with her and friend Patsy Kelly getting an invitation to perform at a party thrown by millionaire Lewis Stone. But unbeknownst to Jean, Stone's just having a wild last fling before doing himself because of the moneys he owes not owns. Still she wrangles a few baubles from him that fellow millionaire Lionel Barrymore notices. <br /><br />Lionel's amused by it until Jean sets her sights on his playboy son, Franchot Tone. After that he is not amused and he looks to shake Jean from climbing the family tree.<br /><br />The Girl From Missouri went into production mid adaption of The Code so it went under peculiar censorship. I've a feeling we would have seen a much more risqué film. Still Jean Harlow as a younger and sassier version of Mae West is always appreciated. What a great comic talent that woman had, seeing The Girl From Missouri is a sad reminder of the great loss the world of film su...",1
3,Every now and then a film maker brings to life a unique group of people and lets you inside to see the things that make us human. Lawrence Kasden done this again. I always felt theBg Cill was the anthem of it's age and he has managed to do it again in Grand Canyon. Every so often we find ourselves at a point where we have the opportunity to choose life and so often we blow it. This is a film about people who find the courage to choose and experience life because of that choice. The juxtaposing of little and big events that lets us see how basically trivial most things we worry about are is truly genius. I have watched this film a number of times and am constanly surprised at how deep the emotions run through this film. Danny Glover and Kevin Kline do their roles with great tenderness and Stever Martins portrayal of a movie exec is priceless. Thank you again Mr. Kasden,1
4,"The morbid Catholic writer Gerard Reve (Jeroen Krabbé) that is homosexual, alcoholic and has frequent visions of death is invited to give a lecture in the literature club of Vlissingen. While in the railway station in Amsterdam, he feels a non-corresponded attraction to a handsome man that embarks in another train. Gerard is introduced to the treasurer of the club and beautician Christine Halsslag (Renée Soutendijk), who is a wealthy widow that owns the beauty shop Sphinx, and they have one night stand. On the next morning, Gerard sees the picture of Christine's boyfriend Herman (Thom Hoffman) and he recognizes him as the man he saw in the train station. He suggests her to bring Herman to her house to spend a couple of days together, but with the secret intention of seducing the man. Christine travels to Köln to bring her boyfriend and Gerard stays alone in her house. He drinks whiskey and snoops her safe, finding three film reels with names of men; he decides to watch the footages and discover that Christine had married the three guys and all of them died in tragic accidents. Later Gerard believes Christine is a witch and question whether Herman or him will be her doomed fourth husband. <br /><br />The ambiguous ""The Vierde Man"" is another magnificent feature of Paul Verhoeven in his Dutch phase. The story is supported by an excellent screenplay that uses Catholic symbols to build the tension associated to smart dialogs; magnificent performance of Jeroen Krabbé in the ro...",1
5,"Although in my opinion this is one of the lesser musicals of stars Frank Sinatra, Gene Kelly, Kathryn Grayson and director George Sidney, a lesser musical featuring anyone from that line-up is nothing to sneeze at, and in conjunction, the line-up makes Anchors Aweigh a pretty good film despite its flaws.<br /><br />Sinatra and Kelly are Clarence Doolittle and Joseph Brady, respectively, two Navy men. As the film begins, they're just pulling in to the Los Angeles area for some much needed leave. Brady plans on visiting a girlfriend named Lola. Doolittle is still a bit wet behind the ears, appropriately enough, and seeks advice on women from Brady in private (publicly, scriptwriter Isobel Lennart and Sidney have all of the Navy men comically exaggerating their finesse with women to each other). Brady promises to help get Doolittle hooked up, but primarily because Doolittle won't leave him alone otherwise. A kink is put into their plans when local police basically force them to assist with a young boy who is obsessed with the Navy. He won't give the police any information about who he is or where he lives. Brady helps and he and Doolittle end up taking the boy back home. When the boy's guardian, Susan Abbott (Grayson), finally shows up, Doolittle goes gaga for her. Brady tries to convince him to forget about her; Brady just wants to get back to Lola. But they keep getting coaxed back to Abbott's home, and eventually something of a love triangle forms. Things become more comp...",1
6,"If my expectations weren't exceeded, they were certainly met. ""Nancy Drew"" works both as a mystery and a comedy. It pays homage to the books it was based on and spoofs them at the same time. The movie starts with a close-up on a book shelf and an animated credit sequence resembling illustrations from the books. It then begins lampooning the books immediately. Writer/director Andrew Fleming seems to realize the idea of a teenage girl regularly breaking up crime rings in a small town is ridiculous, so he doesn't treat the idea religiously. He pokes fun at Nancy Drew's ability to do anything, such as ace every class in school, know which baked treat is perfect for converting any enemy to her side, make a bird house with twelve flying buttresses, and even perform an emergency tracheotomy at a party. Nancy'd always be in perfect style, if she were living in the wholesome 1950's instead of present day L.A. And she carries around a ""sleuth kit"" complete with fingerprinting dust, flashlight, compass, and magnifying glass. Preposterous, of course, but I seem to remember the Hardy Boys' access at any given moment to CSI equipment being a little too convenient as well. And if the perky pipsqueak private eye is a little younger than her literary counterpart, it's just more comedic exaggeration on the movie's part.<br /><br />The mystery of the movie is handled more seriously. Nancy and her lawyer father move from their small town to Los Angeles, where Nancy digs into a cold case invo...",1
7,"I appreciated the photography, the textures, the colours and often, unlike one comment, the lighting. What was lacking for me was a coherent storyline.I found it often disjointed, badly edited and at times difficult to follow. My version was 110 minutes, IMDb shows one at 125m. Possibly the cuts and subtitles didn't help. I applaud any films that escape from the Hollywood mould but this left me disappointed. Miss Gillain was luminous and the performances were all fine, I just wanted a little more dialogue. If anyone would like to see another film that has some affinity with this one, try 'Hideous Kinky'with Kate Winslet.",1
8,"The late, great Robert Bloch (author of PSYCHO, for those of you who weren't paying attention) scripted this tale of terror and it was absolutely one of the scariest movies I ever saw as a kid. (I had to walk MILES just to see a movie, and it was usually dark when I emerged from the theater; seeing a horror movie was always unnerving, but particularly so when it was as well-executed as this one.) When I had the opportunity to see this one several years ago on videotape (which should always be a last resort), I was surprised at how well it held up. Take the terror test: watch it at night, alone, and THEN tell me it's not scary...",1
9,"Okay, we've got extreme Verhoeven violence (Although not as extreme as other Verhoeven flicks), we've got plenty of sex and nudity, but something is missing...Oh, yes, it's missing the intelligence that Paul Verhoeven is known for in his sci-fi movies. I admire the way Verhoeven introduces the characters and how they have a sense of humor, but unlike most Verhoeven films, the movie itself doesn't have enough humor for it to fall into the comedy genre. The acting overall was above average compared to most slasher films.<br /><br />What makes Hollow Man a good movie is not the story, not the cast or characters, but the amazing special effects work that would otherwise make a film like this impossible. The crew has truly made an invisible man, without the use of things like a floating hat suspended on piano wires and other practical effects (effects done on set). The most stunning effects scenes are not seen while Kevin Bacon is invisible, they are when Kevin Bacon is becoming invisible and visible.<br /><br />The problem is that this invisible man story deserves to be more imaginitive. Here, it takes place at a lab for the most part. I would have enjoyed seeing the invisible Kevin Bacon robbing a bank and getting away with it, or let's say steal something from people's purses, or something like that. But what is shown is decent enough to make Hollow Man an entertaining movie. Grade: B",1


In [7]:
train_df.tail(20)

Unnamed: 0,Review,Sentiment
24980,"To describe this film as garbage is unfair. At least rooting through garbage can be an absorbing hobby. This flick was neither absorbing nor entertaining.<br /><br />Kevin Bacon can act superbly given the chance, so no doubt had an IRS bill to settle when he agreed to this dire screenplay. The mad scientist story of 'Hollow Man' has been told before, been told better, and been told without resorting to so many ludicrously expensive special effects.<br /><br />Most of those special effects seem to be built around the transparent anatomical dolls of men, women and dogs you could buy in the early seventies. In the UK they were marketed as 'The Transparent Man (/Woman/Dog)' which is maybe where they got the title for this film.<br /><br />Clever special effects, dire script, non-existent plot.<br /><br />",0
24981,"Undeveloped/unbelievable story line,(by the time I sort of figured out where it was going, I no longer cared) bad casting.(come on... William Macy as a hit man???) bad directing,(have you ever seen Tracey Ullman perform SO badly?)(Was I supposed to care what happened to the unethical incompetent, uncaring John Ritter character?) bad script...( Really, I'm not looking for a formula script but this was really awful) the only Really good thing in it was the kid. Ten lines? It's not OK if your comment is less than ten lines? COme on-- whose rules are those? Why can't I say what I have to say in less than 10 lines??? Isn't that kind of arbitrary? Why isn't it OK to have less than 10 lines of comment?",0
24982,"**SPOILERS*** Slow as molasses mummy movie involving this expiation in the Valley of the Kings in Egypt that has to be aborted in order to keep the native population, who are at the time revolting against British rule, from finding out about it.<br /><br />Given the task of getting to this archaeological dig by his superiors British Capt. Storm, Mark Dana, together with a couple of British soldiers and Mrs. Sylvia Quentin, Diane Brewster, the wife of the head man at the dig Robert Quentin, George N. Neise, make their way to the unearthed mummy's tomb. On the way there Capt. Storm Sylvia and his men run into this desert-like princess Simira, Ziva Rodann.<br /><br />Simira seems to be superhuman in her ability to withstand the rigors of desert life, she doesn't drink water or get tired, but also knows just what Capt. Storm & Co. are looking for and warns him and his group to stay as far away from the dig, Pharaoh's Ra Ha Tet tomb, as possible.<br /><br />At Ra Ha Tet's burial chamber Robert Quentin and his crew of archeologist's together with his Egyptin guide Simira's brother Numar, Alvaro Guillot,already opened his tomb before Capt. Storm can get there to stop them. Quentin violated Ra Ha Tet's body by having Dr. Farrady, Guy Prescott, cut his bandages. This action on Robert's and Dr. Farrady's part has Numar faint dead in his tracks. It later turns out that Numar somehow was possessed by Ra Ha Tet's spirit or soul who took over his body and caused him to age, at the rate...",0
24983,"I got Mirror Mirror mainly because Yvonne De Carlo was in it (I thought she was great in American Gothic) but sadly she didn't have a very big role in this film. It starts off OK and the pace moves along nicely...but by the end it starts getting a bit tedious and dull. That's not to say that this is a boring film, but it's just very average and nothing spectacular. I didn't like the ""posession"" side of it and there were no decent gore scenes. Plus the 'main' story was very confusing and the ending doesn't make much sense at all. I did however like the story surrounding the Gothic girl and how she got revenge on her tormentors.<br /><br />I wouldn't particularly recommend Mirror Mirror to horror fans - it's nothing to wet yourself over.",0
24984,"Would have better strengthened considerably by making it as a<br /><br />50 minute episode of the Outer Limits. Too much superfluous material and stuff like the chief bad guy looking like he'd escaped from The Phantom of the Opera didn't help. The whole 'Night of the Living Dead' sequence was extremely silly and quite unnecessary. After all, if the dead were to punish anyone for their sins, now remind me exactly who was killing everyone again?",0
24985,"Some people have made a point of dissing this movie because they question the plausibility of black people in the Old West, Asian people in the Old West or women with guns in the Old West period. Get a grip and read a book. There were quite a few Asians (Chinese), there were quite a few blacks (freedmen) and everybody outside of the gentile class had ready access to guns; it is the second amendment you know. And as far as the use of modern language goes, none of those Westerns people have waxed nostalgic about actually used language that was consistent with the era depicted. Americans had different accents, used different inflections, spoke at a very different pace and used plenty of words and phrases that would be unrecognizable today. Don't blame historical inaccuracy for the fact that you just didn't dig it. Be honest. Maybe you're just uncomfortable with what you're seeing.",0
24986,"Penn takes the time to develop his characters, and we almost care about them. However there are some real problems with the story here, we see no real motivation for the evil brother's behavior, and the time line is screwed up. Supposedly set in 1963, the music is late 60s/early 70s. The references and dialogue is 70s/80s. The potential for a powerful climax presents itself, and Penn allows it to slip away. But even with all these difficulties it is worth the watch, but not great.",0
24987,"I always found Betsy Drake rather creepy, and this movie reinforces that. As another review said, this is a stalker movie that isn't very funny. I watched it because it has CG in it, but he hardly gets any screen time. It's no ""North by Northwest""...",0
24988,"If you like the standard Sly flicks that involve over the top action, unbelievable stunts (unbelievable is not intended to be complimentary here), and retarded dialogue; you will love this steaming pile of mountain goat dung. I had high hopes based on the trailer. I thought that Stalone was going to be forced in his ""has-been"" days to yield to smarter people and make an action film that would place a credible hero in a credible situation where the story, setting, and (believable) action would prevail. I crave action that is at least close enough to reality that you can imagine the fear and excitement that would come from such an event. My limited knowledge of hypothermia and its effects rendered at least one scene laughably ridiculous. Judge Dredd is only better because you know going into the theater that you are going to see a comic book made into a movie. The character, setting and everything else are beyond comparison to anything we might encounter ourselves. Cliffhanger on the other hand turns a mountain climbing guide into Rambo before you can say ""yo, Adrian!""",0
24989,"There are so many goofy things about this movie that I can't possibly name but a few:<br /><br />BOGART's character: 1. His name  Whip McCord (too easy, so I'll leave it at that. Boy, it makes `Humphrey' sound good.) 2. His long, curly hair and silly sideburns. 3. His Black Bart get-up, complete with spurs! 4. Not sure what shade of lipgloss they've got him wearing, but it ain't none too flattering.<br /><br />CAGNEY's character (Jim Kincaid ): 1. His lipstick doesn't do him any favors, either. 2. The man is being swallowed by his hat during the entire film! Could they not find a hat to fit him? Even a LITTLE?!!?! 3. His pants are too tight in the rear. 4. He blows the smoke off his gun one too many times, if you know what I mean, and I think you do.<br /><br />If you are a casual Bogart or Cagney fan, and figure it might be a change of pace to see them in a western, do yourself a favor and forget that thought. EVEN THE HORSES LOOK EMBARRASSED! (That is, when they don't look bored.)<br /><br />In all fairness, I admit that westerns are my least favorite film genre, but I've still seen much, MUCH better than this.<br /><br />On a comedy level, or as high camp, The Oklahoma Kid works. Otherwise, it's viewer beware. Therefore, see this only if a) you must see every western out there b) you are a TRUE Cagney or Bogie completist c) any of the above comments appeal to you. Woah..",0


In [8]:
test_df['Review']

Unnamed: 0,Review
0,"What a show! Lorenzo Lamas once again proves his talent as a cop who committed the worst crime a good cop can commit, by being a good cop. Then, again, he shows how sensitive a cop can be, displaying a range of emotions like no other actor can except, maybe, himself in Terminal justice.<br /><br />HUGE ENJOYMENT!"
1,"I caught this movie on the Horror Channel and was quite impressed by the film's Gothic atmosphere and tone. As a big fan of all things vampire related, I am always happy to see a new variation of the vampire mythos, in this case, a ghoul-like creature residing in a Lovecraftian other dimension. The director has done a brilliant job of conveying the dark mood of the subject, using the decadent art scene as a backdrop to what is essentially a tale of love spanning time and space- the pure love of friendship opposed to the lust for blood and life by the vampires in the story. The characters in the story are transported to another dimension by the means of a mind-altering substance, where a shape-shifting vampire creature appears to grant them their hearts desires, whilst draining them of their life essence. There are some analogies to drug addiction and loss of control, and how this affects a group of friends in an artistic circle. I enjoyed watching the 2 main male characters in the story, Chris Ivan Cevic and Alex Petrovich, who were very attractive hunks, always a plus point in a vampire story for the female viewers! The special effects make up and creature effects were well done, and the set design of the vampire's dimension was very effective. All in all, an enjoyable take on vampire myths, and recommended for anyone who likes their vampires with some intelligence and not just action. The only thing missing to make it even better would have been a bit more eroticism and..."
2,"""I know I'm human. And if you were all these things, then you'd just attack me right now, so some of you are still human. This thing doesn't want to show itself, it wants to hide inside an imitation. It'll fight if it has to, but it's vulnerable out in the open. If it takes us over, then it has no more enemies, nobody left to kill it. And then it's won.""<br /><br />John Carpenter's ""The Thing"" is one of the most entertaining horror films ever made  fast, clever and purely exciting from start to finish. This is how all movies of the genre should be made.<br /><br />Taking place in the Antarctic in 1982, the movie focuses specifically on a group of American scientists. We are given no introduction to their mission, but are thrust into their existence when a pair of seemingly crazy Norwegians appears at their base camp, chasing an escaped dog. The Norwegians are killed, and the dog finds its way into the colony, which is when things really start to get crazy.<br /><br />It is soon made quite clear that the ""dog"" is actually a shape-shifting alien organism, which manifests itself upon the physical form of its victims  in other words, it begins to eat the Americans, and imitate them so well that the remaining humans cannot discern the difference between their friends and enemies<br /><br />The pack of scientists, led by MacReady (Kurt Russell), begin to fight for their own survival, using wits instead of brawn. If the Thing is indeed amongst them, then how are they to go abo..."
3,"The great thing about Thirst, Chanwook Park's latest film, is that it's the anti-Twilight. Some of you may take that as a minus, but in reality it's a big plus. Park takes the method of vampirism seriously, and as well the torrid love story between Sang-hyeon and Tae-Joo. We see the conflicts of both of the characters- Sang-hyeon being a priest who undergoes a medical experiment that, unbeknownst to him, turns him into a sickly but true-blue vampire, and Tae-Joo with her mother and ""idiot"" brother, the latter is killed by Sang- as in a very strong melodrama. There's nothing terribly weepy or insipid with the story and characters at any point, and the implications put forth from religion early on (Sang, for example, is seen as a healer of sorts since he rose from the dead thanks to his vampirism, even as he just can't be that and knows it) on top of those about good vs evil, push it up into another plane cinematically.<br /><br />That Thirst also rises up to the awesome standard of artistry that Park has displayed with Oldboy, Lady Vengeance and the underrated I'm a Cyborg but That's OK, should be taken as a given. Thirst is a film with a juicy narrative and bizarre suburban characters, and is shot and edited with an eye for a mood that is part satiric, part romantic/erotic, part dramatic and lastly fantastical. And it doesn't always treat vampirsim as something of a simple horror movie set-up (though as a horror movie Park has more than his share of scary scenes). It's mo..."
4,"A film about the relationship between a man and leopard that's very reminiscent of ""The English Patient,"" even down to a scene similar to when Ralph Fiennes' character carries the body of his lover across a desert-rock cliff. In ""A Passion in the Desert,"" the main character carries the body of the leopard across a desert-rock cliff but in the opposite direction (calculated decision or unconscious contrast?). Historically expanded from a very short Balzac story, the film is not perfect but a treat no less. Final shot will haunt me for weeks. (8 of 10)"
...,...
24995,"I watched this last night on Sundance. Altman must be the most hit or miss director of note ever. This show, despite its ""star power"" is utterly non-compelling, and its political insights--which I as a proud liberal in no way disagree with--are shallow and clunky, and seem ripped from the headlines of USA Today, despite the fact it's coming out of the mouth of someone as esteemed as Mario Cuomo. The drama, as such, is not very dramatic, and the comedy is not funny. The only points of interest, really, are seeing how New Yorkers live their lives, and the loyalty of a cast and crew to reassemble a show that keeps insisting has some cult following from 1988. Sometimes it seems like Altman's sole contribution to cinema has been the art of having all your actors talk at once, the effect of which is one feels depressingly like they're a stranger at a wedding."
24996,"Given the subject matter of drug addiction Down to the Bone almost can't help but be a rather depressing film. But depressing doesn't necessarily have to mean bad. Unfortunately in this case it is in fact pretty bad. The film has some good things going for it, most notably the quality performance of Vera Farmiga in the central role of Irene, a working mom struggling with a cocaine addiction. But there isn't enough good here to outweigh the bad. The film's failings lie mainly with the story, which fails to captivate and never really seems to get going. Irene goes to rehab and comes home to a clueless husband who has no idea how to support her attempt to kick her habit. Irene grows close to another recovering addict, a male nurse from her rehab center. Complications ensue. But the story never really sparks to life. It doesn't seem as if the movie is really going anywhere. You can say it's a stark, realistic look at the day-to-day struggles of an addict. Maybe so but in this case it doesn't make for an interesting movie. The whole thing has a very ""blah"" feel to it. The minimalist cinematography doesn't help matters, adding another layer of drab to the incredibly drab proceedings. And none of the other performances measure up to Farmiga's. Hugh Dillon is OK as Irene's male nurse friend but nobody else in the cast adds anything of value to the proceedings. All in all this movie is a bleak, depressing and rather dull ride."
24997,"not many people outside poland have had an opportunity to become familiar with andrzej sapkowski's brilliant writings. he's very popular in poland for his fantasy short stories ( i believe none of them has ever been translated intrto english. alas!). to make a long story short, wiedzmin - the main character of sapkowski's books - is a traveling monster slayer, a man of extraordinary strenght and skill: he's pretty much your favourite tolkien-style cool guy. unfortunately, no one would figure this out after watching the film. 'wiedzmin' the movie is nothing but a collection of random scenes, featuring wiedzmin and other characters from sapkowski's writings, but not eben remotely resembling the plot and dramatic pace of the original. event the fact that some of the shots in the film show attractive naked women does not add any quality to it. the movie gets worse and worse with every minute, and does not even meet the requirements of 'so bad it's actually good' category. if you really are into fantasdy and want to learn something about wiedzmin, read the books instead."
24998,"The film was disappointing. I saw it on Broadway with Bernadette Peters and she was outstanding. Maybe as she, herself graps on to the end of her musical career, her condtion of desperatation lands her in role that she flaunts, re-invents and triumps as her own. Bette's singing is always belted, always flat and lacking to show her ability as an actress. To be entertaining, this performance was dying for a stronger lead and a stronger cast, so that the others would be memorable in Bette's absence. Another criticism: she smiles directly into the camera every time she start singing! I know it is musical theater, but please leave some grace sociale-- Middler cannot perform like Liza or Streisand might in a retrospective tour - out of character and out of context."


In [11]:
text = train_df['Review'][0]
pattern = '<\w* />*'
ma = re.findall(pattern, text)
print(ma)

['<br />', '<br />', '<br />', '<br />', '<br />', '<br />', '<br />', '<br />', '<br />', '<br />', '<br />', '<br />']


In [12]:
# apply tokenization
train_df['Review'] = train_df['Review'].apply(lambda x: re.sub(pattern, '', x)).apply(lambda x: nltk.word_tokenize(x))
print(train_df.head())
test_df['Review'] = test_df['Review'].apply(lambda x: re.sub(pattern, '', x)).apply(lambda x: nltk.word_tokenize(x))
print(test_df.head())

                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                         Review  \
0  [Like, others, ,, I, have, seen, and, studied, most, of, the, books, and, films, concerning, the, Clutter, Killings, ,, including, a, few, dramatic, works, thematically, based, on, the, actions, and, psycho-mythology, of, the, participants, to, the, crime, --, including, Capote, himself, ., As, to, Capote

In [13]:
train_df['Review'] = train_df['Review'].apply(lambda x: [word.lower() for word in x])
print(train_df.head())
test_df['Review'] = test_df['Review'].apply(lambda x: [word.lower() for word in x])
print(test_df.head())

                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                         Review  \
0  [like, others, ,, i, have, seen, and, studied, most, of, the, books, and, films, concerning, the, clutter, killings, ,, including, a, few, dramatic, works, thematically, based, on, the, actions, and, psycho-mythology, of, the, participants, to, the, crime, --, including, capote, himself, ., as, to, capote