In [26]:
import spacy
import pickle
import pandas as pd

from nltk.tokenize import word_tokenize
from nltk.corpus import stopwords
from nltk.stem import SnowballStemmer, WordNetLemmatizer
from nltk.classify import NaiveBayesClassifier

from sklearn.metrics.pairwise import cosine_similarity
from sklearn.feature_extraction.text import TfidfVectorizer

In [27]:
# Read the IMDb review dump and discard every row that has a missing value
# in any column, then display the resulting frame as the cell output.
ds = pd.read_csv('../imdb-movies-dataset.csv').dropna()
ds

Unnamed: 0,Title,Rating,Review
0,The Idea of You,6.4,"This film, as well as the reaction to it, is a..."
1,Kingdom of the Planet of the Apes,7.3,"I'm a big fan of all the planet of the apes, a..."
2,Unfrosted,5.5,Pretty much the worst criticism you can lay on...
3,The Fall Guy,7.3,Just got out of the Austin premier at SXSW and...
4,Challengers,7.7,This is a tough one. I liked the concept and t...
...,...,...,...
4995,What a Girl Wants,5.8,The folks banging this movie have to get real....
4996,The Duchess,6.9,After reading copious amounts of mediocre revi...
4997,When a Stranger Calls,5.1,Once in a great while I will watch a movie tha...
4998,The Ice Road,5.6,"A script full of flaws, ridiculous situations,..."


In [28]:
# Shared NLTK resources used by preprocess().
# Word normalisers: the Snowball stemmer (English) and the WordNet lemmatizer.
stemmer = SnowballStemmer('english')
lemmatizer = WordNetLemmatizer()
# English stop-word list; tokens found here are dropped before stemming.
stopword = stopwords.words('english')

def preprocess(text):
    """Turn raw text into a list of normalised word tokens.

    The text is lower-cased and tokenised with ``word_tokenize``; tokens that
    appear in the module-level ``stopword`` list or that are not purely
    alphabetic are discarded; each survivor is stemmed with ``stemmer`` and
    the stem is then passed through ``lemmatizer``.

    Args:
        text: The raw string to normalise.

    Returns:
        The processed tokens, in their original order.
    """
    tokens = word_tokenize(text.lower())
    # Lazily keep only alphabetic, non-stop-word tokens.
    kept = (tok for tok in tokens if tok not in stopword and tok.isalpha())
    # Stem first, then lemmatise the stem (same order as the two-pass form).
    return [lemmatizer.lemmatize(stemmer.stem(tok)) for tok in kept]

In [29]:
def extract_feature(review):
    """Build a bag-of-words feature dict for one review.

    The review is normalised with ``preprocess`` and every distinct token
    becomes a key mapped to ``True`` (the "word is present" feature format
    that ``train_model`` feeds to the classifier).

    Args:
        review: Raw review text.

    Returns:
        dict mapping each distinct processed token to ``True``, keyed in
        first-occurrence order. Empty when no token survives preprocessing.
    """
    # The former ``if word in review`` filter was always true and turned the
    # comprehension into a quadratic list scan; dict.fromkeys de-duplicates
    # in a single pass with the same result.
    return dict.fromkeys(preprocess(review), True)

In [30]:
def train_model(threshold=5, train_ratio=0.8):
    """Train the global Naive Bayes sentiment ``classifier`` from ``ds``.

    Every review in ``ds['Review']`` is converted to a feature dict with
    ``extract_feature`` and labelled ``'Positive'`` when its rating is
    strictly greater than ``threshold``, otherwise ``'Negative'``. The
    leading ``train_ratio`` share of the rows (in dataset order, no
    shuffling) trains the classifier; the remaining rows are used only to
    measure accuracy.

    Note: the positive label used to be misspelled ``'Postive'``. A
    classifier that was pickled before this fix keeps emitting the old
    spelling until it is retrained.

    Args:
        threshold: Ratings above this value count as positive.
        train_ratio: Fraction of the rows used for training.

    Returns:
        Accuracy on the held-out rows as a float in [0, 1], or ``None`` when
        no rows are held out.

    Side effects:
        Rebinds the module-level ``classifier``.
    """
    global classifier

    labels = ['Positive' if rating > threshold else 'Negative'
              for rating in ds['Rating']]
    # The previous version also joined and preprocessed the entire corpus
    # into locals that were never read; that dead (and slow) work is gone.
    feature_sets = [(extract_feature(review), label)
                    for review, label in zip(ds['Review'], labels)]

    train_count = int(len(feature_sets) * train_ratio)
    train_set = feature_sets[:train_count]
    test_set = feature_sets[train_count:]

    classifier = NaiveBayesClassifier.train(train_set)

    if not test_set:
        return None
    # Put the hold-out split to use instead of silently discarding it.
    hits = sum(classifier.classify(features) == label
               for features, label in test_set)
    return hits / len(test_set)

In [31]:
def predict_review(review):
    """Classify a raw review with the module-level ``classifier``.

    Args:
        review: Raw review text.

    Returns:
        Whatever label ``classifier.classify`` yields for the features that
        ``extract_feature`` derives from the review.
    """
    return classifier.classify(extract_feature(review))


In [32]:
# Shared TF-IDF vectorizer; get_similarity_count() re-fits it on every call.
vectorizer = TfidfVectorizer()

def get_similarity_count(q, t):
    """Score the query ``q`` against every document in ``t``.

    The module-level ``vectorizer`` is fitted on ``t`` (replacing any earlier
    fit), the query is projected with that fitted vocabulary, and the cosine
    similarity between the query and the documents is returned.

    Args:
        q: Query text.
        t: Iterable of document strings.

    Returns:
        The result of ``cosine_similarity(query, documents)``; callers read
        row 0 as the per-document scores for the single query.
    """
    doc_matrix = vectorizer.fit_transform(t)
    # Must come after the fit so the query shares the documents' vocabulary.
    query_matrix = vectorizer.transform([q])
    return cosine_similarity(query_matrix, doc_matrix)

def get_top_movies(q, top_n=2):
    """Print the titles whose reviews are most similar to the query ``q``.

    Each review in ``ds`` is scored against ``q`` with
    ``get_similarity_count``; titles are ranked by descending similarity,
    repeated titles are collapsed to their best-scoring review, and the
    first ``top_n`` are printed as ``"<rank>: <title>"``.

    The earlier implementation read the titles back through
    ``Series.mode()``, which returns its values sorted, so the output was
    alphabetical rather than ranked, and it raised ``KeyError`` whenever the
    two best rows shared one title.

    Args:
        q: Query text (the user's review).
        top_n: How many distinct titles to print. Fewer are printed when the
            dataset holds fewer distinct titles.

    Returns:
        None. The ranking is written to stdout.
    """
    similarity = get_similarity_count(q, ds['Review'])
    ranked = (
        pd.DataFrame({'title': ds['Title'], 'similarity': similarity[0]})
        .sort_values(by='similarity', ascending=False)
        # Keep only the best-scoring row of each title so one movie cannot
        # fill several recommendation slots.
        .drop_duplicates(subset='title')
        .head(top_n)
    )

    for rank, title in enumerate(ranked['title'], start=1):
        print(f"{rank}: {title}")

In [None]:
# Load the spaCy 'en_core_web_sm' pipeline once; get_entities() runs it on each review.
nlp = spacy.load('en_core_web_sm')

def get_entities(review):
    """Group the named entities found in ``review`` by entity label.

    The text is run through the module-level ``nlp`` pipeline and every
    entity in ``doc.ents`` is filed under its ``label_``.

    Args:
        review: Raw review text.

    Returns:
        dict mapping each label to the list of entity texts carrying that
        label, in the order the entities appear. No label is filtered out.
    """
    grouped = {}
    for entity in nlp(review).ents:
        # setdefault creates the bucket the first time a label is seen.
        grouped.setdefault(entity.label_, []).append(entity.text)
    return grouped

In [34]:
def check_model(path='model.pickle'):
    """Load the cached classifier from ``path``, or train and cache a new one.

    When ``path`` can be opened and unpickled, the result is bound to the
    module-level ``classifier`` and ``'model loaded'`` is printed. When the
    file is missing, unreadable or not a valid pickle, ``'training model'``
    is printed, ``train_model()`` is run and the resulting ``classifier`` is
    pickled to ``path``.

    Args:
        path: Location of the pickle cache.

    Raises:
        Whatever ``train_model()`` raises. The cache file is written only
        after training succeeds, so a failed run leaves no empty file behind.
    """
    global classifier

    # Failures that mean "no usable cache"; the last five are the ones the
    # pickle documentation lists for unpickling, plus ValueError for an
    # unsupported protocol. KeyboardInterrupt/SystemExit are deliberately
    # NOT caught (the old bare ``except:`` swallowed them).
    cache_errors = (OSError, EOFError, pickle.UnpicklingError,
                    AttributeError, ImportError, IndexError, ValueError)

    try:
        # SECURITY: unpickling can execute arbitrary code. Only point
        # ``path`` at a file this notebook produced itself.
        with open(path, 'rb') as f:
            classifier = pickle.load(f)
    except cache_errors:
        print('training model')
        train_model()
        # Open for writing only now that there is something to store.
        with open(path, 'wb') as f:
            pickle.dump(classifier, f)
    else:
        print('model loaded')

# Make sure the global `classifier` is available as soon as this cell runs
# (loaded from the pickle cache, or trained and cached when loading fails).
check_model()


training model


In [35]:
def main(min_words=20):
    """Run the interactive, menu-driven recommendation console.

    Ensures a classifier is available via ``check_model()``, then loops:
    shows the current review and its predicted category, prints the menu and
    dispatches on the user's choice:

    1. read a review of at least ``min_words`` words and print its category;
    2. print the top recommendations for the review (``get_top_movies``);
    3. print the review's named entities grouped by label (``get_entities``);
    4. leave the loop.

    Options 2 and 3 need a review; without one the user is now told so
    instead of the menu being silently redrawn. Every action except exit
    ends with a "press enter" pause.

    Args:
        min_words: Minimum number of whitespace-separated words a review
            must contain to be accepted.

    Returns:
        None, when the user chooses option 4.
    """
    check_model()

    review = ""

    while True:
        print("MOVIE RECOMMENDATION APPLICATION BASED ON REVIEWS")
        if review == "":
            print("Your Review: NO REVIEW")
            print("Your Review Category: UNKNOWN")
        else:
            print(f'Your Review: {review}')
            print(f'Your Review Category: {predict_review(review)}')

        print('1. Write your review')
        print('2. View Movie Recommendation')
        print('3. View Named Entities Recognition')
        print('4. Exit')

        choice = input('>> ').strip()

        if choice == '4':
            return

        if choice == '1':
            # Keep asking until the review is long enough; the stored review
            # is only replaced once a valid one has been entered.
            while True:
                candidate = input('Input your review: ')
                if len(candidate.split()) >= min_words:
                    break
                print(f'Has to be at least {min_words} words...')
            review = candidate
            print(predict_review(review))
        elif choice in ('2', '3') and review == '':
            print('Write your review first (option 1)...')
        elif choice == '2':
            print('TOP 2 MOVIE RECOMMENDATION FOR YOU:')
            get_top_movies(review)
        elif choice == '3':
            print('CATEGORIZED NAMED ENTITIES')
            for label, texts in get_entities(review).items():
                print(f"{label}: {' '.join(texts)}")
        else:
            print('Insert a valid option...')

        # Common pause so the output stays readable before the menu redraws.
        print()
        print('[>] Press enter to continue...')
        input()


In [38]:
# Start the interactive menu loop; it returns when the user picks "4. Exit".
main()

model loaded
MOVIE RECOMENDATION APPLICATION BASED ON REVIEWS
Your Review: NO REVIEW
Your Review Category: UNKNOWN
1. Write your review
2. View Movie Recommendation
3. View Named Entities Recognition
4. Exit
Postive

[>] Press enter to continue...
MOVIE RECOMENDATION APPLICATION BASED ON REVIEWS
Your Review: I just saw Dune: Part Two at a cinema here in Jakarta, and it is a true sci-fi epic. Denis Villeneuve has crafted a visual masterpiece, and the performance by Timothée Chalamet was absolutely captivating. The haunting soundtrack and complex narrative make this Warner Bros. film an unforgettable experience that goes beyond typical blockbusters
Your Review CategoryL Postive
1. Write your review
2. View Movie Recommendation
3. View Named Entities Recognition
4. Exit
TOP 2 MOVIE RECOMMENDATION FOR YOU:
1: Dune
2: Incendies

[>] Press enter to continue...
MOVIE RECOMENDATION APPLICATION BASED ON REVIEWS
Your Review: I just saw Dune: Part Two at a cinema here in Jakarta, and it is a true