^C


In [2]:
import requests
import re
import math
import pandas as pd
import numpy as np
import os
import time
import pickle
from lxml import etree
from tkinter import *
from tkinter import messagebox
from sklearn.feature_extraction.text import CountVectorizer
from matplotlib.backends.backend_tkagg import FigureCanvasTkAgg
from matplotlib.figure import Figure
from operator import itemgetter

In [3]:
NB = pickle.load(open('naivebayes.sav', 'rb'))
vectorizer = pickle.load(open('vector.sav', 'rb'))

In [4]:
headers = {
    'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/99.0.4844.51 Safari/537.36 Edg/99.0.1150.39'
}

In [5]:
def scrape_review(link):
    review_list = []
    i = 0
    
    while True:
        length = len(review_list)
        url = link+'/user-reviews?page=' + str(i)
        response = requests.get(url=url, headers=headers)
        xml = etree.HTML(text=response.text)
        items = xml.xpath('//div[@class="review pad_top1"]')

        for j in items:
            rate = j.xpath('.//div[@class="left fl"]/div/text()')
            name = j.xpath('.//span[@class="author"]/a/text()')
            date = j.xpath('.//span[@class="date"]/text()')
            if not j.xpath('.//span[@class="blurb blurb_expanded"]/text()'):
                review = j.xpath('.//div[@class="review_body"]/span/text()')
            else:
                review = j.xpath('.//span[@class="blurb blurb_expanded"]/text()')

            dic = {
                'NAME': name,
                'DATE': date,
                'REVIEW': review,
                'RATE': rate,  
            }
            review_list.append(dic)

        i+=1

        # if length remains, it means that no more reviews
        if len(review_list) == length:
            df = pd.DataFrame(review_list)
            return df

In [6]:
def button_command():
    global review, matrix
    try:
        text = entry.get()
        review = scrape_review(text)

        # remove punctuations
        for column in review.columns:
            review[column] = review[column].apply(lambda x: str(x))
            review[column] = review[column].apply(lambda x: x[2:-2])

        review['RATE'] = review['RATE'].apply(lambda x:int(x))
        
        category_name = ['Positive', 'Negative']
        matrix = vectorizer.transform(review['REVIEW'])
        review['PREDICT'] = NB.predict(matrix)
        predict_count = review['PREDICT'].value_counts()
        graph = predict_count.plot(kind='pie',
                                   colors=['#47bfbd', '#ff3c44'],
                                   title='PREDICTED SENTIMENT',
                                   autopct='%1.0f%%',
                                   labels = category_name,
                                   figsize=(4,4)).get_figure()

        counts = pd.DataFrame(matrix.toarray(), columns=vectorizer.get_feature_names())
        lst = []
        for i in counts.columns:
            lst.append(i)

        dic = {}
        for i in range(len(counts.columns)):
            dic[lst[i]] = counts[lst[i]].sum()

        top = dict(sorted(dic.items(), key=itemgetter(1), reverse=True)[:10])
        keys = list(top.keys())
        values = list(top.values())

        # Show review_count
        rcount_label2 = Label(window, text=len(review))
        rcount_label2.place(x=75, y=90)
        rcount_label2.config(font=("Courier", 30), fg='#ff3c44')

        # Show overall_score
        pos = (review['PREDICT'] == 1).sum()
        neg = (review['PREDICT'] == 0).sum()
        overall_score = round((pos/(pos+neg))*100, 2)
        overall_label2 = Label(window, text = str(overall_score)+'%')
        overall_label2.place(x=215, y=90)
        overall_label2.config(font=("Courier", 30))
        if overall_score > 50:
            overall_label2.config(fg='#47bfbd')
        else:
            overall_label2.config(fg='#ff3c44')

        # Plot bar chart
        plot = FigureCanvasTkAgg(graph, window)
        plot.get_tk_widget().place(x=395, y= 75)

        # Plot words frequency    
        for i in range(10):
            txt1 = keys[i]
            txt2 = values[i]
            word = Label(window, text=f'{txt1:15}\t {values[i]}')
            word.place(x=80, y=185+i*20)
            word.config(font=("Courier", 11))
            
        return review
            
    except Exception as e:
        print(e)
        messagebox.showwarning(title='WARNING', message='Invalid Link!')

In [9]:
def download_command():
    try:
        text = entry.get()
        movie_name = re.findall('movie/(.*)', text)[0]
        review.to_csv(movie_name + '.csv', index=False, encoding='utf-8')
        messagebox.showinfo(title='DOWNLOAD', message='Reviews Downloaded Successfully!')
    
    except:
        messagebox.showwarning(title='WARNING', message='Review Download was unsuccessful!')

In [10]:
window = Tk()
window.geometry("700x450")
window.title('Metacritic Sentiment Analysis')

entry = Entry(window, width=90)
entry.place(x=45, y=15)

btn1 = Button(window, text='Enter', command=button_command)
btn1.place(x=600, y=10)

rcount_label1 = Label(window, text='Total Reviews:')
rcount_label1.place(x=60, y=70)
rcount_label1.config(font=("Courier", 10))

overall_label1 = Label(window, text='Overall Ratings:')
overall_label1.place(x=220, y=70)
overall_label1.config(font=("Courier", 10))

word_label = Label(window, text='10 Most Frequent Word:')
word_label.place(x=60, y=160)
word_label.config(font=("Courier", 10))

btn2 = Button(window, text='Download Reviews', command=download_command)
btn2.place(x=570, y=400)

window.mainloop()

'RATE'
'RATE'


In [322]:
# url exmaple:
# https://www.metacritic.com/movie/morbius
# https://www.metacritic.com/movie/the-batman
# https://www.metacritic.com/movie/the-godfather
# https://www.metacritic.com/movie/the-avengers-2012
# https://www.metacritic.com/movie/black-panther