In [169]:
from transformers import AutoTokenizer, AutoModelForSequenceClassification
import torch
import requests
from bs4 import BeautifulSoup
import re

In [170]:
# Tokenizer and classifier must come from the same checkpoint, so the name
# is spelled once and shared by both loaders.
MODEL_NAME = "nlptown/bert-base-multilingual-uncased-sentiment"

tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
model = AutoModelForSequenceClassification.from_pretrained(MODEL_NAME)

In [171]:
# Encode one sample sentence into a PyTorch tensor of token ids.
tokens = tokenizer.encode(
    "I like the food, but could've been better",
    return_tensors="pt",
)

In [172]:
# Inspect the encoded token ids produced by tokenizer.encode above.
tokens

tensor([[  101,   151, 11531, 10103, 15225,   117, 10502, 12296,   112, 10312,
         10662, 16197,   102]])

In [173]:
# Decode the ids back to text to see what the tokenizer actually produced
# (including any special tokens it added around the sentence).
tokenizer.decode(tokens[0])

"[CLS] i like the food, but could've been better [SEP]"

In [174]:
# Inference only: run the forward pass without autograd tracking so the
# logits do not carry a grad_fn and no computation graph is kept alive.
with torch.no_grad():
    result = model(tokens)

In [175]:
# Raw per-class scores (logits) returned by the forward pass.
result.logits

tensor([[-2.5290, -0.1687,  2.3435,  1.7045, -1.2451]],
       grad_fn=<AddmmBackward0>)

In [176]:
# Zero-based index of the highest-scoring class.
result.logits.argmax()

tensor(2)

In [177]:
# Shift the zero-based class index by one to get a 1-based score.
result.logits.argmax().item() + 1

3

In [185]:
# r = requests.get('https://www.imdb.com/title/tt7286456/reviews?ref_=tt_ql_3')
# soup = BeautifulSoup(r.text, 'html.parser')
# regex = re.compile('.*comment.*')
# results = soup.find_all('p', {'class':regex})
# reviews = [result.text for result in results]

In [193]:

def scrape_imdb_reviews(movie_id, timeout=10):
    """Fetch the review texts shown on an IMDb title's reviews page.

    Args:
        movie_id: IMDb title identifier inserted into the URL
            (for example ``'tt7286456'``).
        timeout: Seconds to wait for the server before giving up. Passed
            straight to ``requests.get``.

    Returns:
        A list of whitespace-stripped review strings, one per matching
        ``<div class="text show-more__control">`` element. The list is
        empty when the request fails or no such element is found.
    """
    url = f'https://www.imdb.com/title/{movie_id}/reviews?ref_=tt_ql_3'
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.3'
    }

    try:
        # requests has no default timeout; without one a stalled connection
        # would block the notebook kernel indefinitely.
        r = requests.get(url, headers=headers, timeout=timeout)
        r.raise_for_status()
    except requests.exceptions.RequestException as e:
        # Best-effort scrape: report the failure and hand back no reviews.
        print(f"An error occurred: {e}")
        return []

    soup = BeautifulSoup(r.text, 'html.parser')
    results = soup.find_all('div', class_='text show-more__control')
    return [result.text.strip() for result in results]

# Scrape the reviews for one title (this ID is the Joker movie).
movie_id = 'tt7286456'  # Joker movie ID
reviews = scrape_imdb_reviews(movie_id)

# Print every review, numbered from 1, separated by a blank line.
for idx, review in enumerate(reviews, start=1):
    print(f"Review {idx}: {review}\n")

Review 1: There is no doubt that the movie was well thought of - from the plot to execution. I must praise Phoenix's superb acting. He managed to portray very believably the awkward social reject and also the crazy murderer that he became. I really think it's a role only few could do. It was also very interesting to watch how he deteriorated mentally and little by little gave up on trying to fit into society. Gotta hand it to the script writer - the plot was unpredictable.But...There wasn't anything too complex about Arthur's mental state. No deep psychological analysis, or a web of many deep factors that created a one of a kind psychotic maniac. Could it be that I was expecting too much? Very possible since I came with very high expectations after reading some reviews.
What doesn't have anything with expectations is that at the end he didn't remind much of the original Joker. He felt more like a simply society reject that went completely crazy and started killing people. Not that much

In [187]:
import numpy as np
import pandas as pd

In [194]:
# One row per scraped review. Building from a dict skips the intermediate
# fixed-width NumPy string array (sized by the longest review) and avoids a
# float64 column when the scrape returned no reviews.
df = pd.DataFrame({'review': reviews})


In [195]:
# Full text of the first scraped review.
df['review'].iat[0]

"There is no doubt that the movie was well thought of - from the plot to execution. I must praise Phoenix's superb acting. He managed to portray very believably the awkward social reject and also the crazy murderer that he became. I really think it's a role only few could do. It was also very interesting to watch how he deteriorated mentally and little by little gave up on trying to fit into society. Gotta hand it to the script writer - the plot was unpredictable.But...There wasn't anything too complex about Arthur's mental state. No deep psychological analysis, or a web of many deep factors that created a one of a kind psychotic maniac. Could it be that I was expecting too much? Very possible since I came with very high expectations after reading some reviews.\nWhat doesn't have anything with expectations is that at the end he didn't remind much of the original Joker. He felt more like a simply society reject that went completely crazy and started killing people. Not that much differe

In [196]:
def sentiment_score(review, max_length=512):
    """Score one review with the loaded sentiment model.

    Args:
        review: Review text to classify.
        max_length: Maximum number of tokens (special tokens included) sent
            to the model; longer inputs are truncated by the tokenizer.
            The default of 512 assumes a BERT-base style position limit;
            confirm it if a different checkpoint is loaded.

    Returns:
        int: The 1-based index of the highest logit (argmax class + 1).
    """
    # Truncate at the token level: slicing the string by characters does not
    # bound the token count, and an over-long sequence makes the model fail.
    tokens = tokenizer.encode(
        review,
        return_tensors='pt',
        truncation=True,
        max_length=max_length,
    )
    # Inference only, so skip autograd bookkeeping.
    with torch.no_grad():
        result = model(tokens)
    return int(torch.argmax(result.logits)) + 1

In [198]:
# Spot-check the scorer on the second review.
sentiment_score(df['review'].iat[1])

4

In [199]:
# Score every review. NOTE: the 512 cut is applied to characters of the raw
# text, not to tokens.
df['sentiment'] = df['review'].str[:512].map(sentiment_score)

In [200]:
# Show each review next to the sentiment score computed for it.
df

Unnamed: 0,review,sentiment
0,There is no doubt that the movie was well thou...,4
1,The movie affects you in a way that makes it p...,4
2,I thought this film was good but I just don't ...,3
3,"Every once in a while a movie comes, that trul...",4
4,This is a movie that only those who have felt ...,5
5,"Truly a masterpiece, The Best Hollywood film o...",5
6,Joaquin Phoenix gives a tour de force performa...,4
7,Most of the time movies are anticipated like t...,2
8,Let me start off by saying if Joaquin Phoneix ...,5
9,Do not really understand all the tens here. Su...,3


In [201]:
# Full text of the review at position 12.
df['review'].iat[12]

'I know this is an unpopular opinion, but honestly this movie is overrated and boring. 2 hours felt like 3 and a half.I don\'t buy the whole thing where he is invited to the show. The meta "standup" he made was extremely dull and boring. Then gets allowed to broadcast for that long without being interrupted, also being transported in the most shitty police-car ever after that, it\'s just not believeable enough to me.He also acted the same when he was on meds aswell as off, so that part didn\'t make sense either. The police must also be really not catching this guy faster after the homocide.It has some good parts but they can be boiled down to maybe 5 minutes.\nIs this a movie I would watch again? Probably not.'