In [155]:
import imdb
import requests
from bs4 import BeautifulSoup
import tensorflow as tf
import numpy as np
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences


In [157]:
def get_imdb_reviews(movie_title):
    """Scrape the review texts shown on a title's IMDb reviews page.

    Args:
        movie_title: Despite the name, this value is inserted into the URL
            path as the IMDb title ID (callers in this notebook pass IDs
            such as ``'tt1375666'``).

    Returns:
        list[str]: Stripped text of every review found on the page; empty
        when the page contains no matching review markup.

    Raises:
        requests.RequestException: If the request times out, fails, or the
            server answers with an HTTP error status.
    """
    url = f"https://www.imdb.com/title/{movie_title}/reviews"
    # A timeout keeps the notebook from hanging forever on a stalled connection.
    response = requests.get(url, timeout=30)
    # Fail loudly on 4xx/5xx instead of silently parsing an error page.
    response.raise_for_status()
    soup = BeautifulSoup(response.text, 'html.parser')

    reviews = []
    for container in soup.find_all("div", class_="lister-item-content"):
        text_node = container.find("div", class_="text")
        # find() returns None when the container has no text <div>; skip it
        # rather than crash with AttributeError on `.text`.
        if text_node is None:
            continue
        reviews.append(text_node.text.strip())

    return reviews

In [158]:
def load_data(movie_ids=None):
    """Collect the reviews of one or more IMDb titles into one flat list.

    Args:
        movie_ids: Iterable of IMDb title IDs to fetch, or a single ID given
            as a string. When omitted, the single training title
            ``'tt1375666'`` is used.

    Returns:
        list[str]: Reviews of all requested titles, concatenated in the
        order of ``movie_ids``.
    """
    if movie_ids is None:
        movie_ids = ['tt1375666']  # Default title ID used for training
    elif isinstance(movie_ids, str):
        # A bare string would otherwise be iterated character by character.
        movie_ids = [movie_ids]

    reviews = []
    for movie_id in movie_ids:
        reviews.extend(get_imdb_reviews(movie_id))
    return reviews

In [159]:
reviews = load_data()
# Hand-assigned sentiment per review (0 = bad, 1 = good); labels[i] is paired
# positionally with reviews[i] when both are passed to model.fit.
labels = np.array([1, 0, 1, 1, 0, 1, 1, 0, 0, 0,
                   0, 1, 0, 1, 0, 0, 1, 1, 0, 1,
                   1, 0, 1, 0, 1])

# The labels are hard-coded while the reviews are scraped live, so the two can
# drift apart. Check here to get a clear message instead of a shape error
# from Keras later on.
if len(reviews) != len(labels):
    raise ValueError(
        f"Got {len(reviews)} reviews but {len(labels)} labels; "
        "update `labels` so there is exactly one label per review."
    )

In [160]:
# Text preprocessing: turn the raw reviews into fixed-length integer sequences.
# The vocabulary is limited via num_words=10000 (most frequent words only).
tokenizer = Tokenizer(num_words=10000)
tokenizer.fit_on_texts(reviews)
sequences = tokenizer.texts_to_sequences(reviews)
# Every sequence is brought to exactly 100 entries. 'pre' is the Keras default
# for both options and is only spelled out here: shorter reviews are
# zero-padded at the front, longer ones keep their last 100 tokens.
padded_sequences = pad_sequences(
    sequences,
    maxlen=100,
    padding='pre',
    truncating='pre',
)



In [161]:
# Fix TensorFlow's global seed before any layer is created so that weight
# initialisation (and the shuffling done later by fit) draws from a repeatable
# random stream when the notebook is run top to bottom.
tf.random.set_seed(42)

# Small binary sentiment classifier:
#   Embedding  - 10000-entry vocabulary (same value as the tokenizer's
#                num_words), 16-dimensional vectors, inputs of length 100
#                (same value as the padding length)
#   GlobalAveragePooling1D - averages the word vectors of a review
#   Dense(16, relu) -> Dense(1, sigmoid) - outputs a score in [0, 1]
model = tf.keras.models.Sequential([
    tf.keras.layers.Embedding(10000, 16, input_length=100),
    tf.keras.layers.GlobalAveragePooling1D(),
    tf.keras.layers.Dense(16, activation='relu'),
    tf.keras.layers.Dense(1, activation='sigmoid')
])

In [162]:
# Binary cross-entropy with the Adam optimizer; accuracy is reported per epoch.
model.compile(
    optimizer='adam',
    loss='binary_crossentropy',
    metrics=['accuracy'],
)
# Two passes over the labelled reviews. The call is left as the last
# expression so the returned History object is shown as the cell output.
model.fit(x=padded_sequences, y=labels, epochs=2)

Epoch 1/2
Epoch 2/2


<keras.callbacks.History at 0x7f2b0ce8c340>

In [163]:
def analyze_movie(movie_id):
    """Score a movie by the mean predicted sentiment of its IMDb reviews.

    Args:
        movie_id: IMDb title ID passed on to ``get_imdb_reviews``.

    Returns:
        Mean of the model's predictions over all scraped reviews of the movie.

    Raises:
        ValueError: If no reviews could be scraped for ``movie_id``.
    """
    reviews = get_imdb_reviews(movie_id)
    if not reviews:
        # With nothing to predict on, the mean would be NaN (or predict would
        # fail with an opaque error); NaN would then read as "not recommended".
        raise ValueError(
            f"No reviews found for movie id {movie_id!r}; "
            "cannot compute a sentiment score."
        )

    # Reuse the module-level tokenizer and the same maxlen (100) that was
    # applied to the training sequences.
    sequences = tokenizer.texts_to_sequences(reviews)
    padded_sequences = pad_sequences(sequences, maxlen=100)
    predictions = model.predict(padded_sequences)
    return predictions.mean()

In [164]:
def get_imdb_movie_id(movie_title):
    """Look up a movie by title and return the IMDb ID of the first search hit.

    Args:
        movie_title: Title to search for, e.g. ``'Toy Story'``.

    Returns:
        str: The ID of the first result, prefixed with ``'tt'``.

    Raises:
        IndexError: If the search returns no results.
    """
    # No visible cell defines `ia`, so on a fresh kernel the lookup failed
    # with NameError; create the IMDb client here instead of relying on
    # leftover kernel state.
    ia = imdb.IMDb()
    movies = ia.search_movie(movie_title)
    if not movies:
        # Same exception type as the bare movies[0] would raise, but with a
        # message that names the title.
        raise IndexError(f"No IMDb search results for title {movie_title!r}.")
    return 'tt' + movies[0].getID()

In [165]:
# Resolve the title to an IMDb ID, score its reviews, and print the verdict.
title = 'Toy Story'
mv = get_imdb_movie_id(title)
sentiment = analyze_movie(mv)
# Recommend only when the mean predicted sentiment is strictly above 0.5.
print(
    f"The movie '{title}' is recommended."
    if sentiment > 0.5
    else f"The movie '{title}' is not recommended."
)

The movie 'Toy Story' is recommended.
