# Import Modules

In [1]:
import requests
import re
from bs4 import BeautifulSoup
from time import sleep
from random import random

# 整合

In [2]:
class YahooMovies:
    """Interactive scraper for the Yahoo Taiwan movie search and detail pages.

    The class prompts for a movie title on stdin, queries
    ``movies.yahoo.com.tw`` for it, lets the user pick one of the hits and
    then extracts a handful of fields from the chosen movie's page.
    """

    # Seconds to wait for a single HTTP response before ``requests`` raises.
    REQUEST_TIMEOUT = 10

    # Matches runs of two or more whitespace characters, or any run of
    # newlines / carriage returns / tabs; matches are removed entirely.
    _WHITESPACE_RE = re.compile(r'[\s]{2,}|[\n\r\t]+')

    def __init__(self):
        # Kept for backward compatibility; no method in this class updates it.
        self.movie_link = None

    @staticmethod
    def _clean(text):
        """Return *text* with layout whitespace (indentation, newlines) removed."""
        return YahooMovies._WHITESPACE_RE.sub('', text)

    @staticmethod
    def _field_value(text):
        """Return the part of *text* after the first full-width colon, or None."""
        parts = text.split('：')
        return parts[1] if len(parts) > 1 else None

    def user_input(self):
        """Prompt until a non-blank movie title is entered.

        Returns:
            The title exactly as typed, or ``None`` when input is aborted
            (end of input or a keyboard interrupt).
        """
        while True:
            try:
                movie_name = input("請輸入欲搜尋的電影名稱：")
            except (EOFError, KeyboardInterrupt):
                # Only the "no more input" conditions are handled; any other
                # error is a real bug and is allowed to propagate.
                print('\nTry Again')
                return None
            if movie_name.strip():
                return movie_name

    def _choose_title(self, movie_titles):
        """Ask the user about each hit in turn; return the accepted one or None."""
        for m in movie_titles:
            ans = None
            while ans not in ('Y', 'N'):
                ans = input(f'找到多部電影，返回第一個搜尋{m.a.text}(Y)或是往下搜索(N):')
            if ans == 'Y':
                return m
        return None

    def search_movie(self, query):
        """Search Yahoo Movies for *query* and resolve it to a single movie.

        With exactly one hit that hit is returned.  With several hits the user
        is asked about each one; if all are rejected a new title is requested
        and the search is repeated after a short randomized delay.

        Args:
            query: Movie title to search for.

        Returns:
            ``(title, link)`` for the chosen movie, or ``(query, None)`` when
            the query is empty/``None`` or the search has no hits.
        """
        while True:
            # An aborted prompt yields None; never send that to the site.
            if not query or not query.strip():
                return query, None

            encoded_query = requests.utils.quote(query)
            url = f"https://movies.yahoo.com.tw/moviesearch_result.html?keyword={encoded_query}"
            response = requests.get(url, timeout=self.REQUEST_TIMEOUT)
            soup = BeautifulSoup(response.text, "html.parser")
            movie_titles = soup.find_all("div", class_="release_movie_name")

            if not movie_titles:
                return query, None
            if len(movie_titles) == 1:
                only = movie_titles[0]
                return only.a.text, only.a['href']

            chosen = self._choose_title(movie_titles)
            if chosen is not None:
                return chosen.a.text, chosen.a['href']

            # Every candidate was rejected: ask for a new title and search
            # again.  The result of the new search is what gets returned
            # (looping instead of recursing keeps that explicit), and the
            # pause happens *before* the next request is sent.
            query = self.user_input()
            if not query:
                return query, None
            sleep(1 + random())

    def _latest_reviews(self, review_link):
        """Fetch up to five review texts from *review_link*.

        Returns ``None`` when there is no link, the list of cleaned review
        texts when any are found, otherwise the "no reviews" message string.
        """
        if review_link is None:
            return None
        response = requests.get(review_link, timeout=self.REQUEST_TIMEOUT)
        soup = BeautifulSoup(response.text, "html.parser")
        res = []
        for block in soup.find_all('div', class_='usercom_inner _c')[:5]:
            spans = block.find_all('span')
            # The review text is taken from the last <span>; skip blocks
            # that have none instead of raising IndexError.
            if spans:
                res.append(self._clean(spans[-1].text))
        return res if res else '該電影目前無評論'

    def specific_movie_info(self, movie_link):
        """Scrape the detail page at *movie_link*.

        Args:
            movie_link: URL of a movie detail page, or a falsy value.

        Returns:
            A dict with the keys ``中文名稱``, ``英文名稱``, ``上映日期``,
            ``滿意度``, ``IMDb`` and ``Yahoo最新5則評論`` (missing fields are
            ``None``), or ``None`` when no link is given or the page does not
            contain the expected info container.
        """
        if not movie_link:
            return None

        response = requests.get(movie_link, timeout=self.REQUEST_TIMEOUT)
        soup = BeautifulSoup(response.text, "html.parser")
        movie = soup.find('div', class_='movie_intro_info_r')
        if movie is None:
            # Unexpected page layout: report "no info" rather than crash.
            return None

        review = soup.find('div', class_='btn_plus_more usercom_more gabtn')
        has_review_link = review is not None and review.a is not None
        review_link = review.a['href'] if has_review_link else None
        starscore = soup.find('div', class_='score_num count')
        starbox = soup.find('div', class_='starbox2')

        chname = self._clean(movie.h1.text) if movie.h1 is not None else None
        # Some pages may have no <h3>, or an empty one, for the English title.
        if movie.h3 is not None and movie.h3.text:
            engname = self._clean(movie.h3.text)
        else:
            engname = None

        if starscore is not None and starbox is not None:
            starscore = f'{starscore.text} / 5 {starbox.text.strip()}'
        else:
            starscore = None

        reviews = self._latest_reviews(review_link)

        release_date, IMDB = None, None
        for span in movie.find_all('span'):
            text = span.text.strip()
            if '上映日期' in text:
                release_date = self._field_value(text)
            elif 'IMDb分數' in text:
                score = self._field_value(text)
                IMDB = f"{score} / 10" if score is not None else None

        return {
            '中文名稱': chname,
            '英文名稱': engname,
            '上映日期': release_date,
            '滿意度': starscore,
            'IMDb': IMDB,
            'Yahoo最新5則評論': reviews,
        }

    def search_and_get_movie_info(self):
        """Prompt for a title, search for it and scrape the chosen movie.

        Returns:
            ``(movie_name, movie_info)`` where ``movie_info`` is the dict from
            :meth:`specific_movie_info`, or ``None`` when nothing was found.
        """
        query = self.user_input()
        movie_name, movie_link = self.search_movie(query)
        movie_info = self.specific_movie_info(movie_link)
        return movie_name, movie_info

class IMDbMovies:
    """Interactive scraper that fetches review titles for a movie from IMDb.

    Args:
        name: Movie title to look up; stored as ``yahoosearchname``.
    """

    # Seconds to wait for a single HTTP response before ``requests`` raises.
    REQUEST_TIMEOUT = 10

    # CSS path to the <li> items of the search-result list.
    # NOTE(review): this contains generated-looking class names
    # (e.g. ``sc-5352556-0``) and is presumably fragile — confirm against the
    # live page when results unexpectedly come back empty.
    _RESULT_SELECTOR = '#__next > main > div.ipc-page-content-container.ipc-page-content-container--full.sc-5352556-0.cAzlUg > div.ipc-page-content-container.ipc-page-content-container--center > section > div > div.ipc-page-grid__item.ipc-page-grid__item--span-2 > section:nth-child(3) > div.sc-17bafbdb-2.ffAEHI > ul > li'

    def __init__(self, name):
        # A browser-like User-Agent is sent with every request.
        self.headers = {
            "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36"
        }
        self.yahoosearchname = name

    def user_input(self):
        """Prompt until a non-blank movie title is entered.

        Returns:
            The title exactly as typed, or ``None`` when input is aborted
            (end of input or a keyboard interrupt).
        """
        while True:
            try:
                movie_name = input("請輸入欲搜尋的電影名稱：")
            except (EOFError, KeyboardInterrupt):
                # Only the "no more input" conditions are handled; any other
                # error is a real bug and is allowed to propagate.
                print('\nTry Again')
                return None
            if movie_name.strip():
                return movie_name

    @staticmethod
    def _review_url(href):
        """Build the reviews-page URL from a result link such as ``/title/<id>/...``.

        Returns ``None`` when *href* is missing or has no third path segment.
        """
        parts = href.split('/') if href else []
        if len(parts) < 3 or not parts[2]:
            return None
        return f'https://www.imdb.com/title/{parts[2]}/reviews?ref_=tt_urv'

    def specific_movie_reviews(self, movie_link):
        """Fetch up to five review titles from the reviews page *movie_link*.

        Returns:
            A list of review titles, or the "no reviews" message string when
            none are found.
        """
        response = requests.get(
            movie_link, headers=self.headers, timeout=self.REQUEST_TIMEOUT
        )
        soup = BeautifulSoup(response.text, "html.parser")
        reviews = soup.find_all('div', class_='review-container')

        movie_reviews = []
        for review in reviews[:5]:
            title = review.find('a', class_='title')
            # Skip containers without a title link instead of raising.
            if title is not None:
                movie_reviews.append(title.text.strip())
        return movie_reviews if movie_reviews else '該電影目前無評論'

    def search_movie(self, query):
        """Search IMDb for *query* and return review titles of the chosen hit.

        The first hit is used directly when its text equals *query* or when it
        is the only hit.  Otherwise the user is asked about each hit; if all
        are rejected a new title is requested and the search is repeated after
        a short randomized delay.

        Args:
            query: Movie title to search for.

        Returns:
            The result of :meth:`specific_movie_reviews` for the chosen hit,
            or a "not found" message string when the query is empty/``None``
            or the search has no usable hits.
        """
        while True:
            # An aborted prompt yields None; never send that to the site.
            if not query or not query.strip():
                return f'沒有找到{query or ""}'

            encoded_query = requests.utils.quote(query)
            url = f"https://www.imdb.com/find/?q={encoded_query}&ref_=nv_sr_sm"
            response = requests.get(
                url, headers=self.headers, timeout=self.REQUEST_TIMEOUT
            )
            soup = BeautifulSoup(response.text, "html.parser")
            search_results = soup.select(self._RESULT_SELECTOR)

            # Resolve every hit to (title, reviews URL) once, dropping items
            # that lack the expected link so later code cannot hit None.
            candidates = []
            for item in search_results:
                anchor = item.select_one('div > div > a')
                if anchor is None:
                    continue
                review_link = self._review_url(anchor.get('href'))
                if review_link is not None:
                    candidates.append((anchor.text, review_link))

            if not candidates:
                return f'沒有找到{query}'

            first_title, first_link = candidates[0]
            if query == first_title or len(candidates) == 1:
                return self.specific_movie_reviews(first_link)

            for title, review_link in candidates:
                ans = None
                while ans not in ('Y', 'N'):
                    ans = input(f'找到多部電影，返回第一個搜尋{title} (Y)或往下搜索(N):')
                if ans == 'Y':
                    return self.specific_movie_reviews(review_link)

            # Every candidate was rejected: ask for a new title and search
            # again.  The result of the new search is what gets returned
            # (looping instead of recursing keeps that explicit), and the
            # pause happens *before* the next request is sent.
            query = self.user_input()
            if not query:
                return f'沒有找到{query or ""}'
            sleep(1 + random())

In [3]:
def main():
    """Look a movie up on Yahoo Movies, then add IMDb review titles to it.

    Returns:
        The Yahoo info dict extended with the key ``IMDb最新5則評論``, or a
        fixed "not found" message string when a ``TypeError`` is raised
        anywhere during the lookup.
    """
    try:
        yahoo = YahooMovies()
        title, details = yahoo.search_and_get_movie_info()

        # The IMDb search reuses the title that the Yahoo search resolved.
        imdb = IMDbMovies(title)
        details['IMDb最新5則評論'] = imdb.search_movie(imdb.yahoosearchname)
        return details
    except TypeError:
        return '找不到就是找不到'

# Run the interactive lookup (prompts for a movie title on stdin).
# NOTE(review): presumably left as a bare expression so that a notebook cell
# echoes the returned value — confirm before wrapping it in a __main__ guard.
main()

{'中文名稱': '蜘蛛人：穿越新宇宙',
 '英文名稱': 'Spider-Man: Across the Spider-Verse',
 '上映日期': '2023-06-21',
 '滿意度': '4.1 / 5 (共67人投票)',
 'IMDb': '8.9 / 10',
 'Yahoo最新5則評論': ['明明就超好看，個人算是很愛看電影且對劇情的要求也是嚴格的人，緊湊的節奏不會讓人喘不過氣，感動的細節又扣人心弦，目前今年最棒的電影，我推的就是穩，大家手刀快去看。',
  '神作阿! 近年最佳動畫電影了~~~',
  '根據我的專業影評，我認為沒有黑美人魚好看，滿分5分，所以我給2.133分，',
  '很意外有4.2個星，先不說劇情，整體感覺很想再看卡通而不是電影',
  '大推!畫面雖然繽紛到會有點眼花，但真的非常絢麗細膩，且用各種不同畫風和呈現方式，展現不同的場域跟人物，延續第一集的優點，不愧是奧斯卡最佳動畫片。這集大概有1/2在鋪陳主角跟家人的互動，或許會有人覺得稍微枯燥，但卻把主角心境交代得很清楚，可惜故事太龐大必須分拆兩集，下一集還要等到明年，好想趕快看到。'],
 'IMDb最新5則評論': ['One Step Above Masterpiece!',
  'Masterful IMAX Spider-Man',
  'One of the best sequels to anything ever made',
  'A new Empire Strikes Back',
  'Okay This Is Peak']}