In [None]:
import requests
import sys
import shutil
import csv
import os
import json
from bs4 import BeautifulSoup


class IMDbScraper:
    """Scrape the details of a single IMDb title page and save them to disk.

    Pages are downloaded with ``requests`` and parsed with BeautifulSoup.
    Most fields are read from the ``application/ld+json`` metadata block
    embedded in the title page.
    """

    # Seconds to wait on any single HTTP request. Without a timeout,
    # ``requests`` may block indefinitely on an unresponsive server.
    REQUEST_TIMEOUT = 30

    def __init__(self, url) -> None:
        """Remember the title page URL and the headers sent with requests.

        Args:
            url: URL of the title page; surrounding whitespace is removed.
        """
        self.url = url.strip()
        self.headers = {
            "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4473.124 Safari/537.36"
        }

    def _reviews_url(self):
        """Return the URL of the reviews page derived from ``self.url``."""
        # Drop any fragment/query string and trailing slash before appending,
        # so ".../tt0111161" and ".../tt0111161/?ref_=x" both yield a valid
        # path instead of ".../tt0111161reviews?..." or a doubled query.
        base = self.url.split('#', 1)[0].split('?', 1)[0].rstrip('/')
        return base + '/reviews?sort=helpfulness'

    @staticmethod
    def _as_list(value):
        """Return ``value`` as a list: ``None`` -> ``[]``, scalar -> ``[value]``."""
        if value is None:
            return []
        if isinstance(value, (list, tuple)):
            return list(value)
        return [value]

    @staticmethod
    def _safe_dir_name(name):
        """Turn a scraped title into a single, safe directory name."""
        # The title comes from a remote page: path separators would create
        # nested directories and "." / ".." would escape or alias the
        # current working directory.
        cleaned = str(name).replace('/', '_').replace('\\', '_')
        if cleaned.strip() in ('', '.', '..'):
            return 'movie'
        return cleaned

    def _download(self, url, destination):
        """Stream ``url`` into the file ``destination``.

        Raises:
            requests.exceptions.RequestException: on connection problems or
                a non-success HTTP status (nothing is written in that case).
        """
        with requests.get(url, headers=self.headers, stream=True,
                          timeout=self.REQUEST_TIMEOUT) as response:
            # Check the status before opening the file so an error page is
            # never saved as if it were the requested media.
            response.raise_for_status()
            with open(destination, 'wb') as out_file:
                for chunk in response.iter_content(chunk_size=64 * 1024):
                    out_file.write(chunk)

    def _find_trailer_video_url(self, trailer_page_url):
        """Return the playable video URL found on the trailer page, or ''.

        The page's first ``application/json`` script block is parsed and the
        URL is read from its ``playbackURLs`` list.
        """
        response = requests.get(trailer_page_url, headers=self.headers,
                                timeout=self.REQUEST_TIMEOUT)
        response.raise_for_status()
        soup = BeautifulSoup(response.content, 'html.parser')
        script_tag = soup.find('script', type='application/json')
        if not script_tag:
            return ''
        trailer_data = json.loads(script_tag.string)
        playback_urls = trailer_data['props']['pageProps']['videoPlaybackData']['video']['playbackURLs']
        # NOTE(review): the second entry is used deliberately; presumably it
        # is a directly downloadable encoding -- confirm against a live page.
        return playback_urls[1]['url']

    def get_user_reviews(self):
        """Return the text of up to five user reviews, sorted by helpfulness.

        Returns:
            A list of review strings, or ``['No reviews available']`` when
            the page cannot be fetched or contains no reviews.
        """
        try:
            response = requests.get(self._reviews_url(), headers=self.headers,
                                    timeout=self.REQUEST_TIMEOUT)
            response.raise_for_status()
            soup = BeautifulSoup(response.content, 'html.parser')
            review_containers = soup.find_all('div', class_='ipc-html-content-inner-div')
            reviews = [review.get_text(strip=True) for review in review_containers[:5]]
            return reviews if reviews else ['No reviews available']
        except Exception:
            # Reviews are optional extras: any failure degrades to the
            # placeholder instead of aborting the whole scrape.
            return ['No reviews available']

    def get_cast(self, soup):
        """Return the text of the first five metadata list entries of ``soup``.

        Args:
            soup: parsed title page.

        Returns:
            A list of strings, or ``['Cast not available']`` when nothing
            matches or the lookup fails.
        """
        try:
            cast = [c.get_text(strip=True) for c in soup.find_all('div', class_='ipc-metadata-list-item__content-container')][:5]
            return cast if cast else ['Cast not available']
        except Exception:
            return ['Cast not available']

    def get_video_trailer(self, soup, url):
        """Return the trailer URL advertised in the page's JSON-LD metadata.

        Args:
            soup: parsed title page; when ``None`` the page is downloaded
                from ``url`` instead of being fetched a second time.
            url: title page URL, used only when ``soup`` is ``None``.

        Returns:
            The trailer's ``embedUrl`` (preferred) or ``url``, or
            ``'Trailer not available'`` when there is none or on any error.
        """
        try:
            if soup is None:
                response = requests.get(url, headers=self.headers,
                                        timeout=self.REQUEST_TIMEOUT)
                response.raise_for_status()
                soup = BeautifulSoup(response.content, 'html.parser')
            script_tag = soup.find('script', type='application/ld+json')
            if script_tag:
                movie_data = json.loads(script_tag.string)
                trailer_data = movie_data.get('trailer') or {}
                trailer_url = trailer_data.get('embedUrl') or trailer_data.get('url')
                if trailer_url:
                    return trailer_url
            return 'Trailer not available'
        except Exception:
            # Best-effort field: fall back to the placeholder on any failure.
            return 'Trailer not available'

    def scrape(self):
        """Fetch the title page and collect the movie details.

        Returns:
            A dict with the keys ``Name``, ``Poster``, ``Video Trailer``,
            ``Storyline``, ``User Reviews``, ``Genre``, ``Director``,
            ``IMDb Rating`` and ``Cast``; or ``{'Error': message}`` when an
            HTTP request fails.

        Raises:
            RuntimeError: when the page has no usable JSON-LD metadata.
        """
        try:
            response = requests.get(self.url, headers=self.headers,
                                    timeout=self.REQUEST_TIMEOUT)
            response.raise_for_status()
            soup = BeautifulSoup(response.content, 'html.parser')

            # Validate the metadata first so that no further requests are
            # made for a page that is not a title page.
            script_tag = soup.find('script', type='application/ld+json')
            if not script_tag:
                raise RuntimeError("Invalid IMDb URL!")
            try:
                movie_data = json.loads(script_tag.string)
            except (TypeError, ValueError) as err:
                # Empty or malformed metadata is as unusable as none at all.
                raise RuntimeError("Invalid IMDb URL!") from err
            if not isinstance(movie_data, dict):
                raise RuntimeError("Invalid IMDb URL!")

            trailer = self.get_video_trailer(soup, self.url)
            reviews = self.get_user_reviews()
            cast_list = self.get_cast(soup)

            name = movie_data.get('name', 'Name not available')
            poster = movie_data.get('image', 'Poster not available')
            storyline = movie_data.get('description', 'Storyline not available')

            # JSON-LD properties may hold a single value or a list; joining a
            # bare string would otherwise split it into characters.
            genre_values = [str(genre) for genre in self._as_list(movie_data.get('genre'))]
            genres = ', '.join(genre_values) if genre_values else 'Genre not available'

            director_names = [
                entry['name']
                for entry in self._as_list(movie_data.get('director'))
                if isinstance(entry, dict) and entry.get('name')
            ]
            director = ', '.join(director_names) if director_names else 'Director not available'

            # "or {}" also covers an explicit null aggregateRating.
            rating = (movie_data.get('aggregateRating') or {}).get('ratingValue', 'Rating not available')

            print("Fetched movie details successfully.")

            details = {
                'Name': name,
                'Poster': poster,
                'Video Trailer': trailer,
                'Storyline': storyline,
                'User Reviews': reviews,
                'Genre': genres,
                'Director': director,
                'IMDb Rating': rating,
                'Cast': cast_list
            }

            return details

        except requests.exceptions.RequestException as e:
            return {'Error': f'Failed to fetch data: {str(e)}'}

    def save_details(self, movie_details):
        """Write the details to ``<name>/details.csv`` and download the media.

        A directory named after the movie is created in the current working
        directory. The poster is stored as ``poster.png`` and the trailer as
        ``trailer.mp4`` (the file names are fixed regardless of the actual
        media format). Media downloads are best-effort: a failure is reported
        on stdout and does not abort the call.

        Args:
            movie_details: dict as returned by a successful ``scrape()``.

        Returns:
            The name of the directory the files were written to. It equals
            the movie name unless that name had to be sanitised.

        Raises:
            KeyError: when ``movie_details`` lacks ``'Name'`` or ``'Poster'``.
        """
        target_dir = self._safe_dir_name(movie_details['Name'])
        os.makedirs(target_dir, exist_ok=True)
        details_file_path = os.path.join(target_dir, "details.csv")
        # newline='' stops the csv module's own line endings from being
        # translated again; UTF-8 keeps non-ASCII text writable everywhere.
        with open(details_file_path, 'w', newline='', encoding='utf-8') as details_file:
            writer = csv.writer(details_file)
            for key, value in movie_details.items():
                writer.writerow([key, value])

        print("Attempting to download poster...")
        poster_url = movie_details['Poster']
        if poster_url and poster_url != 'Poster not available':
            try:
                self._download(poster_url, os.path.join(target_dir, "poster.png"))
                print("Poster downloaded successfully.")
            except requests.exceptions.RequestException as err:
                print(f"Poster download failed: {err}")

        print("Attempting to download trailer...")
        trailer_page_url = movie_details.get('Video Trailer')
        if trailer_page_url and trailer_page_url != 'Trailer not available':
            try:
                trailer_video_url = self._find_trailer_video_url(trailer_page_url)
                if trailer_video_url:
                    self._download(trailer_video_url, os.path.join(target_dir, "trailer.mp4"))
                    print("Trailer downloaded successfully.")
                else:
                    print("Trailer download failed: no video URL found on the trailer page.")
            except Exception as err:
                # The trailer is optional, so every failure is tolerated --
                # but it is reported instead of being silently discarded.
                print(f"Trailer download failed: {err}")

        return target_dir
# Ask for an IMDb title URL, scrape it and store the results on disk.
imdb_url = input("Enter any required IMDb movie trailer URL: ")
scraper = IMDbScraper(imdb_url)
try:
    movie_details = scraper.scrape()
except RuntimeError as err:
    # scrape() raises RuntimeError when the page carries no movie metadata;
    # report it the same way as a failed request instead of a traceback.
    movie_details = {'Error': str(err)}

if 'Error' in movie_details:
    # A failed scrape yields {'Error': ...}, which has no 'Name' key, so
    # there is nothing to save.
    print(movie_details['Error'])
else:
    # Use the directory reported by save_details() when it returns one;
    # otherwise the files are located under the movie's name.
    saved_dir = scraper.save_details(movie_details) or movie_details['Name']
    print(f"Scraped movie details saved to ./{saved_dir}")


Enter any required IMDb movie trailer URL: https://www.imdb.com/title/tt0111161/
Fetched movie details successfully.
Attempting to download poster...
Poster downloaded successfully.
Attempting to download trailer...
Trailer downloaded successfully.
Scraped movie details saved to ./The Shawshank Redemption
