In [128]:
import json
from urllib.parse import urljoin

import requests
from bs4 import BeautifulSoup

In [129]:
# Ask the user for the IMDb movie page address; surrounding whitespace is
# stripped. Input that lacks the "/title/tt" path marker is rejected with a
# ValueError.
URL = input("Enter IMDb movie URL: ").strip()
if URL.find("/title/tt") < 0:
    raise ValueError(
        "Please enter a valid IMDb title URL, e.g. https://www.imdb.com/title/tt1375666/"
    )

Enter IMDb movie URL:  https://www.imdb.com/title/tt0337578/?ref_=nv_sr_srsg_0_tt_7_nm_1_in_0_q_baghba


In [139]:
# Fetch the title page and build the Beautiful Soup parse tree.
# A browser-like User-Agent header is sent with the request, and an HTTP
# error status raises here instead of an error page being parsed.
headers = {"User-Agent": "Mozilla/5.0"}
response = requests.get(url=URL, headers=headers, timeout=15)
response.raise_for_status()
soup = BeautifulSoup(markup=response.text, features="html.parser")

In [131]:
# Extract the JSON-LD metadata embedded in the page.
# `metadata` is always left as a dict so that `metadata.get(...)` is safe to
# call: a missing tag, malformed JSON, or a non-object payload all fall back
# to {}. A top-level JSON list contributes its first dict entry.
script_tag = soup.find("script", type="application/ld+json")
metadata = {}
if script_tag and script_tag.string:
    try:
        parsed = json.loads(script_tag.string)
    except json.JSONDecodeError as exc:
        # Report the problem but keep the notebook usable with empty metadata.
        print("Could not parse JSON-LD metadata:", exc)
        parsed = None
    if isinstance(parsed, list):
        parsed = next((item for item in parsed if isinstance(item, dict)), None)
    if isinstance(parsed, dict):
        metadata = parsed
print(metadata)

{'@context': 'https://schema.org', '@type': 'Movie', 'url': 'https://www.imdb.com/title/tt0337578/', 'name': 'Baghban', 'image': 'https://m.media-amazon.com/images/M/MV5BYzIyYTIzZWYtYzhhMi00ZDA1LWE5NjYtZjhlM2VkYjMxNTMxXkEyXkFqcGc@._V1_.jpg', 'description': 'An elderly couple wish their children to care for them in their old age. But their children see and treat them as a burden, and they must struggle to regain their worth and dignity to themselves and others.', 'review': {'@type': 'Review', 'itemReviewed': {'@type': 'Movie', 'url': 'https://www.imdb.com/title/tt0337578/'}, 'author': {'@type': 'Person', 'name': 'MuviFan73'}, 'dateCreated': '2024-12-29', 'inLanguage': 'English', 'name': 'You will like it if you as human are a right entity.', 'reviewBody': 'I watched the movie again after about two decades. Some things slipped from mind. The highlight was however was cast of Alok. I somehow felt he is first introduced while the scene involving test drive.\n\nIf we see the script, that in

In [132]:
# Download the poster image referenced by the metadata and save it to disk.
# The HTTP status is checked before writing so an error response is never
# stored as the image, and the success message reports the exact file name
# that was written.
poster_url = metadata.get("image")
print(poster_url)
poster_path = "poster.jpg"
if poster_url:
    poster_response = requests.get(poster_url, headers=headers, timeout=15)
    poster_response.raise_for_status()  # raises requests.HTTPError on 4xx/5xx
    img_bytes = poster_response.content
    with open(poster_path, "wb") as fp:
        fp.write(img_bytes)
    print(f"\n✔️Poster saved as {poster_path}")
else:
    print("\n❌Poster not found")

https://m.media-amazon.com/images/M/MV5BYzIyYTIzZWYtYzhhMi00ZDA1LWE5NjYtZjhlM2VkYjMxNTMxXkEyXkFqcGc@._V1_.jpg

✔️Poster saved as Poster.jpg


In [133]:
# Find the trailer link: the first anchor whose href contains "/video/".
# urljoin resolves the href against the IMDb origin, so relative paths are
# prefixed with the site while absolute or protocol-relative hrefs are not
# mangled by blind string concatenation.
trailer_tag = soup.find("a", href=lambda h: h and "/video/" in h)
if trailer_tag:
    trailer_link = urljoin("https://www.imdb.com", trailer_tag["href"])
    print("🎬Trailer link: ", trailer_link)
else:
    print("🎬Trailer link: not found.")

🎬Trailer link: not found.


In [134]:
# Show the plot summary stored under the metadata "description" key,
# or a placeholder line when that value is missing or empty.
storyline = metadata.get("description")
if not storyline:
    print("\n📖No Storyline")
else:
    print("\n📖Storyline:\n", storyline)


📖Storyline:
 An elderly couple wish their children to care for them in their old age. But their children see and treat them as a burden, and they must struggle to regain their worth and dignity to themselves and others.


In [135]:
# Show the genre(s). The raw metadata value is printed first; a list value
# is then collapsed into one comma-separated string for display.
genre = metadata.get("genre")
print(genre)
genre = ", ".join(genre) if isinstance(genre, list) else genre
if not genre:
    print("\nNot found")
else:
    print("\n🎭Genre:", genre)

['Drama', 'Romance']

🎭Genre: Drama, Romance


In [136]:
# Show the director(s). The metadata value is handled as either a single
# dict or a list of dicts; any other shape is reported as not found.
directors = metadata.get("director")
print(directors)
director_names = None
if isinstance(directors, dict):
    director_names = directors.get("name")
elif isinstance(directors, list):
    # Entries without a usable name are skipped.
    all_names = [entry.get("name") for entry in directors]
    director_names = ", ".join(name for name in all_names if name)
print("\n🎬Directors: ", director_names if director_names else "Not found.")

[{'@type': 'Person', 'url': 'https://www.imdb.com/name/nm0159162/', 'name': 'Ravi Chopra'}]

🎬Directors:  Ravi Chopra


In [137]:
# Show up to the first ten cast members. The value is read from the
# "actor" key, falling back to "actors". A single dict is treated as a
# one-element cast, matching how the director value is handled, instead of
# being reported as not found.
cast_list = metadata.get("actor") or metadata.get("actors")
print(cast_list)
if isinstance(cast_list, list):
    cast_names = ", ".join(a.get("name") for a in cast_list[:10] if a.get("name"))
elif isinstance(cast_list, dict):
    cast_names = cast_list.get("name")
else:
    cast_names = None
print("\n👥Cast:", cast_names or "Not found.")

[{'@type': 'Person', 'url': 'https://www.imdb.com/name/nm0000821/', 'name': 'Amitabh Bachchan'}, {'@type': 'Person', 'url': 'https://www.imdb.com/name/nm0004564/', 'name': 'Hema Malini'}, {'@type': 'Person', 'url': 'https://www.imdb.com/name/nm0006795/', 'name': 'Salman Khan'}]

👥Cast: Amitabh Bachchan, Hema Malini, Salman Khan


In [138]:
# Show the rating taken from the "aggregateRating" entry of the metadata.
# The raw entry is printed first; the rating is only looked up when that
# entry is a dict.
agg = metadata.get("aggregateRating")
print(agg)
if isinstance(agg, dict):
    rating = agg.get("ratingValue")
else:
    rating = None
print("\n⭐IMDb Rating:", rating if rating else "Not found.")

{'@type': 'AggregateRating', 'ratingCount': 18285, 'bestRating': 10, 'worstRating': 1, 'ratingValue': 7.4}

⭐IMDb Rating: 7.4
