**Web Scraping Project**



**Installing and Importing Required Libraries**

In [153]:
pip install requests



In [154]:
import requests as r

In [155]:
import pandas as pd

In [156]:
pip install beautifulsoup4



In [157]:
from bs4 import BeautifulSoup

**CSV File Format**

```
Movie Name,Link,MPA,User Score,Overview,Genre,Duration,Released Date,Budget,Revenue
```


In [158]:
# Listing page that main() passes to fetching_cards(); the movie cards are read from it.
main_url = 'https://www.themoviedb.org/movie'

In [159]:
def fetching_cards(main_url, timeout=30):
  """Download a listing page and return the movie cards found on it.

  Args:
    main_url: URL of the listing page to download.
    timeout: Seconds to wait for the server before giving up, so a stalled
      connection cannot hang the notebook.

  Returns:
    The ``find_all`` result holding every ``<div class="card style_1">`` element
    of the page (empty when the page contains none).

  Raises:
    requests.HTTPError: if the server answers with a 4xx/5xx status.
    requests.RequestException: on connection failures or when the timeout expires.
  """
  response = r.get(main_url, timeout=timeout)
  # Fail loudly on an error response instead of parsing an error page and
  # silently returning no cards.
  response.raise_for_status()
  soup = BeautifulSoup(response.text, 'html.parser')
  return soup.find_all('div', class_='card style_1')

In [160]:
def extracting_title_link(cards):
  """Build a DataFrame of movie titles and absolute detail-page links.

  Args:
    cards: Iterable of parsed card elements; each is expected to contain an
      ``<h2>`` whose ``<a>`` carries the title text and a site-relative ``href``.

  Returns:
    A DataFrame with the columns ``'Movie Name'`` and ``'Link'``, one row per
    usable card. Both columns are present even when no card is usable, so
    callers can always index ``df['Link']``.
  """
  movies_main_data = []
  for card in cards:
    heading = card.find('h2')
    anchor = heading.find('a') if heading is not None else None
    href = anchor.get('href') if anchor is not None else None
    if not href:
      # A card without a title link (e.g. an ad or placeholder) cannot be
      # followed to a detail page; skip it rather than abort the whole run.
      continue
    movies_main_data.append({
        'Movie Name': anchor.text.strip(),
        'Link': f'https://www.themoviedb.org{href}'
    })
  return pd.DataFrame(movies_main_data, columns=['Movie Name', 'Link'])

In [161]:
# Fallback value for every detail column; also fixes the column order of the result.
_MOVIE_DETAIL_DEFAULTS = {
    'MPA': 'Not Rated',
    'User Score': 'N/A',
    'Overview': 'N/A',
    'Genre': 'N/A',
    'Duration': 'N/A',
    'Released Date': 'N/A',
    'Budget': 'N/A',
    'Revenue': 'N/A',
}


def _stripped_text(tag):
  """Return the whitespace-trimmed text of ``tag``, or None when the tag is missing."""
  return tag.text.strip() if tag is not None else None


def _fact_value(paragraphs, label):
  """Return the text of the first paragraph mentioning ``label`` with the label removed, else None."""
  for paragraph in paragraphs:
    if label in paragraph.text:
      return paragraph.text.replace(label, '').strip()
  return None


def _parse_movie_page(soup):
  """Read the detail fields from a parsed movie page; fields not found keep their default."""
  found = {}

  # MPA certification lives inside the "facts" block.
  facts = soup.find('div', class_='facts')
  if facts is not None:
    found['MPA'] = _stripped_text(facts.find('span', class_='certification'))

  # User score is stored in an attribute, not in the element text.
  score_chart = soup.find('div', class_='user_score_chart')
  if score_chart is not None:
    found['User Score'] = score_chart.get('data-percent')

  # Overview
  overview = soup.find('div', class_='overview')
  if overview is not None:
    found['Overview'] = _stripped_text(overview.find('p'))

  # Budget and revenue are labelled paragraphs in the left-hand facts column.
  left_column = soup.find('section', class_='facts left_column')
  if left_column is not None:
    paragraphs = left_column.find_all('p')
    found['Budget'] = _fact_value(paragraphs, 'Budget')
    found['Revenue'] = _fact_value(paragraphs, 'Revenue')

  # Release date and runtime
  found['Released Date'] = _stripped_text(soup.find('span', class_='release'))
  found['Duration'] = _stripped_text(soup.find('span', class_='runtime'))

  # Genres, joined into one comma-separated string
  genres = soup.find('span', class_='genres')
  if genres is not None:
    names = [link.text.strip() for link in genres.find_all('a')]
    found['Genre'] = ",".join(names) if names else None

  row = dict(_MOVIE_DETAIL_DEFAULTS)
  row.update({key: value for key, value in found.items() if value is not None})
  return row


def extracting_data(df, timeout=30):
  """Scrape the detail page of every movie listed in ``df['Link']``.

  Args:
    df: DataFrame with a ``'Link'`` column of detail-page URLs.
    timeout: Seconds to wait for each page before giving up.

  Returns:
    A DataFrame with the columns MPA, User Score, Overview, Genre, Duration,
    Released Date, Budget and Revenue. It has exactly one row per input link,
    in the same order and with the same index as ``df``, so it can be joined
    column-wise to ``df`` without shifting details onto the wrong movie.
    A field that is absent from a page keeps its default ('Not Rated' for MPA,
    'N/A' otherwise); a page that cannot be fetched or parsed yields a row
    made entirely of defaults.
  """
  all_data = []
  for movie in df['Link']:
    row = dict(_MOVIE_DETAIL_DEFAULTS)
    try:
      response = r.get(movie, timeout=timeout)
      response.raise_for_status()
      row = _parse_movie_page(BeautifulSoup(response.text, 'html.parser'))
    except Exception as e:
      # Best effort: report the failure but still emit a placeholder row, so the
      # result stays aligned with the input rows.
      print(f"Error scraping {movie}: {e}")
    all_data.append(row)
  return pd.DataFrame(all_data, index=df.index, columns=list(_MOVIE_DETAIL_DEFAULTS))

In [162]:
def merging_dataframes(df,df_result):
  """Return one frame holding the columns of ``df`` followed by those of ``df_result``.

  The two frames are concatenated column-wise, so rows are paired by index label;
  a label present in only one frame gets missing values in the other's columns.
  """
  side_by_side = pd.concat(objs=[df, df_result], axis='columns')
  return side_by_side

In [163]:
def main(url=None):
  """Run the whole scrape and return the merged result.

  Args:
    url: Listing page to scrape. When omitted, the module-level ``main_url``
      is used, which is what a plain ``main()`` call has always done.

  Returns:
    A DataFrame combining the title/link columns from the listing page with
    the detail columns scraped from each movie page.
  """
  cards = fetching_cards(main_url if url is None else url)
  df = extracting_title_link(cards)
  df_result = extracting_data(df)
  # A bare df.head() here would be discarded inside a function, so the frame is
  # simply returned; preview it from the calling cell.
  return merging_dataframes(df=df,df_result=df_result)

In [164]:
# The scrape is started by the `df = main()` cell that follows, which also keeps
# the returned DataFrame. A guarded `main()` call here would repeat every HTTP
# request (inside a notebook kernel `__name__` is '__main__') and throw the
# result away, so it is intentionally not made.

In [165]:
# Run the full scrape and keep the merged DataFrame for the cells below.
df = main()

In [166]:
# Preview the first ten scraped rows.
df.head(10)

Unnamed: 0,Movie Name,Link,MPA,User Score,Overview,Genre,Duration,Released Date,Budget,Revenue
0,The Wrecking Crew,https://www.themoviedb.org/movie/1168190-the-w...,R,65,Estranged half-brothers Jonny and James reunit...,"Action,Comedy,Crime,Mystery",2h 2m,01/28/2026 (US),-,-
1,Greenland 2: Migration,https://www.themoviedb.org/movie/840464-greenl...,PG-13,65,Having found the safety of the Greenland bunke...,"Adventure,Thriller,Science Fiction",1h 38m,01/09/2026 (US),"$90,000,000.00","$11,416,907.00"
2,Zootopia 2,https://www.themoviedb.org/movie/1084242-zooto...,PG,76,After cracking the biggest case in Zootopia's ...,"Animation,Comedy,Adventure,Family,Mystery",1h 48m,11/26/2025 (US),"$150,000,000.00","$1,744,338,246.00"
3,The Shadow's Edge,https://www.themoviedb.org/movie/1419406,NR,72,Macau Police brings the tracking expert police...,"Action,Crime,Drama,Thriller",2h 22m,08/22/2025 (US),-,"$174,400,000.00"
4,Anaconda,https://www.themoviedb.org/movie/1234731-anaconda,PG-13,60,A group of friends facing mid-life crises head...,"Adventure,Comedy,Horror",1h 40m,12/25/2025 (US),"$45,000,000.00","$129,019,155.00"
5,96 Minutes,https://www.themoviedb.org/movie/1271895-96,12,69,"Former bomb disposal expert, Song Kang-Ren, an...","Action,Crime,Romance",1h 57m,09/05/2025 (TW),-,-
6,The Internship,https://www.themoviedb.org/movie/1584215-the-i...,Not Rated,61,A CIA-trained assassin recruits other graduate...,Action,1h 31m,01/13/2026 (US),-,-
7,The Rip,https://www.themoviedb.org/movie/1306368-the-rip,R,70,Trust frays when a team of Miami cops discover...,"Action,Thriller,Crime",1h 53m,01/13/2026 (US),"$100,000,000.00",-
8,Avatar: Fire and Ash,https://www.themoviedb.org/movie/83533-avatar-...,PG-13,73,In the wake of the devastating war against the...,"Science Fiction,Adventure,Fantasy",3h 18m,12/19/2025 (US),"$350,000,000.00","$1,378,692,505.00"
9,Predator: Badlands,https://www.themoviedb.org/movie/1242898-preda...,PG-13,78,"Cast out from his clan, a young Predator finds...","Action,Science Fiction,Adventure",1h 47m,11/07/2025 (US),"$105,000,000.00","$184,561,056.00"


In [169]:
# index=False: the row index is not part of the documented CSV format, and writing
# it would add an unnamed leading column to the file.
df.to_csv('Movies_Scrapped_Data.csv', index=False)

In [None]:
# Mount Google Drive at /content/drive.
# NOTE(review): google.colab is presumably only importable inside Colab, and this
# cell sits after the CSV export, which writes to a relative path — confirm whether
# the mount was meant to happen before saving.
from google.colab import drive
drive.mount('/content/drive')