#### Imports

In [1]:
from selenium import webdriver
from selenium.webdriver.support.ui import Select
import pandas as pd
import time
from bs4 import BeautifulSoup
import requests

#### Part 1-Browser Automation With Selenium

In [2]:
website = "https://imdb.com"
driver = webdriver.Chrome()
driver.get(website)

#maximize window
#driver.maximize_window()

#dropdown
dropdown = driver.find_element("id","iconContext-arrow-drop-down")
dropdown.click()
time.sleep(1)

#click on Advanced Search from dropdown menu
element = driver.find_element("link text","Advanced Search")
element.click()

#click on advanced title search
adv_title = driver.find_element("link text","Advanced Title Search")
adv_title.click()

#select feature film
feature_film = driver.find_element("id","title_type-1")
feature_film.click()

#select tv moive
tv_moive = driver.find_element("id","title_type-2")
tv_moive.click()

#min date
min_date = driver.find_element("name","release_date-min")
min_date.click()
min_date.send_keys("1990")

#max date
max_date = driver.find_element("name","release_date-max")
max_date.click()
max_date.send_keys("2022")

#min rating
min_rating = Select(driver.find_element("name","user_rating-min"))
min_rating.select_by_visible_text("1.0")

#max rating
max_rating = Select(driver.find_element("name","user_rating-max"))
max_rating.select_by_visible_text("10")

#Select Oscar-Nominated
oscar_nominated = driver.find_element("id","groups-7")
oscar_nominated.click()

#Select color
color = driver.find_element("id","colors-1")
color.click()

#Select English Language
language = Select(driver.find_element("name","languages"))
language.select_by_visible_text("English")

#Select display option
display_count = Select(driver.find_element("id","search-count"))
display_count.select_by_visible_text("250 per page")

#click on search button
search = driver.find_element("xpath",'(//button[@type="submit"])[2]')
search.click()

#save current url
current_url = driver.current_url

In [3]:
current_url

'https://www.imdb.com/search/title/?title_type=feature,tv_movie&release_date=1990-01-01,2022-12-31&user_rating=1.0,10.0&groups=oscar_nominee&colors=color&languages=en&count=250'

#### Part 2-Data Extraction With Beautiful Soup

In [4]:
#get request
response = requests.get(current_url)

In [5]:
#check the status code
response.status_code

200

In [6]:
soup = BeautifulSoup(response.content, "html.parser")

In [7]:
#list of items
results = soup.find_all("div",class_="lister-item mode-advanced")

In [8]:
#check length of results
len(results)

250

#### Data We Need To Extract

- Movie Title
- Year
- Durtion
- Genre
- Rating 
- Gross

In [9]:
#movie title
results[0].find("h3",class_="lister-item-header").find("a").get_text()

'Everything Everywhere All at Once'

In [10]:
#year
results[0].find("h3",class_="lister-item-header").find_all("span")[1].get_text().replace("(","").replace(")","")

'2022'

In [11]:
#duration
results[0].find("span",class_="runtime").get_text()

'139 min'

In [12]:
#genre
results[0].find("span",class_="genre").get_text().strip()

'Action, Adventure, Comedy'

In [13]:
#rating
results[0].find("div",class_="inline-block").find("strong").get_text()

'7.9'

In [14]:
#gross
results[0].find("p",class_="sort-num_votes-visible").find_all("span")[-1].get_text()

'$72.86M'

In [15]:
#list comperhension
moive_title = [result.find("h3",class_="lister-item-header").find("a").get_text() for result in results]
year = [result.find("h3",class_="lister-item-header").find_all("span")[1].get_text().replace("(","").replace(")","") for result in results]
duration = [result.find("span",class_="runtime").get_text() for result in results]
genre = [result.find("span",class_="genre").get_text().strip() for result in results]
rating = [result.find("div",class_="inline-block").find("strong").get_text() for result in results]
gross = [result.find("p",class_="sort-num_votes-visible").find_all("span")[-1].get_text() for result in results]

In [16]:
#create DataFrame
imdb_df = pd.DataFrame({"Moive_Title":moive_title,"Year":year,"Durtion":duration,"Genre":genre,"Rating":rating,"Gross":gross})

In [17]:
imdb_df

Unnamed: 0,Moive_Title,Year,Durtion,Genre,Rating,Gross
0,Everything Everywhere All at Once,2022,139 min,"Action, Adventure, Comedy",7.9,$72.86M
1,The Whale,2022,117 min,Drama,7.8,111780
2,The Banshees of Inisherin,2022,114 min,"Comedy, Drama",7.7,182267
3,Triangle of Sadness,2022,147 min,"Comedy, Drama",7.4,114249
4,Babylon,I 2022,189 min,"Comedy, Drama, History",7.2,99112
...,...,...,...,...,...,...
245,The Addams Family,1991,99 min,"Comedy, Fantasy",6.9,$113.50M
246,Ad Astra,2019,123 min,"Adventure, Drama, Mystery",6.5,$50.19M
247,Tron,2010,125 min,"Action, Adventure, Sci-Fi",6.8,$172.06M
248,Guardians of the Galaxy Vol. 2,2017,136 min,"Action, Adventure, Comedy",7.6,$389.81M


In [18]:
#save outputs into excel file
imdb_df.to_csv("imdb_data.csv",index=False)