### Selenium & Beautiful Soup Combined project : Automation with Selenium & data extraction using BeautifulSoup Library (Basic)

##### pre-requisite
- Basic Html CSS  
- Basic ideas on Inspecting elements from a webpage
- Download automated test software/webdriver of your browser 

##### Supporting Documentations:

- Selenium Documentation : https://selenium-python.readthedocs.io/
- BeautifulSoup documentation :
- https://www.tutorialspoint.com/beautiful_soup/beautiful_soup_tutorial.pdf
- https://tedboy.github.io/bs4_doc/
- https://www.dataquest.io/blog/web-scraping-python-using-beautiful-soup/

- HTTP response status codes :  https://developer.mozilla.org/en-US/docs/Web/HTTP/Status

- Xpath Expresssion : 
- https://www.lambdatest.com/blog/complete-guide-for-using-xpath-in-selenium-with-examples/
- https://www.ibm.com/docs/en/app-connect/11.0.0?topic=xpath-creating-expressions
    

#### Import required libraries

In [1]:
import pandas as pd
import time
from selenium import webdriver
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.support.ui import Select
from selenium.webdriver.common.action_chains import ActionChains
from bs4 import BeautifulSoup
import requests

In [22]:
################# Browser Automation with Selenium #############################
#locating elements
#navigating


## for opening a specific website by automated test software, chromedriver

driver = webdriver.Chrome(executable_path= r'C:\Users\sunan\Downloads\chromedriver.exe')
driver.get('https://www.imdb.com/')   #To access any website


# maximize window
driver.maximize_window()

# dropdown
dropdown = driver.find_element_by_class_name('ipc-icon--arrow-drop-down')
dropdown.click()  ##automatically opens a dropdown menu
time.sleep(1) #after navigating a dropdown bar will pause for 1second

# accesses the linked page of advanced search from dropdown menu of IMDB website
element = driver.find_element_by_link_text('Advanced Search')
element.click()


# click on avanced title search
adv_title = driver.find_element_by_link_text('Advanced Title Search')
adv_title.click() 


# selecting feature film
feature_film = driver.find_element_by_id('title_type-1')
feature_film.click() 

# select tv movie
tv_movie = driver.find_element_by_id('title_type-12')
tv_movie.click()


# min date
min_date = driver.find_element_by_name('release_date-min')
min_date.click()
min_date.send_keys('1990') ## starting time range 

# max date
max_date = driver.find_element_by_name('release_date-max')
max_date.click()
max_date.send_keys('2020') ## last time range


# rating min
rating_min = driver.find_element_by_name('user_rating-min')
rating_min.click()
dropdown_2 = Select(rating_min)
dropdown_2.select_by_visible_text('1.0') 
##select_by_visible_text has been used for selecting exactly what is shown in dropdown menu

# rating max
rating_max = driver.find_element_by_name('user_rating-max')
rating_max.click()
dropdown_3 = Select(rating_max)
dropdown_3.select_by_visible_text('10')

# oscar Winning
oscar_nominated = driver.find_element_by_id('groups-4')
oscar_nominated.click()

# color info
color = driver.find_element_by_id('colors-3')
color.click()

# language
language = driver.find_element_by_name('languages')
dropdown_4 = Select(language)
dropdown_4.select_by_visible_text('English') 

# 250 results
results_count = driver.find_element_by_id('search-count')
dropdown_5 = Select(results_count)
dropdown_5.select_by_index(2)


# submit
submit = driver.find_element_by_xpath('(//button[@type="submit"])[2]')  ##Xpath Expresssion for button tag
submit.click()

# current
current_url = driver.current_url




################# Data Extraction with Beautiful Soup #############################

# get request
response = requests.get(current_url)

# soup object
soup = BeautifulSoup(response.content, 'html.parser')

# result items (starting point)
list_items = soup.find_all('div', {'class':'lister-item'})


# list comprehension
movie_title = [result.find('h3').find('a').get_text() for result in list_items]
year = [result.find('h3').find('span', {'class':'lister-item-year'}).get_text().replace('(', '').replace(')', '') for result in list_items]
duration = [result.find('span', {'class':'runtime'}).get_text() for result in list_items]
genre = [result.find('span', {'class':'genre'}).get_text().strip() for result in list_items]
rating = [result.find('div', {'class':'ratings-imdb-rating'}).get_text().strip() for result in list_items]

# create dataframe
imdb_df = pd.DataFrame({'Movie Title': movie_title, 'Year': year, 'Duration':duration,
                       'Genre': genre, 'Rating':rating})

imdb_df

  driver = webdriver.Chrome(executable_path= r'C:\Users\sunan\Downloads\chromedriver.exe')
  dropdown = driver.find_element_by_class_name('ipc-icon--arrow-drop-down')
  element = driver.find_element_by_link_text('Advanced Search')
  adv_title = driver.find_element_by_link_text('Advanced Title Search')
  feature_film = driver.find_element_by_id('title_type-1')
  tv_movie = driver.find_element_by_id('title_type-12')
  min_date = driver.find_element_by_name('release_date-min')
  max_date = driver.find_element_by_name('release_date-max')
  rating_min = driver.find_element_by_name('user_rating-min')
  rating_max = driver.find_element_by_name('user_rating-max')
  oscar_nominated = driver.find_element_by_id('groups-4')
  color = driver.find_element_by_id('colors-3')
  language = driver.find_element_by_name('languages')
  results_count = driver.find_element_by_id('search-count')
  submit = driver.find_element_by_xpath('(//button[@type="submit"])[2]')  ##Xpath Expresssion for button tag


Unnamed: 0,Movie Title,Year,Duration,Genre,Rating
0,Blade Runner 2049,2017,164 min,"Action, Drama, Mystery",8.0
1,Interstellar,2014,169 min,"Adventure, Drama, Sci-Fi",8.6
2,Joker,2019,122 min,"Crime, Drama, Thriller",8.4
3,Once Upon a Time... in Hollywood,2019,161 min,"Comedy, Drama",7.6
4,Promising Young Woman,2020,113 min,"Crime, Drama, Mystery",7.5
...,...,...,...,...,...
245,If Beale Street Could Talk,2018,119 min,"Drama, Romance",7.1
246,Finding Neverland,2004,106 min,"Biography, Drama, Family",7.7
247,The Artist,I 2011,100 min,"Comedy, Drama, Romance",7.9
248,The Last King of Scotland,2006,123 min,"Biography, Drama, History",7.7


In [23]:
imdb_df.sort_values(by=['Year'])

Unnamed: 0,Movie Title,Year,Duration,Genre,Rating
147,The Hunt for Red October,1990,135 min,"Action, Adventure, Thriller",7.6
116,Misery,1990,107 min,"Drama, Thriller",7.8
72,Ghost,1990,127 min,"Drama, Fantasy, Romance",7.1
23,Goodfellas,1990,146 min,"Biography, Crime, Drama",8.7
148,Dances with Wolves,1990,181 min,"Adventure, Drama, Western",8.0
...,...,...,...,...,...
86,Rocketman,I 2019,121 min,"Biography, Drama, Musical",7.3
43,Bombshell,I 2019,109 min,"Biography, Drama",6.8
32,The Father,I 2020,97 min,Drama,8.3
17,Arrival,II 2016,116 min,"Drama, Sci-Fi",7.9


In [24]:
# output as csv file
imdb_df.to_csv('imdb_data.csv', index=False)

##### Load the CSV file

In [25]:
df = pd.read_csv(r'C:\Users\sunan\Desktop\scraping with beautiful soup and selenium\imdb_data.csv')
df

Unnamed: 0,Movie Title,Year,Duration,Genre,Rating
0,Blade Runner 2049,2017,164 min,"Action, Drama, Mystery",8.0
1,Interstellar,2014,169 min,"Adventure, Drama, Sci-Fi",8.6
2,Joker,2019,122 min,"Crime, Drama, Thriller",8.4
3,Once Upon a Time... in Hollywood,2019,161 min,"Comedy, Drama",7.6
4,Promising Young Woman,2020,113 min,"Crime, Drama, Mystery",7.5
...,...,...,...,...,...
245,If Beale Street Could Talk,2018,119 min,"Drama, Romance",7.1
246,Finding Neverland,2004,106 min,"Biography, Drama, Family",7.7
247,The Artist,I 2011,100 min,"Comedy, Drama, Romance",7.9
248,The Last King of Scotland,2006,123 min,"Biography, Drama, History",7.7


# Breaking down the steps..................

##### Part 1 - Browser Automation with Selenium

In [3]:
## for opening a specific website by automated test software, chromedriver

driver = webdriver.Chrome(executable_path= r'C:\Users\sunan\Downloads\chromedriver.exe')
driver.get('https://www.imdb.com/')   #To access any website


# maximize window
driver.maximize_window()

# dropdown
dropdown = driver.find_element_by_class_name('ipc-icon--arrow-drop-down')
dropdown.click()  ##automatically opens a dropdown menu
time.sleep(1) #after navigating a dropdown bar will pause for 1second

# accesses the linked page of advanced search from dropdown menu of IMDB website
element = driver.find_element_by_link_text('Advanced Search')
element.click()


# click on avanced title search
adv_title = driver.find_element_by_link_text('Advanced Title Search')
adv_title.click() 


# selecting feature film
feature_film = driver.find_element_by_id('title_type-1')
feature_film.click() 

# select tv movie
tv_movie = driver.find_element_by_id('title_type-12')
tv_movie.click()


# min date
min_date = driver.find_element_by_name('release_date-min')
min_date.click()
min_date.send_keys('1990') ## starting time range 

# max date
max_date = driver.find_element_by_name('release_date-max')
max_date.click()
max_date.send_keys('2020') ## last time range


# rating min
rating_min = driver.find_element_by_name('user_rating-min')
rating_min.click()
dropdown_2 = Select(rating_min)
dropdown_2.select_by_visible_text('1.0') 
##select_by_visible_text has been used for selecting exactly what is shown in dropdown menu

# rating max
rating_max = driver.find_element_by_name('user_rating-max')
rating_max.click()
dropdown_3 = Select(rating_max)
dropdown_3.select_by_visible_text('10')

# oscar Winning
oscar_nominated = driver.find_element_by_id('groups-4')
oscar_nominated.click()

# color info
color = driver.find_element_by_id('colors-3')
color.click()

# language
language = driver.find_element_by_name('languages')
dropdown_4 = Select(language)
dropdown_4.select_by_visible_text('English') 

# 250 results
results_count = driver.find_element_by_id('search-count')
dropdown_5 = Select(results_count)
dropdown_5.select_by_index(2)


# submit
submit = driver.find_element_by_xpath('(//button[@type="submit"])[2]')  ##Xpath Expresssion for button tag
submit.click()

# current
current_url = driver.current_url


  driver = webdriver.Chrome(executable_path= r'C:\Users\sunan\Downloads\chromedriver.exe')
  dropdown = driver.find_element_by_class_name('ipc-icon--arrow-drop-down')
  element = driver.find_element_by_link_text('Advanced Search')
  adv_title = driver.find_element_by_link_text('Advanced Title Search')
  feature_film = driver.find_element_by_id('title_type-1')
  tv_movie = driver.find_element_by_id('title_type-12')
  min_date = driver.find_element_by_name('release_date-min')
  max_date = driver.find_element_by_name('release_date-max')
  rating_min = driver.find_element_by_name('user_rating-min')
  rating_max = driver.find_element_by_name('user_rating-max')
  oscar_nominated = driver.find_element_by_id('groups-4')
  color = driver.find_element_by_id('colors-3')
  language = driver.find_element_by_name('languages')
  results_count = driver.find_element_by_id('search-count')
  submit = driver.find_element_by_xpath('(//button[@type="submit"])[2]')  ##Xpath Expresssion for button tag


In [4]:
current_url

'https://www.imdb.com/search/title/?title_type=feature,podcast_series&release_date=1990-01-01,2020-12-31&user_rating=1.0,10.0&groups=oscar_winner&colors=colorized&languages=en&count=250'

##### Part 2 - Data Extraction with Beautiful Soup

In [5]:
# get request
response = requests.get(current_url)

In [6]:
# status code
response.status_code

200

In [8]:
# soup object
soup = BeautifulSoup(response.content, 'html.parser')

In [9]:
soup


<!DOCTYPE html>

<html xmlns:fb="http://www.facebook.com/2008/fbml" xmlns:og="http://ogp.me/ns#">
<head>
<meta charset="utf-8"/>
<meta content="IE=edge" http-equiv="X-UA-Compatible"/>
<script type="text/javascript">var IMDbTimer={starttime: new Date().getTime(),pt:'java'};</script>
<script>
    if (typeof uet == 'function') {
      uet("bb", "LoadTitle", {wb: 1});
    }
</script>
<script>(function(t){ (t.events = t.events || {})["csm_head_pre_title"] = new Date().getTime(); })(IMDbTimer);</script>
<title>Feature Film/Podcast Series,
Released between 1990-01-01 and 2020-12-31,
User Rating between 1 and 10,
Oscar-Winning,
Colorized,
English
(Sorted by Popularity Ascending) - IMDb</title>
<script>(function(t){ (t.events = t.events || {})["csm_head_post_title"] = new Date().getTime(); })(IMDbTimer);</script>
<script>
    if (typeof uet == 'function') {
      uet("be", "LoadTitle", {wb: 1});
    }
</script>
<script>
    if (typeof uex == 'function') {
      uex("ld", "LoadTitle", {wb: 1});

In [10]:
# result items (starting point)
list_items = soup.find_all('div', {'class':'lister-item'})

In [11]:
len(list_items)

250

#### Extracting data

- movie title
- year
- duration
- genre
- rating

**find the first movie**

In [26]:
#find the first movie

list_items[0].find('h3')

<h3 class="lister-item-header">
<span class="lister-item-index unbold text-primary">1.</span>
<a href="/title/tt1856101/">Blade Runner 2049</a>
<span class="lister-item-year text-muted unbold">(2017)</span>
</h3>

In [27]:
# movie title
list_items[0].find('h3').find('a')

<a href="/title/tt1856101/">Blade Runner 2049</a>

In [29]:
# movie title as text
list_items[0].find('h3').find('a').get_text()

'Blade Runner 2049'

#### released year of the first movie in the list

In [30]:
# year
list_items[0].find('h3')

<h3 class="lister-item-header">
<span class="lister-item-index unbold text-primary">1.</span>
<a href="/title/tt1856101/">Blade Runner 2049</a>
<span class="lister-item-year text-muted unbold">(2017)</span>
</h3>

In [31]:
# year
list_items[0].find('h3').find('span', {'class':'lister-item-year'})

<span class="lister-item-year text-muted unbold">(2017)</span>

In [32]:
# year
list_items[0].find('h3').find('span', {'class':'lister-item-year'}).get_text()

'(2017)'

In [33]:
# year
list_items[0].find('h3').find('span', {'class':'lister-item-year'}).get_text().replace('(', '')

'2017)'

In [34]:
# year
list_items[0].find('h3').find('span', {'class':'lister-item-year'}).get_text().replace('(', '').replace(')', '')

'2017'

#### check duration

In [35]:
# year
list_items[0].find('h3')

<h3 class="lister-item-header">
<span class="lister-item-index unbold text-primary">1.</span>
<a href="/title/tt1856101/">Blade Runner 2049</a>
<span class="lister-item-year text-muted unbold">(2017)</span>
</h3>

**Duration check**

In [37]:
## inspection tool on time, then find the tag where the duration is placed

In [36]:
# duration
list_items[0].find('p')

<p class="text-muted">
<span class="certificate">M18</span>
<span class="ghost">|</span>
<span class="runtime">164 min</span>
<span class="ghost">|</span>
<span class="genre">
Action, Drama, Mystery            </span>
</p>

In [14]:
# duration
list_items[0].find('span', {'class':'runtime'}).get_text()

'164 min'

**Genre Check**

In [38]:
# genre
list_items[0].find('span', {'class':'genre'}).get_text()

'\nAction, Drama, Mystery            '

In [39]:
# genre
list_items[0].find('span', {'class':'genre'}).get_text().strip()

'Action, Drama, Mystery'

In [40]:
# rating
list_items[0].find('div', {'class':'ratings-imdb-rating'}).get_text().strip()

'8.0'

In [41]:
# list comprehension
movie_title = [result.find('h3').find('a').get_text() for result in list_items]
year = [result.find('h3').find('span', {'class':'lister-item-year'}).get_text().replace('(', '').replace(')', '') for result in list_items]
duration = [result.find('span', {'class':'runtime'}).get_text() for result in list_items]
genre = [result.find('span', {'class':'genre'}).get_text().strip() for result in list_items]
rating = [result.find('div', {'class':'ratings-imdb-rating'}).get_text().strip() for result in list_items]


In [42]:
movie_title

['Blade Runner 2049',
 'Interstellar',
 'Joker',
 'Once Upon a Time... in Hollywood',
 'Promising Young Woman',
 'Tenet',
 'Dracula',
 'Coco',
 'Spectre',
 'Call Me by Your Name',
 'Gisaengchung',
 'The Dark Knight',
 'The Matrix',
 'Titanic',
 'Pulp Fiction',
 '1917',
 'The Lord of the Rings: The Fellowship of the Ring',
 'Arrival',
 'Sleepy Hollow',
 'Little Women',
 'Get Out',
 'Skyfall',
 'Inception',
 'Goodfellas',
 'Gladiator',
 'The Silence of the Lambs',
 'Nomadland',
 'The Grand Budapest Hotel',
 'Suicide Squad',
 'Django Unchained',
 'Inglourious Basterds',
 'Jurassic Park',
 'The Father',
 'Forrest Gump',
 'Jojo Rabbit',
 'Avatar',
 'American Beauty',
 'Mad Max: Fury Road',
 'No Country for Old Men',
 'Dunkirk',
 'Saving Private Ryan',
 'The Departed',
 "Schindler's List",
 'Bombshell',
 'Black Swan',
 'The Great Gatsby',
 'Whiplash',
 'Bohemian Rhapsody',
 'Spider-Man: Into the Spider-Verse',
 'Good Will Hunting',
 'Black Panther',
 'The Girl with the Dragon Tattoo',
 'The 

In [43]:
year

['2017',
 '2014',
 '2019',
 '2019',
 '2020',
 '2020',
 '1992',
 'I 2017',
 'I 2015',
 '2017',
 '2019',
 '2008',
 '1999',
 '1997',
 '1994',
 '2019',
 '2001',
 'II 2016',
 '1999',
 '2019',
 'I 2017',
 '2012',
 '2010',
 '1990',
 '2000',
 '1991',
 '2020',
 '2014',
 '2016',
 '2012',
 '2009',
 '1993',
 'I 2020',
 '1994',
 '2019',
 '2009',
 '1999',
 '2015',
 '2007',
 '2017',
 '1998',
 '2006',
 '1993',
 'I 2019',
 '2010',
 '2013',
 '2014',
 '2018',
 '2018',
 '1997',
 '2018',
 '2011',
 '2003',
 '1997',
 '2014',
 '2015',
 '2004',
 '2016',
 '2007',
 '2015',
 '1996',
 '2007',
 '2016',
 '2016',
 '1995',
 '2001',
 '2010',
 '2000',
 '2020',
 '2004',
 '2020',
 '1995',
 '1990',
 '2002',
 '2015',
 '1991',
 '2016',
 '2007',
 '2012',
 '2013',
 '2009',
 '2018',
 '2002',
 '2017',
 '2017',
 '1994',
 'I 2019',
 '1996',
 '2001',
 '2005',
 'I 2013',
 '2019',
 '1992',
 '2001',
 '2019',
 '1997',
 '2004',
 '1997',
 '2014',
 'I 2015',
 '2003',
 '2019',
 '2018',
 '2001',
 'I 2010',
 '2012',
 '2006',
 '2007',
 '2005'

In [44]:
duration

['164 min',
 '169 min',
 '122 min',
 '161 min',
 '113 min',
 '150 min',
 '128 min',
 '105 min',
 '148 min',
 '132 min',
 '132 min',
 '152 min',
 '136 min',
 '194 min',
 '154 min',
 '119 min',
 '178 min',
 '116 min',
 '105 min',
 '135 min',
 '104 min',
 '143 min',
 '148 min',
 '146 min',
 '155 min',
 '118 min',
 '107 min',
 '99 min',
 '123 min',
 '165 min',
 '153 min',
 '127 min',
 '97 min',
 '142 min',
 '108 min',
 '162 min',
 '122 min',
 '120 min',
 '122 min',
 '106 min',
 '169 min',
 '151 min',
 '195 min',
 '109 min',
 '108 min',
 '143 min',
 '106 min',
 '134 min',
 '117 min',
 '126 min',
 '134 min',
 '158 min',
 '201 min',
 '138 min',
 '108 min',
 '130 min',
 '108 min',
 '139 min',
 '123 min',
 '156 min',
 '145 min',
 '158 min',
 '108 min',
 '132 min',
 '178 min',
 '125 min',
 '120 min',
 '122 min',
 '100 min',
 '127 min',
 '115 min',
 '106 min',
 '127 min',
 '179 min',
 '168 min',
 '137 min',
 '128 min',
 '116 min',
 '120 min',
 '126 min',
 '127 min',
 '130 min',
 '150 min',
 '123 

In [45]:
genre

['Action, Drama, Mystery',
 'Adventure, Drama, Sci-Fi',
 'Crime, Drama, Thriller',
 'Comedy, Drama',
 'Crime, Drama, Mystery',
 'Action, Sci-Fi, Thriller',
 'Fantasy, Horror',
 'Animation, Adventure, Comedy',
 'Action, Adventure, Thriller',
 'Drama, Romance',
 'Comedy, Drama, Thriller',
 'Action, Crime, Drama',
 'Action, Sci-Fi',
 'Drama, Romance',
 'Crime, Drama',
 'Action, Drama, War',
 'Action, Adventure, Drama',
 'Drama, Sci-Fi',
 'Fantasy, Horror, Mystery',
 'Drama, Romance',
 'Horror, Mystery, Thriller',
 'Action, Adventure, Thriller',
 'Action, Adventure, Sci-Fi',
 'Biography, Crime, Drama',
 'Action, Adventure, Drama',
 'Crime, Drama, Horror',
 'Drama',
 'Adventure, Comedy, Crime',
 'Action, Adventure, Fantasy',
 'Drama, Western',
 'Adventure, Drama, War',
 'Action, Adventure, Sci-Fi',
 'Drama',
 'Drama, Romance',
 'Comedy, Drama, War',
 'Action, Adventure, Fantasy',
 'Drama',
 'Action, Adventure, Sci-Fi',
 'Crime, Drama, Thriller',
 'Action, Drama, History',
 'Drama, War',
 'C

In [46]:
rating

['8.0',
 '8.6',
 '8.4',
 '7.6',
 '7.5',
 '7.4',
 '7.4',
 '8.4',
 '6.8',
 '7.9',
 '8.6',
 '9.0',
 '8.7',
 '7.8',
 '8.9',
 '8.3',
 '8.8',
 '7.9',
 '7.3',
 '7.8',
 '7.7',
 '7.8',
 '8.8',
 '8.7',
 '8.5',
 '8.6',
 '7.4',
 '8.1',
 '5.9',
 '8.4',
 '8.3',
 '8.1',
 '8.3',
 '8.8',
 '7.9',
 '7.8',
 '8.3',
 '8.1',
 '8.1',
 '7.8',
 '8.6',
 '8.5',
 '8.9',
 '6.8',
 '8.0',
 '7.2',
 '8.5',
 '7.9',
 '8.4',
 '8.3',
 '7.3',
 '7.8',
 '8.9',
 '8.2',
 '7.7',
 '7.8',
 '8.3',
 '8.1',
 '7.8',
 '8.0',
 '7.0',
 '8.2',
 '8.0',
 '7.3',
 '8.3',
 '8.6',
 '7.7',
 '7.9',
 '8.1',
 '7.3',
 '7.5',
 '8.5',
 '7.1',
 '8.7',
 '7.8',
 '8.5',
 '8.0',
 '7.3',
 '7.7',
 '8.0',
 '7.9',
 '8.2',
 '8.5',
 '7.3',
 '8.1',
 '8.5',
 '7.3',
 '8.1',
 '8.1',
 '6.9',
 '7.4',
 '8.1',
 '6.6',
 '7.7',
 '7.7',
 '8.6',
 '8.0',
 '7.3',
 '8.0',
 '8.1',
 '7.9',
 '7.9',
 '7.6',
 '8.2',
 '6.4',
 '7.6',
 '7.8',
 '8.0',
 '7.7',
 '7.7',
 '7.3',
 '8.1',
 '7.3',
 '8.0',
 '7.9',
 '7.6',
 '7.8',
 '7.0',
 '8.2',
 '7.7',
 '6.2',
 '7.5',
 '7.7',
 '7.7',
 '7.1',


#### Create a Dataframe with the Scraped data

In [47]:
imdb_df = pd.DataFrame({'Movie Title': movie_title, 'Year': year, 'Duration':duration,
                       'Genre': genre, 'Rating':rating})

In [48]:
imdb_df

Unnamed: 0,Movie Title,Year,Duration,Genre,Rating
0,Blade Runner 2049,2017,164 min,"Action, Drama, Mystery",8.0
1,Interstellar,2014,169 min,"Adventure, Drama, Sci-Fi",8.6
2,Joker,2019,122 min,"Crime, Drama, Thriller",8.4
3,Once Upon a Time... in Hollywood,2019,161 min,"Comedy, Drama",7.6
4,Promising Young Woman,2020,113 min,"Crime, Drama, Mystery",7.5
...,...,...,...,...,...
245,If Beale Street Could Talk,2018,119 min,"Drama, Romance",7.1
246,Finding Neverland,2004,106 min,"Biography, Drama, Family",7.7
247,The Artist,I 2011,100 min,"Comedy, Drama, Romance",7.9
248,The Last King of Scotland,2006,123 min,"Biography, Drama, History",7.7


In [49]:
# output as excel file
imdb_df.to_excel('imdb_multiple_pages.xlsx', index=False)