In [1]:
from requests import get
# IMDb list page that the rest of the notebook scrapes.
url = 'https://www.imdb.com/list/ls000729643/'
# requests has no default timeout, so give up after 10 seconds instead of
# letting the cell hang forever on a stalled connection.
response = get(url, timeout=10)
# Fail here on a 4xx/5xx answer rather than parsing an error page downstream.
response.raise_for_status()

In [2]:
from bs4 import BeautifulSoup
# Build a parse tree from the body of the HTTP response using the
# 'html.parser' tree builder.
raw_html = BeautifulSoup(markup=response.text, features='html.parser')
# Collect every <div> whose class attribute is 'lister-item mode-detail';
# the scraping loop treats each of these as one movie container.
movies = raw_html.find_all(name='div', class_='lister-item mode-detail')

In [3]:
# Lists to store the scraped data in.
# Every list receives exactly one entry per movie container, in page order,
# so index i of each list describes the same movie.
ranks = []
titles = []
descriptions = []
runtimes = []
genres = []
ratings = []
metascores = []
votes = []
gross_earnings = []
directors = []
actors = []


# Extract data from individual movie container
for movie in movies:

    # The rank: text of the 'text-primary' span inside the <h3> header
    rank = movie.h3.find('span', class_='text-primary').text
    ranks.append(rank)

    # The name: text of the link inside the <h3> header
    title = movie.h3.a.text
    titles.append(title)

    # The description: first <p> without a class, with newlines removed
    raw_description = movie.find('p', class_='').text
    descriptions.append(raw_description.replace("\n", ""))

    # The runtime, with the "min" unit dropped (int() tolerates the
    # surrounding whitespace)
    raw_runtime = movie.p.find('span', class_='runtime').text
    runtimes.append(int(raw_runtime.replace("min", "")))

    # The genre, with newlines removed
    raw_genre = movie.p.find('span', class_='genre').text
    genres.append(raw_genre.replace("\n", ""))

    # The IMDB rating
    rating = float(movie.find('span', class_='ipl-rating-star__rating').text)
    ratings.append(rating)

    # The Metascore. find() returns None when the span is absent; store None
    # in that case so every list keeps one entry per movie instead of the
    # loop dying with AttributeError.
    metascore_tag = movie.find('span', class_='metascore')
    if metascore_tag is not None:
        metascores.append(int(metascore_tag.text))
    else:
        metascores.append(None)

    # The number of votes, read from the first name="nv" span's data-value
    vote = movie.find('span', attrs={'name': 'nv'})['data-value']
    votes.append(int(vote))

    # The gross earning in millions: text of the second name="nv" span with
    # the "$" and "M" markers removed. When there is no second span, store
    # None rather than raising IndexError.
    nv_spans = movie.select("span[name=nv]")
    if len(nv_spans) > 1:
        earning = nv_spans[1].text.replace("$", "").replace("M", "")
        gross_earnings.append(float(earning))
    else:
        gross_earnings.append(None)

    # List directors and actors: second "text-muted text-small" paragraph,
    # flattened to a single line
    raw_directorandactor = movie.select("p.text-muted.text-small")[1].text
    directorandactor = raw_directorandactor.replace("\n", "")

    # Separate directors and actors (split once, reuse both halves)
    credit_parts = directorandactor.split('|')
    director = credit_parts[0]
    actor = credit_parts[1]

    # Data preprocessing: drop the leading labels. Newlines were already
    # removed above, so no further newline cleanup is needed here.
    director_final = director.replace('Director:', '')
    director_final = director_final.replace('Directors:', '')
    directors.append(director_final)

    actor_final = actor.replace('Stars:', '')
    actors.append(actor_final)
        


In [4]:
import pandas as pd

# Assemble the per-movie lists into one table; each dict key becomes the
# column name and each list supplies that column's values.
movies_df = pd.DataFrame({'rank': ranks,
                        'movie': titles,
                        'description': descriptions,
                        'runtime': runtimes,
                        'genre': genres,
                        'rating': ratings,
                        'metascore': metascores,
                        'votes': votes,
                        'gross earning in millions': gross_earnings,
                        'directors': directors,
                        'actors': actors
                       })
# DataFrame.info() prints its summary itself and returns None, so call it
# directly; wrapping it in print() only appends a stray "None" line.
movies_df.info()
# Last expression of the cell: rendered as the cell's rich output.
movies_df

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 50 entries, 0 to 49
Data columns (total 11 columns):
rank                         50 non-null object
movie                        50 non-null object
description                  50 non-null object
runtime                      50 non-null int64
genre                        50 non-null object
rating                       50 non-null float64
metascore                    50 non-null int64
votes                        50 non-null int64
gross earning in millions    50 non-null float64
directors                    50 non-null object
actors                       50 non-null object
dtypes: float64(2), int64(3), object(6)
memory usage: 4.4+ KB
None


Unnamed: 0,rank,movie,description,runtime,genre,rating,metascore,votes,gross earning in millions,directors,actors
0,1.0,Wedding Crashers,"John Beckwith and Jeremy Grey, a pair of c...",119,"Comedy, Romance",7.0,64,312875.0,209.22,David Dobkin,"Owen Wilson, Vince Vaughn, Rachel McAdams..."
1,2.0,Anchorman: The Legend of Ron Burgundy,Ron Burgundy is San Diego's top-rated news...,94,Comedy,7.2,63,306921.0,85.29,Adam McKay,"Will Ferrell, Christina Applegate, Steve ..."
2,3.0,Walk Hard: The Dewey Cox Story,Singer Dewey Cox overcomes adversity to be...,96,"Comedy, Music",6.8,63,62487.0,18.32,Jake Kasdan,"John C. Reilly, Jenna Fischer, David Krum..."
3,4.0,Step Brothers,Two aimless middle-aged losers still livin...,98,Comedy,6.9,51,245249.0,100.47,Adam McKay,"Will Ferrell, John C. Reilly, Mary Steenb..."
4,5.0,The Hangover,Three buddies wake up from a bachelor part...,100,Comedy,7.7,73,665777.0,277.32,Todd Phillips,"Zach Galifianakis, Bradley Cooper, Justin..."
5,6.0,Office Space,Three company workers who hate their jobs ...,89,Comedy,7.8,68,225768.0,10.82,Mike Judge,"Ron Livingston, Jennifer Aniston, David H..."
6,7.0,Dumb and Dumber,The cross-country adventures of 2 good-hea...,107,Comedy,7.3,41,327597.0,127.18,"Peter Farrelly, Bobby Farrelly","Jim Carrey, Jeff Daniels, Lauren Holly, M..."
7,8.0,Austin Powers: International Man of Mystery,A 1960s secret agent is brought out of cry...,89,"Adventure, Comedy",7.0,51,205526.0,53.88,Jay Roach,"Mike Myers, Elizabeth Hurley, Michael Yor..."
8,9.0,Austin Powers: The Spy Who Shagged Me,Dr. Evil is back and has invented a new ti...,95,"Action, Adventure, Comedy",6.6,59,202677.0,206.04,Jay Roach,"Mike Myers, Heather Graham, Michael York,..."
9,10.0,Starsky & Hutch,Two streetwise cops bust criminals in thei...,101,"Comedy, Crime",6.1,55,131121.0,88.24,Todd Phillips,"Ben Stiller, Owen Wilson, Snoop Dogg, Vin..."
