# SetUp

In [1]:
import requests

In [2]:
from bs4 import BeautifulSoup

In [3]:
url = "https://editorial.rottentomatoes.com/guide/best-sci-fi-movies-of-all-time/"

In [4]:
response = requests.get(url)
response

<Response [200]>

In [5]:
html = response.content

In [6]:
soup = BeautifulSoup(html, 'lxml')

In [7]:
with open('Rotten_Tomatoes_SF.html', 'wb') as file:
    file.write(soup.prettify('utf-8'))

# Get all data

In [8]:
divs = soup.find_all('div', {'class': 'col-sm-18 col-full-xs countdown-item-content'})

In [71]:
##divs

# Extracting the title and the year

In [10]:
headings = [div.find('h2') for div in divs]

In [72]:
##headings

In [12]:
headings[0]

<h2><a href="https://www.rottentomatoes.com/m/hitchhikers_guide_to_the_galaxy/">The Hitchhiker's Guide to the Galaxy</a> <span class="subtle start-year">(2005)</span> <span class="icon tiny fresh" title="Fresh"></span> <span class="tMeterScore">60%</span></h2>

## Title

In [73]:
movie_names = [heading.find('a').string for heading in headings]
##movie_names

## Year

In [74]:
years = [heading.find('span', class_='start-year').string for heading in headings]
##years

In [75]:
years = [year.strip('()') for year in years]
years = [int(year) for year in years]
##years

## Score

In [26]:
scores = [heading.find('span', class_ = 'tMeterScore').string for heading in headings]

In [76]:
##scores

In [28]:
scores = [s.strip('%') for s in scores]

In [29]:
scores = [int(s) for s in scores]

In [77]:
##scores

## Extracting the rest of the data

### Critics Consensus

In [31]:
consensus = [div.find('div', {'class': "info critics-consensus"}) for div in divs]

In [78]:
##consensus

In [83]:
##[con.text for con in consensus]

In [34]:
consensus[0]

<div class="info critics-consensus"><span class="descriptor">Critics Consensus:</span> A frantic and occasional funny adaptation of Douglas Adams' novel. However, it may have those unfamiliar with the source material scratching their heads.</div>

In [79]:
consensus_text = [con.contents[1].strip() for con in consensus]
##consensus_text

### Directors

In [80]:
directors = [div.find('div', class_="director") for div in divs]
##directors

In [40]:
[directors[0]]

[<div class="info director">
 <span class="descriptor">Directed By:</span> <a class="" href="/celebrity/garth_jennings_2/">Garth Jennings</a></div>]

In [81]:
##[director.find("a") for director in directors]

In [82]:
final_directors = [None if director.find('a') is None else director.find("a").string for director in directors]
##final_directors

### Cast Info

In [84]:
cast_info = [div.find("div", class_="cast") for div in divs]
##cast_info

In [85]:
cast = []

for c in cast_info:
    cast_links = c.find_all('a')
    cast_names = [link.string for link in cast_links]
    result = ", ".join(cast_names)
    
    cast.append(result)

##cast

In [86]:
adj_scores = [div.find("div", {"class": "info countdown-adjusted-score"}) for div in divs]
##adj_scores

In [59]:
adj_scores[0]

<div class="info countdown-adjusted-score"><span class="descriptor">Adjusted Score: </span>66.677% <span class="glyphicon glyphicon-question-sign" data-html="true" data-original-title="The Adjusted Score comes from a weighted formula (Bayesian) that we use that accounts for variation in the number of reviews per movie." data-placement="top" data-toggle="tooltip" rel="tooltip" title=""></span></div>

In [60]:
adj_scores_clean = [score.contents[1].strip('% ') for score in adj_scores]

In [87]:
final_adj = [float(score) for score in adj_scores_clean]
##final_adj

### Synopsis

In [88]:
synopsis = [div.find("div", class_="synopsis") for div in divs]
##synopsis

In [63]:
synopsis[0]

<div class="info synopsis"><span class="descriptor">Synopsis:</span> Earthman Arthur Dent is having a very bad day. His house is about to be bulldozed, he discovers that his...<a class="" data-pageheader="" href="https://www.rottentomatoes.com/m/hitchhikers_guide_to_the_galaxy/" target="_top"> [More]</a></div>

In [89]:
synopsis_text = [syn.contents[1] for syn in synopsis]
##synopsis_text

# Representing the data in a structured form

In [65]:
import pandas as pd

## Creating a DataFrame and populating it

In [67]:
movies_info = pd.DataFrame()

movies_info["Movie Title"] = movie_names
movies_info["Year"] = years
movies_info["Score"] = scores
movies_info["Adjusted Score"] = final_adj
movies_info["Director"] = final_directors
movies_info["Synopsis"] = synopsis_text
movies_info["Cast"] = cast
movies_info["Consensus"] = consensus_text

In [68]:
pd.set_option('display.max_colwidth', None)
movies_info

Unnamed: 0,Movie Title,Year,Score,Adjusted Score,Director,Synopsis,Cast,Consensus
0,The Hitchhiker's Guide to the Galaxy,2005,60,66.677,Garth Jennings,"Earthman Arthur Dent is having a very bad day. His house is about to be bulldozed, he discovers that his...","Sam Rockwell, Zooey Deschanel, Yasiin Bey, Martin Freeman","A frantic and occasional funny adaptation of Douglas Adams' novel. However, it may have those unfamiliar with the source material scratching their heads."
1,The Endless,2018,92,96.940,Justin Benson,"Following their Lovecraftian modern cult classic SPRING, acclaimed filmmakers Moorhead and Benson return with this mind-bending thriller that follows two...","Aaron Moorhead, Justin Benson, Tate Ellington, Callie Hernandez","The Endless benefits from its grounded approach to an increasingly bizarre story, elevated by believable performances by filmmakers Justin Benson and Aaron Moorhead."
2,Los Cronocrímenes (Timecrimes),2007,89,90.708,Nacho Vigalondo,An ordinary guy takes an extraordinary step through time in this science fiction thriller. Hector (Karra Elejalde) is spending a...,"Karra Elejalde, Candela Fernández, Nacho Vigalondo, Bárbara Goenaga",Timecrimes is a low-budget thriller that's well-crafted and loaded with dark humor and bizarre twists.
3,Ad Astra,2019,84,100.634,James Gray,Astronaut Roy McBride (Brad Pitt) travels to the outer edges of the solar system to find his missing father and...,"Brad Pitt, Tommy Lee Jones, Ruth Negga, Liv Tyler",Ad Astra takes a visually thrilling journey through the vast reaches of space while charting an ambitious course for the heart of the bond between parent and child.
4,Westworld,1973,87,90.446,Michael Crichton,"Welcome to Westworld, where nothing can go wrong...go wrong...go wrong....Writer/director Michael Crichton has concocted a futuristic ""Disneyland for adults"", a...","Richard Benjamin, James Brolin, Yul Brynner, Alan Oppenheimer",Yul Brynner gives a memorable performance as a robotic cowboy in this amusing sci-fi/western hybrid.
...,...,...,...,...,...,...,...,...
145,Star Wars: Episode IV - A New Hope,1977,92,105.515,George Lucas,"Nineteen years after the formation of the Empire, Luke Skywalker is thrust into the struggle of the Rebel Alliance when...","Mark Hamill, Harrison Ford, Carrie Fisher, Alec Guinness","A legendarily expansive and ambitious start to the sci-fi saga, George Lucas opened our eyes to the possibilities of blockbuster filmmaking and things have never been the same."
146,Metropolis,1927,97,108.519,Fritz Lang,"In a futuristic city sharply divided between the working class and the city planners, the son of the city's mastermind...","Alfred Abel, Gustav Frolich, Brigitte Helm, Gustav Fröhlich",A visually awe-inspiring science fiction classic from the silent era.
147,Blade Runner,1982,90,101.549,Ridley Scott,"A blend of science fiction and noir detective fiction, Blade Runner (1982) was a box office and critical bust upon...","Harrison Ford, Rutger Hauer, Sean Young, Daryl Hannah","Misunderstood when it first hit theaters, the influence of Ridley Scott's mysterious, neo-noir Blade Runner has deepened with time. A visually remarkable, achingly human sci-fi masterpiece."
148,Back to the Future,1985,96,102.355,Robert Zemeckis,Contemporary high schooler Marty McFly (Michael J. Fox) doesn't have the most pleasant of lives. Browbeaten by his principal at...,"Michael J. Fox, Christopher Lloyd, Crispin Glover, Lea Thompson","Inventive, funny, and breathlessly constructed, Back to the Future is a rousing time-travel adventure with an unforgettable spirit."


# Exporting the data

In [69]:
movies_info.to_csv("SF_movies.csv", index = False, header = True)

In [70]:
movies_info.to_excel("SF_movies.xlsx", index = False, header = True)