

In [118]:
#from constants import * # import API_KEY
import os
from collections import deque

import requests
from pyspark import SparkContext
from pyspark import SparkContext as sc

# TMDb API key. Prefer the TMDB_API_KEY environment variable so the secret does not
# have to live in the notebook; the literal is kept only as a backward-compatible fallback.
# NOTE(review): this key is committed in plain text -- rotate it and drop the fallback.
API_KEY = os.environ.get('TMDB_API_KEY', '04886c388e0fe62c04c621138df0ad61')
# ANSI escape sequences used to colour printed movie titles
CRED  = '\033[91m'  # bright red
CGREN = '\033[32m'  # green
CEND  = '\033[0m'   # reset to the default colour
# Start PySpark
#sc.stop() # stop any existing contexts
# Rebinds `sc` from the imported class alias to the running context instance.
sc = SparkContext.getOrCreate()
BASE_API = "https://api.themoviedb.org/3"
# The watchlist of TMDb movie ids. A deque with maxlen=10 silently drops the oldest
# entry once an eleventh movie is appended.
movie_list = deque(maxlen=10)

# TMDb genre id -> genre name, copied from the /genre/movie/list endpoint.
# Insertion order matters: the dict is printed as-is when the user picks a genre.
GENRES = {
    28: "Action",
    12: "Adventure",
    16: "Animation",
    35: "Comedy",
    80: "Crime",
    99: "Documentary",
    18: "Drama",
    10751: "Family",
    14: "Fantasy",
    36: "History",
    27: "Horror",
    10402: "Music",
    9648: "Mystery",
    10749: "Romance",
    878: "Science Fiction",
    10770: "TV Movie",
    53: "Thriller",
    10752: "War",
    37: "Western",
}

def show_movies(req, movies):
    """Print up to the first 10 movies of a TMDb list response.

    Args:
        req: The HTTP response object; only its ``status_code`` is read.
        movies: The decoded JSON body of that response (a dict).

    Returns:
        True if at least one movie was printed; False if the request failed
        (non-200 status) or the response contained no results.
    """
    # Show error if the movies can't be shown (return false to indicate error)
    if req.status_code != 200:
        code = movies.get("status_code", "???")
        message = movies.get("status_message", "Unknown error, oh no!")
        print(f"Error {code}: {message}")
        return False

    # Decide on the actual result list instead of "total_results": that key may be
    # absent, and callers index into "results" afterwards, so an empty list must
    # be reported as "no results" no matter what the counter says.
    results = movies.get("results") or []
    if not results:
        print("No results found.")
        return False

    # Limit to the first 10 results
    print("Top 10 results:")
    for index, result in enumerate(results[:10], start=1):
        # `or` (rather than a .get default) also covers fields that are present
        # but empty or null, so the placeholder is actually shown.
        title = result.get("title") or "no title"
        release_date = result.get("release_date") or "no release date"
        overview = result.get("overview") or "no description available"
        print(f"{index}. {title} ({release_date}) - {overview}\n")

    return True

def update_watchlist(movies, num_movies=10):
    """Ask which of the listed movies to add and append their ids to the watchlist.

    Args:
        movies: Dict with a "results" list of movie dicts (each with "id" and,
            usually, "title"), in the same order they were shown to the user.
        num_movies: Highest list number the user may pick. It is additionally
            capped at the number of results actually available.

    Side effects:
        Appends the chosen movie ids to the module-level ``movie_list``.
    """
    results = movies.get("results") or []
    # The list shown to the user can be shorter than num_movies (e.g. a search with
    # three hits), so cap the accepted range to avoid indexing past the end.
    highest_choice = min(num_movies, len(results))

    # Ask the user if they want to add movies to their watchlist
    print("Which movies interest you?")
    movie_choices = input("Enter the numbers in the list separated by a space (e.g. 1 2 3): ").split()

    # Add the id of the movie to the movie list for future GET requests
    for choice in movie_choices:
        try:
            movie_index = int(choice)
        except ValueError:
            continue # ignore any invalid inputs

        if not 1 <= movie_index <= highest_choice:
            continue # out-of-range numbers are ignored as well

        movie = results[movie_index - 1]
        movie_id = movie.get("id")
        if movie_id is None:
            continue # without an id the movie can't be looked up later
        movie_list.append(movie_id)
        print(f"Added {movie.get('title', 'no title')} to your watchlist")

def search_by_name():
    """Prompt for a movie title, search TMDb for it and offer to add the hits to the watchlist."""
    # /search/movie
    movie_query = input("Which movie would you like to see? ").strip()

    if not movie_query:
        return # can't run an empty query

    # Search for a movie from TMDb. The query goes through `params` so requests
    # URL-encodes it; interpolating it into the URL would let characters such as
    # '&', '#' or '+' truncate the query or inject extra parameters.
    req = requests.get(
        f"{BASE_API}/search/movie",
        params={"api_key": API_KEY, "query": movie_query},
    )
    movies = req.json()

    if show_movies(req, movies):
        update_watchlist(movies)

def search_by_year():
    """Prompt for a release year, list movies from that year and offer to add them to the watchlist."""
    # /discover/movie?primary_release_year=
    movie_year_query = input("Which year would you like to see a movie from? ").strip()

    if not movie_year_query:
        return # can't run an empty query

    if not movie_year_query.isdigit():
        # Reject anything that is not a plain number before it reaches the API
        print("That's not a valid year, try again")
        return

    # Search for a movie from TMDb; `params` takes care of URL-encoding
    req = requests.get(
        f"{BASE_API}/discover/movie",
        params={"api_key": API_KEY, "primary_release_year": movie_year_query},
    )
    movies = req.json()

    if show_movies(req, movies):
        update_watchlist(movies)
        

def search_by_ratings():
    """Prompt for a minimum rating, list popular movies rated at least that high
    and offer to add them to the watchlist."""
    # /discover/movie?vote_average.gte=
    rating_query = input("What is the lowest rating (0-10) you would like to see? ").strip()

    if not rating_query:
        return # can't run an empty query

    try:
        min_rating = float(rating_query)
    except ValueError:
        print("That's not a number, try again")
        return

    # The chained comparison also rejects "nan" and "inf", which float() accepts
    if not 0 <= min_rating <= 10:
        print("Rating must be from 0-10, try again")
        return

    # Discover movies at or above the rating (sort by popularity in descending order)
    req = requests.get(
        f"{BASE_API}/discover/movie",
        params={
            "api_key": API_KEY,
            "vote_average.gte": min_rating,
            "sort_by": "popularity.desc",
        },
    )
    movies = req.json()

    if show_movies(req, movies):
        update_watchlist(movies)

def search_by_genre():
    """Let the user pick a genre id, list popular movies of that genre and
    offer to add them to the watchlist. Entering -1 returns to the menu."""
    # /discover/movie?with_genres=
    print(f"List of genres:\n{GENRES}")

    # Keep prompting until a known genre id (or the -1 escape value) is entered
    while True:
        try:
            genre_id = int(input("Please select a genre number (or -1 to exit): "))
        except ValueError:
            print("That's not an integer, try again")
            continue

        if genre_id == -1:
            return # -1 = go back to the main menu
        if genre_id in GENRES:
            break
        print("That's not a valid genre id, try again")

    # Search movies given the genre id (sort by popularity in descending order)
    url = f"{BASE_API}/discover/movie?api_key={API_KEY}&with_genres={genre_id}&sort_by=popularity.desc"
    req = requests.get(url)
    movies = req.json()

    if not show_movies(req, movies):
        return
    update_watchlist(movies)

def _print_discovered_movies(sort_by, colour):
    """Fetch /discover/movie ordered by `sort_by` and print each title (in `colour`) with its overview."""
    req = requests.get(
        f"{BASE_API}/discover/movie",
        params={"api_key": API_KEY, "sort_by": sort_by},
    )
    movies = req.json()

    # An error body has no "results" key, so report it instead of crashing
    if req.status_code != 200:
        code = movies.get("status_code", "???")
        message = movies.get("status_message", "Unknown error, oh no!")
        print(f"Error {code}: {message}")
        return

    # The results are iterated directly: pushing the list through
    # sc.parallelize(...).collect() only returned the same list again.
    for movie in movies.get("results") or []:
        title = movie.get("original_title") or movie.get("title") or "no title"
        overview = movie.get("overview") or "no description available"
        print(colour + title + '\n' + CEND)
        print(overview + '\n')


def popular_movies():
    """Print the most popular movies (titles in red) with their overviews."""
    _print_discovered_movies("popularity.desc", CRED)


def top_rated():
    """Print the highest-rated movies (titles in green) with their overviews."""
    _print_discovered_movies("vote_average.desc", CGREN)
        
def now_playing():
    """List movies currently in theaters and offer to add them to the watchlist."""
    # /movie/now_playing
    req = requests.get(f"{BASE_API}/movie/now_playing", params={"api_key": API_KEY})
    movies = req.json()

    if show_movies(req, movies):
        update_watchlist(movies)

def upcoming():
    """List upcoming movies and offer to add them to the watchlist."""
    # /movie/upcoming
    req = requests.get(f"{BASE_API}/movie/upcoming", params={"api_key": API_KEY})
    movies = req.json()

    if show_movies(req, movies):
        update_watchlist(movies)

def get_rec_info(movie_id):
    """Fetch TMDb recommendations for one watchlist movie.

    Args:
        movie_id: TMDb id of the movie to get recommendations for.

    Returns:
        On success, a list of ``(id, title, release_date, overview, genre_ids)``
        tuples, with placeholders for missing fields. The list holds at most
        ``10 // len(movie_list)`` entries so that all watchlist movies together
        yield up to 10 recommendations.
        On a non-200 response, a ``(status_code, status_message)`` tuple.
    """
    req = requests.get(f"{BASE_API}/movie/{movie_id}/recommendations?api_key={API_KEY}")
    movies = req.json()

    if req.status_code != 200:
        code = movies.get("status_code", "???")
        message = movies.get("status_message", "Unknown error, oh no!")
        return code, message

    # Share a budget of 10 recommendations between the watchlist movies.
    # max(..., 1) guards the division if this is ever called with an empty watchlist.
    result_limit = 10 // max(len(movie_list), 1)

    # Slice the result list itself rather than counting up to "total_results":
    # that counter may be missing or exceed the number of results in this response.
    return [
        (
            result.get("id", -1),
            result.get("title", "no title"),
            result.get("release_date", "no release date"),
            result.get("overview", "no description available"),
            result.get("genre_ids", []),
        )
        for result in (movies.get("results") or [])[:result_limit]
    ]

def recommend_movies():
    """Show recommendations based on every movie in the watchlist and offer to add them."""
    # /movie/{movie_id}/recommendations
    if not movie_list:
        # The watchlist is empty, so there's nothing to recommend
        print("There's nothing to recommend. Add some movies to your watchlist.")
        return

    print("Based on your watch history, you might like:")
    rec_movies = {"results": []} # make rec_movies resemble the movies dict to pass to update_watchlist

    # Fetch the recommendations for each watchlist movie in parallel.
    # map() is used instead of flatMap() so each movie's outcome stays intact:
    # flatMap() would also flatten a (status code, status message) error tuple
    # into two loose items that can no longer be told apart from recommendations.
    outcomes = sc.parallelize(list(movie_list)).map(get_rec_info).collect()

    for outcome in outcomes:
        if isinstance(outcome, tuple):
            # (status code, status message) - report it and keep going with the other movies
            print(f"Error {outcome[0]}: {outcome[1]}")
            continue

        # Show each recommendation and remember it for the watchlist prompt
        for _id, title, release_date, overview, *_ in outcome:
            rec_movies["results"].append({
                "id": _id, # id is a built-in function
                "title": title,
                "release_date": release_date,
                "overview": overview
            })
            print(f"{len(rec_movies['results'])}. {title} ({release_date}) - {overview}\n")

    # Don't ask the user to update their watchlist if there aren't any recommendations to show
    if rec_movies["results"]:
        update_watchlist(rec_movies, num_movies=len(rec_movies["results"]))
    else:
        print("Sorry, we couldn't find any recommendations for you. Try adding more movies to your watchlist.")

def recommend_by_year():
    """Show watchlist-based recommendations released in a year chosen by the user."""
    # /movie/{movie_id}/recommendations by year
    if not movie_list:
        # The watchlist is empty, so there's nothing to recommend
        print("There's nothing to recommend. Add some movies to your watchlist.")
        return

    # Ask which year to get recommendations from
    movie_year = input("Which year would you like to see a movie from? ").strip()

    print("Based on your watch history, you might like these movies:")
    rec_year = {"results": []} # make rec_year resemble the movies dict to pass to update_watchlist

    # map() instead of flatMap(): a failed lookup returns a (status code, status message)
    # tuple, which flatMap() would flatten into loose items and a year filter running
    # on the workers would then crash on the integer status code.
    outcomes = sc.parallelize(list(movie_list)).map(get_rec_info).collect()

    for outcome in outcomes:
        if isinstance(outcome, tuple):
            # (status code, status message) - report it and keep going with the other movies
            print(f"Error {outcome[0]}: {outcome[1]}")
            continue

        # rec = (id, title, release_date, overview, genres), release_date = year-month-day
        for _id, title, release_date, overview, *_ in outcome:
            # str(... or "") tolerates a null release date
            if str(release_date or "").split("-")[0] != movie_year:
                continue
            rec_year["results"].append({
                "id": _id, # id is a built-in function
                "title": title,
                "release_date": release_date,
                "overview": overview
            })
            print(f"{len(rec_year['results'])}. {title} ({release_date}) - {overview}\n")

    # Don't ask the user to update their watchlist if there aren't any recommendations to show for that year
    if rec_year["results"]:
        update_watchlist(rec_year, num_movies=len(rec_year["results"]))
    else:
        print("Sorry, we couldn't find any recommendations for you this year. "
              " Try adding more movies to your watchlist.")

def recommend_by_genre():
    """Show watchlist-based recommendations that belong to a genre chosen by the user.

    Entering -1 at the genre prompt returns to the menu without recommending anything.
    """
    # /movie/{movie_id}/recommendations by genre
    if not movie_list:
        # The watchlist is empty, so there's nothing to recommend
        print("There's nothing to recommend. Add some movies to your watchlist.")
        return

    # Get a valid genre id from the user
    print(f"List of genres:\n{GENRES}")
    while True:
        try:
            genre_id = int(input("Please select a genre number (or -1 to exit): "))
        except ValueError:
            print("That's not an integer, try again")
            continue

        if genre_id == -1:
            return # -1 = go back to the main menu
        if genre_id in GENRES:
            break
        print("That's not a valid genre id, try again")

    print("Based on your watch history, you might like these movies:")
    rec_genre = {"results": []} # make rec_genre resemble the movies dict to pass to update_watchlist

    # map() keeps each movie's outcome intact, so a (status code, status message)
    # error tuple can be told apart from a list of recommendations.
    outcomes = sc.parallelize(list(movie_list)).map(get_rec_info).collect()

    for outcome in outcomes:
        if isinstance(outcome, tuple):
            # (status code, status message) - report it and keep going with the other movies
            print(f"Error {outcome[0]}: {outcome[1]}")
            continue

        # rec = (id, title, release_date, overview, genres)
        for _id, title, release_date, overview, genres, *_ in outcome:
            if genre_id not in (genres or []):
                continue
            rec_genre["results"].append({
                "id": _id, # id is a built-in function
                "title": title,
                "release_date": release_date,
                "overview": overview
            })
            print(f"{len(rec_genre['results'])}. {title} ({release_date}) - {overview}\n")

    # Don't ask the user to update their watchlist if there aren't any recommendations to show for that genre
    if rec_genre["results"]:
        update_watchlist(rec_genre, num_movies=len(rec_genre["results"]))
    else:
        print("Sorry, we couldn't find any recommendations for this particular genre. "
              "Try adding more movies to your watchlist.")

def get_movie_info(movie_id):
    """Look up a single movie on TMDb.

    Returns a 3-tuple ``(title, release_date, overview)`` on success, with
    placeholders for missing fields, or a 2-tuple
    ``(status_code, status_message)`` when the response status is not 200.
    """
    response = requests.get(f"{BASE_API}/movie/{movie_id}?api_key={API_KEY}")
    payload = response.json()

    if response.status_code == 200:
        # Add placeholders to any missing fields
        return (
            payload.get("title", "no title"),
            payload.get("release_date", "no release date"),
            payload.get("overview", "no description available"),
        )

    # Anything else is reported back as (status code, status message)
    return (
        payload.get("status_code", "???"),
        payload.get("status_message", "Unknown error, oh no!"),
    )

def print_watchlist():
    """Print title, release date and overview of every movie on the watchlist."""
    if len(movie_list) == 0:
        print("Your watchlist is empty. Start adding movies.")
        return

    print("Your watchlist:")
    # Look up every watchlist id through Spark; each entry comes back as a tuple
    lookups = sc.parallelize(movie_list).map(get_movie_info).collect()

    for entry in lookups:
        if len(entry) != 2:
            # (title, release date, overview)
            print(f"{entry[0]} ({entry[1]}) - {entry[2]}\n")
            continue
        # A 2-tuple is (status code, status message)
        print(f"Error {entry[0]}: {entry[1]}")

# Menu entries in display order; entry N (1-based) is selected by typing N.
MENU_LABELS = [
    "Search movie by name",
    "Search by year",
    "Search by ratings",
    "Search by genre",
    "Popular movies",
    "Top rated movies",
    "Movies in theaters",
    "Upcoming movies",
    "Recommend movies",
    "Recommend by year",
    "Recommend by genre",
    "View watchlist",
    "Exit",
]
# Option number -> handler. The last option (Exit) has no handler on purpose.
MENU_ACTIONS = {
    1: search_by_name,
    2: search_by_year,
    3: search_by_ratings,
    4: search_by_genre,
    5: popular_movies,
    6: top_rated,
    7: now_playing,
    8: upcoming,
    9: recommend_movies,
    10: recommend_by_year,
    11: recommend_by_genre,
    12: print_watchlist,
}

option = -1
TOTAL_OPTIONS = 13
print("Welcome to Spark Movies!\n")

# try/finally guarantees the Spark context is stopped even when the loop is left
# through an exception, e.g. a KeyboardInterrupt while waiting for input.
try:
    while option != TOTAL_OPTIONS:
        # Keep showing the menu until the user exits
        for number, label in enumerate(MENU_LABELS, start=1):
            print(f"{number}. {label}")
        option = -1

        # Check for valid input
        while option < 1 or option > TOTAL_OPTIONS:
            try:
                option = int(input("Please select an option: "))

                if option < 1 or option > TOTAL_OPTIONS:
                    print(f"Option must be from 1-{TOTAL_OPTIONS}, try again")
            except ValueError:
                print("That's not an integer, try again")

        action = MENU_ACTIONS.get(option)
        if action is None:
            # Exit the program
            print("Have a nice day!")
        else:
            try:
                action()
            except NotImplementedError:
                # An unfinished feature should not take the whole menu down
                print("Sorry, that option isn't available yet.")
            except requests.RequestException as err:
                # Network trouble: report it and return to the menu
                print(f"Couldn't reach TMDb: {err}")

        print() # leave extra space at the end
finally:
    # Stop PySpark
    sc.stop()

Welcome to Spark Movies!

1. Search movie by name
2. Search by year
3. Search by ratings
4. Search by genre
5. Popular movies
6. Top rated movies
7. Movies in theaters
8. Upcoming movies
9. Recommend movies
10. Recommend by year
11. Recommend by genre
12. View watchlist
13. Exit
Please select an option: 5
[91mJiu Jitsu
[0m
Every six years, an ancient order of jiu-jitsu fighters joins forces to battle a vicious race of alien invaders. But when a celebrated war hero goes down in defeat, the fate of the planet and mankind hangs in the balance.

[91mFatman
[0m
A rowdy, unorthodox Santa Claus is fighting to save his declining business. Meanwhile, Billy, a neglected and precocious 12 year old, hires a hit man to kill Santa after receiving a lump of coal in his stocking.

[91mUpside-Down Magic
[0m
Nory and her best friend Reina enter the Sage Academy for Magical Studies, where Nory’s unconventional powers land her in a class for those with wonky, or “upside-down,” magic. Undaunted, Nory

KeyboardInterrupt: ignored

In [75]:
# Download the TMDb 5000 movies dataset into the working directory.
# -nc (no-clobber) skips the download when the file already exists, so re-running
# this cell does not leave duplicate copies such as tmdb_5000_movies.csv.1 behind.
!wget -nc https://raw.githubusercontent.com/CTopham/TophamRepo/master/Movie%20Project/Resources/tmdb_5000_movies.csv

--2020-12-10 22:15:20--  https://raw.githubusercontent.com/CTopham/TophamRepo/master/Movie%20Project/Resources/tmdb_5000_movies.csv
Resolving raw.githubusercontent.com (raw.githubusercontent.com)... 151.101.0.133, 151.101.64.133, 151.101.128.133, ...
Connecting to raw.githubusercontent.com (raw.githubusercontent.com)|151.101.0.133|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 5698602 (5.4M) [text/plain]
Saving to: ‘tmdb_5000_movies.csv’


2020-12-10 22:15:20 (28.0 MB/s) - ‘tmdb_5000_movies.csv’ saved [5698602/5698602]



In [76]:
# List the working directory to confirm the dataset file is present
ls

[0m[01;34msample_data[0m/  tmdb_5000_movies.csv
