In [12]:
import os

def get_user_liked_and_disliked_movies(user):
    """Load a user's movie preferences from ``Movie Preferences/<user>.txt``.

    Each non-blank line of the file is ``<flag><title>[,<year>]``. The first
    character is the flag: ``+`` marks a liked movie, any other character
    marks a disliked one. The text after the flag up to the first comma is
    the title; the field after that comma, if present and non-empty, is
    parsed as the integer release year.

    Args:
        user: Name of the user; used as the stem of the preference file.

    Returns:
        A tuple ``(all_movies, liked_movies, disliked_movies)`` of dicts that
        map title -> release year. The year is ``-1`` when the line carries
        no year.

    Raises:
        OSError: If the preference file cannot be opened.
        ValueError: If a non-empty year field is not an integer.
    """
    pfile = os.path.join("Movie Preferences", f"{user}.txt")

    all_movies = {}
    liked_movies = {}
    disliked_movies = {}

    with open(pfile) as p:
        for line in p:
            entry = line.strip()
            # Blank lines (e.g. a trailing newline at end of file) carry no
            # movie; indexing into them would raise IndexError.
            if not entry:
                continue

            parts = entry.split(",")
            liked = parts[0][0]
            title = parts[0][1:]
            # A missing or empty year field means "year unknown".
            if len(parts) > 1 and parts[1].strip():
                year = int(parts[1])
            else:
                year = -1

            if liked == "+":
                liked_movies[title] = year
            else:
                disliked_movies[title] = year
            all_movies[title] = year

    return all_movies, liked_movies, disliked_movies
        

In [13]:
from imdb import IMDb

def get_movie_by_title(db, title, release_year):
    """Search ``db`` for ``title`` and fetch the best-matching movie.

    Selection rules:
      * ``release_year == -1``: take the first search result.
      * otherwise: take the first result whose ``'year'`` equals
        ``release_year``; failing that, the result whose year is numerically
        closest; failing that (no result carries a year), the first result.

    Args:
        db: Object exposing ``search_movie(title)`` and
            ``get_movie(movieID, info=...)``.
        title: Movie title to search for.
        release_year: Expected release year, or ``-1`` when unknown.

    Returns:
        Whatever ``db.get_movie`` returns for the chosen ID, requested with
        the ``'synopsis'`` and ``'reviews'`` info sets.

    Raises:
        IndexError: If the search returns no results.
    """
    search_results = db.search_movie(title)
    if not search_results:
        raise IndexError(f"No search results for {title!r}")

    movieID = None
    if release_year != -1:
        # Some search results have no 'year'; indexing them with ['year']
        # would raise KeyError, so only dated results take part in matching.
        dated = [r for r in search_results if r.get('year') is not None]
        exact = next((r for r in dated if r.get('year') == release_year), None)
        if exact is not None:
            movieID = exact.movieID
        elif dated:
            # min() returns the first of equally-close results, matching a
            # stable sort followed by [0].
            closest = min(dated, key=lambda r: abs(r.get('year') - release_year))
            movieID = closest.movieID

    # Year unknown, or nothing usable to match against: take the top hit.
    if not movieID:
        movieID = search_results[0].movieID

    return db.get_movie(movieID, info=['synopsis', 'reviews'])

In [14]:
def get_movie_reviews(movie):
    """Flatten every review of ``movie`` into a single string.

    Each entry of ``movie['reviews']`` contributes its ``'title'`` and
    ``'content'`` as ``"<title> <content> "`` (note the trailing space); the
    pieces are concatenated in order. An empty review list yields ``""``.

    Raises:
        KeyError: If ``movie`` has no ``'reviews'`` entry, or a review lacks
            ``'title'`` or ``'content'``.
    """
    return "".join(
        f"{entry['title']} {entry['content']} "
        for entry in movie['reviews']
    )

In [15]:
def get_movies_plots_and_reviews(movies, max_retries=10):
    """Fetch the synopsis and concatenated reviews for each movie.

    Each movie is looked up with ``get_movie_by_title`` and retried up to
    ``max_retries`` times if any step of the lookup raises. A movie whose
    lookups all fail is reported on stdout and left out of both result
    dicts, so it is never paired with another movie's data.

    Args:
        movies: Mapping of title -> release year (``-1`` when unknown).
        max_retries: Maximum number of lookup attempts per movie.

    Returns:
        A tuple ``(plots, reviews)`` of dicts keyed by title: ``plots`` holds
        the first synopsis entry, ``reviews`` the string built by
        ``get_movie_reviews``.
    """
    inst = IMDb()

    plots = {}
    reviews = {}

    for movie, year in movies.items():
        for _ in range(max_retries):
            try:
                m = get_movie_by_title(inst, movie, year)
                revs = get_movie_reviews(m)
                synopsis = m['synopsis'][0]
            except Exception:
                # Best-effort retry; Exception (not a bare except) lets
                # KeyboardInterrupt / SystemExit stop the loop.
                continue
            # Store only on success so a failed movie never inherits the
            # previous iteration's synopsis/reviews.
            plots[movie] = synopsis
            reviews[movie] = revs
            break
        else:
            print(f"Could not retrieve data for {movie} ({year})")

    return plots, reviews