In [None]:
import ast
import urllib

import pandas as pd
import requests
from nltk.stem.porter import PorterStemmer
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.metrics.pairwise import cosine_similarity


In [11]:
def convert_dict(dict_list):
    genres = []
    for item in ast.literal_eval(dict_list):
        genres.append(item["name"])

    return genres


def get_actors(dict_list):
    actors = []
    num = 3
    dict_list = ast.literal_eval(dict_list)
    for item in dict_list[:num]:
        actors.append(item["name"])

    return actors


def get_director(dict_list):
    director = []
    for item in ast.literal_eval(dict_list):
        if item["job"] == "Director":
            director.append(item["name"])
            break

    return director


def stem_tags(text):
    stemmer = PorterStemmer()
    word_list = []
    for item in text.split():
        word_list.append(stemmer.stem(item))

    return " ".join(word_list)

In [12]:
movies = pd.read_csv(r"./data/tmdb_5000_movies.csv")
credits = pd.read_csv(r"./data/tmdb_5000_credits.csv")

In [13]:
movies = movies.merge(credits, on="title")

In [5]:
movies.head()

Unnamed: 0,budget,genres,homepage,id,keywords,original_language,original_title,overview,popularity,production_companies,...,runtime,spoken_languages,status,tagline,title,vote_average,vote_count,movie_id,cast,crew
0,237000000,"[{""id"": 28, ""name"": ""Action""}, {""id"": 12, ""nam...",http://www.avatarmovie.com/,19995,"[{""id"": 1463, ""name"": ""culture clash""}, {""id"":...",en,Avatar,"In the 22nd century, a paraplegic Marine is di...",150.437577,"[{""name"": ""Ingenious Film Partners"", ""id"": 289...",...,162.0,"[{""iso_639_1"": ""en"", ""name"": ""English""}, {""iso...",Released,Enter the World of Pandora.,Avatar,7.2,11800,19995,"[{""cast_id"": 242, ""character"": ""Jake Sully"", ""...","[{""credit_id"": ""52fe48009251416c750aca23"", ""de..."
1,300000000,"[{""id"": 12, ""name"": ""Adventure""}, {""id"": 14, ""...",http://disney.go.com/disneypictures/pirates/,285,"[{""id"": 270, ""name"": ""ocean""}, {""id"": 726, ""na...",en,Pirates of the Caribbean: At World's End,"Captain Barbossa, long believed to be dead, ha...",139.082615,"[{""name"": ""Walt Disney Pictures"", ""id"": 2}, {""...",...,169.0,"[{""iso_639_1"": ""en"", ""name"": ""English""}]",Released,"At the end of the world, the adventure begins.",Pirates of the Caribbean: At World's End,6.9,4500,285,"[{""cast_id"": 4, ""character"": ""Captain Jack Spa...","[{""credit_id"": ""52fe4232c3a36847f800b579"", ""de..."
2,245000000,"[{""id"": 28, ""name"": ""Action""}, {""id"": 12, ""nam...",http://www.sonypictures.com/movies/spectre/,206647,"[{""id"": 470, ""name"": ""spy""}, {""id"": 818, ""name...",en,Spectre,A cryptic message from Bond’s past sends him o...,107.376788,"[{""name"": ""Columbia Pictures"", ""id"": 5}, {""nam...",...,148.0,"[{""iso_639_1"": ""fr"", ""name"": ""Fran\u00e7ais""},...",Released,A Plan No One Escapes,Spectre,6.3,4466,206647,"[{""cast_id"": 1, ""character"": ""James Bond"", ""cr...","[{""credit_id"": ""54805967c3a36829b5002c41"", ""de..."
3,250000000,"[{""id"": 28, ""name"": ""Action""}, {""id"": 80, ""nam...",http://www.thedarkknightrises.com/,49026,"[{""id"": 849, ""name"": ""dc comics""}, {""id"": 853,...",en,The Dark Knight Rises,Following the death of District Attorney Harve...,112.31295,"[{""name"": ""Legendary Pictures"", ""id"": 923}, {""...",...,165.0,"[{""iso_639_1"": ""en"", ""name"": ""English""}]",Released,The Legend Ends,The Dark Knight Rises,7.6,9106,49026,"[{""cast_id"": 2, ""character"": ""Bruce Wayne / Ba...","[{""credit_id"": ""52fe4781c3a36847f81398c3"", ""de..."
4,260000000,"[{""id"": 28, ""name"": ""Action""}, {""id"": 12, ""nam...",http://movies.disney.com/john-carter,49529,"[{""id"": 818, ""name"": ""based on novel""}, {""id"":...",en,John Carter,"John Carter is a war-weary, former military ca...",43.926995,"[{""name"": ""Walt Disney Pictures"", ""id"": 2}]",...,132.0,"[{""iso_639_1"": ""en"", ""name"": ""English""}]",Released,"Lost in our world, found in another.",John Carter,6.1,2124,49529,"[{""cast_id"": 5, ""character"": ""John Carter"", ""c...","[{""credit_id"": ""52fe479ac3a36847f813eaa3"", ""de..."


In [43]:
movies.columns

Index(['budget', 'genres', 'homepage', 'id', 'keywords', 'original_language',
       'original_title', 'overview', 'popularity', 'production_companies',
       'production_countries', 'release_date', 'revenue', 'runtime',
       'spoken_languages', 'status', 'tagline', 'title', 'vote_average',
       'vote_count', 'movie_id', 'cast', 'crew'],
      dtype='object')

In [15]:
movie_cols = [
    "movie_id",
    "title",
    "overview",
    "genres",
    "keywords",
    "cast",
    "crew",
]
movies = movies[movie_cols]

In [30]:
movies.head()

Unnamed: 0,movie_id,title,overview,genres,keywords,runtime,release_date,cast,crew
0,19995,Avatar,"In the 22nd century, a paraplegic Marine is di...","[{""id"": 28, ""name"": ""Action""}, {""id"": 12, ""nam...","[{""id"": 1463, ""name"": ""culture clash""}, {""id"":...",162.0,2009-12-10,"[{""cast_id"": 242, ""character"": ""Jake Sully"", ""...","[{""credit_id"": ""52fe48009251416c750aca23"", ""de..."
1,285,Pirates of the Caribbean: At World's End,"Captain Barbossa, long believed to be dead, ha...","[{""id"": 12, ""name"": ""Adventure""}, {""id"": 14, ""...","[{""id"": 270, ""name"": ""ocean""}, {""id"": 726, ""na...",169.0,2007-05-19,"[{""cast_id"": 4, ""character"": ""Captain Jack Spa...","[{""credit_id"": ""52fe4232c3a36847f800b579"", ""de..."
2,206647,Spectre,A cryptic message from Bond’s past sends him o...,"[{""id"": 28, ""name"": ""Action""}, {""id"": 12, ""nam...","[{""id"": 470, ""name"": ""spy""}, {""id"": 818, ""name...",148.0,2015-10-26,"[{""cast_id"": 1, ""character"": ""James Bond"", ""cr...","[{""credit_id"": ""54805967c3a36829b5002c41"", ""de..."
3,49026,The Dark Knight Rises,Following the death of District Attorney Harve...,"[{""id"": 28, ""name"": ""Action""}, {""id"": 80, ""nam...","[{""id"": 849, ""name"": ""dc comics""}, {""id"": 853,...",165.0,2012-07-16,"[{""cast_id"": 2, ""character"": ""Bruce Wayne / Ba...","[{""credit_id"": ""52fe4781c3a36847f81398c3"", ""de..."
4,49529,John Carter,"John Carter is a war-weary, former military ca...","[{""id"": 28, ""name"": ""Action""}, {""id"": 12, ""nam...","[{""id"": 818, ""name"": ""based on novel""}, {""id"":...",132.0,2012-03-07,"[{""cast_id"": 5, ""character"": ""John Carter"", ""c...","[{""credit_id"": ""52fe479ac3a36847f813eaa3"", ""de..."


In [16]:
movies.isnull().sum()

movie_id    0
title       0
overview    3
genres      0
keywords    0
cast        0
crew        0
dtype: int64

In [17]:
movies = movies.dropna()

In [18]:
movies.duplicated().sum()

0

In [19]:
movies["genres"] = movies["genres"].apply(convert_dict)
movies["keywords"] = movies["keywords"].apply(convert_dict)
movies["cast"] = movies["cast"].apply(get_actors)
movies["crew"] = movies["crew"].apply(get_director)
movies["overview"] = movies["overview"].apply(lambda x: x.split())

In [20]:
movies.head()

Unnamed: 0,movie_id,title,overview,genres,keywords,cast,crew
0,19995,Avatar,"[In, the, 22nd, century,, a, paraplegic, Marin...","[Action, Adventure, Fantasy, Science Fiction]","[culture clash, future, space war, space colon...","[Sam Worthington, Zoe Saldana, Sigourney Weaver]",[James Cameron]
1,285,Pirates of the Caribbean: At World's End,"[Captain, Barbossa,, long, believed, to, be, d...","[Adventure, Fantasy, Action]","[ocean, drug abuse, exotic island, east india ...","[Johnny Depp, Orlando Bloom, Keira Knightley]",[Gore Verbinski]
2,206647,Spectre,"[A, cryptic, message, from, Bond’s, past, send...","[Action, Adventure, Crime]","[spy, based on novel, secret agent, sequel, mi...","[Daniel Craig, Christoph Waltz, Léa Seydoux]",[Sam Mendes]
3,49026,The Dark Knight Rises,"[Following, the, death, of, District, Attorney...","[Action, Crime, Drama, Thriller]","[dc comics, crime fighter, terrorist, secret i...","[Christian Bale, Michael Caine, Gary Oldman]",[Christopher Nolan]
4,49529,John Carter,"[John, Carter, is, a, war-weary,, former, mili...","[Action, Adventure, Science Fiction]","[based on novel, mars, medallion, space travel...","[Taylor Kitsch, Lynn Collins, Samantha Morton]",[Andrew Stanton]


In [None]:
# dill.dump(movies, open("movies_data.pkl", "wb"))

In [21]:
for col in ["genres", "keywords", "cast", "crew"]:
    movies[col] = movies[col].apply(lambda x: [s.replace(" ", "") for s in x])

In [22]:
movies["tags"] = (
    movies["overview"]
    + movies["genres"]
    + movies["keywords"]
    + movies["cast"]
    + movies["crew"]
)

In [23]:
movies

Unnamed: 0,movie_id,title,overview,genres,keywords,cast,crew,tags
0,19995,Avatar,"[In, the, 22nd, century,, a, paraplegic, Marin...","[Action, Adventure, Fantasy, ScienceFiction]","[cultureclash, future, spacewar, spacecolony, ...","[SamWorthington, ZoeSaldana, SigourneyWeaver]",[JamesCameron],"[In, the, 22nd, century,, a, paraplegic, Marin..."
1,285,Pirates of the Caribbean: At World's End,"[Captain, Barbossa,, long, believed, to, be, d...","[Adventure, Fantasy, Action]","[ocean, drugabuse, exoticisland, eastindiatrad...","[JohnnyDepp, OrlandoBloom, KeiraKnightley]",[GoreVerbinski],"[Captain, Barbossa,, long, believed, to, be, d..."
2,206647,Spectre,"[A, cryptic, message, from, Bond’s, past, send...","[Action, Adventure, Crime]","[spy, basedonnovel, secretagent, sequel, mi6, ...","[DanielCraig, ChristophWaltz, LéaSeydoux]",[SamMendes],"[A, cryptic, message, from, Bond’s, past, send..."
3,49026,The Dark Knight Rises,"[Following, the, death, of, District, Attorney...","[Action, Crime, Drama, Thriller]","[dccomics, crimefighter, terrorist, secretiden...","[ChristianBale, MichaelCaine, GaryOldman]",[ChristopherNolan],"[Following, the, death, of, District, Attorney..."
4,49529,John Carter,"[John, Carter, is, a, war-weary,, former, mili...","[Action, Adventure, ScienceFiction]","[basedonnovel, mars, medallion, spacetravel, p...","[TaylorKitsch, LynnCollins, SamanthaMorton]",[AndrewStanton],"[John, Carter, is, a, war-weary,, former, mili..."
...,...,...,...,...,...,...,...,...
4804,9367,El Mariachi,"[El, Mariachi, just, wants, to, play, his, gui...","[Action, Crime, Thriller]","[unitedstates–mexicobarrier, legs, arms, paper...","[CarlosGallardo, JaimedeHoyos, PeterMarquardt]",[RobertRodriguez],"[El, Mariachi, just, wants, to, play, his, gui..."
4805,72766,Newlyweds,"[A, newlywed, couple's, honeymoon, is, upended...","[Comedy, Romance]",[],"[EdwardBurns, KerryBishé, MarshaDietlein]",[EdwardBurns],"[A, newlywed, couple's, honeymoon, is, upended..."
4806,231617,"Signed, Sealed, Delivered","[""Signed,, Sealed,, Delivered"", introduces, a,...","[Comedy, Drama, Romance, TVMovie]","[date, loveatfirstsight, narration, investigat...","[EricMabius, KristinBooth, CrystalLowe]",[ScottSmith],"[""Signed,, Sealed,, Delivered"", introduces, a,..."
4807,126186,Shanghai Calling,"[When, ambitious, New, York, attorney, Sam, is...",[],[],"[DanielHenney, ElizaCoupe, BillPaxton]",[DanielHsia],"[When, ambitious, New, York, attorney, Sam, is..."


In [24]:
df = movies[["movie_id", "title", "tags"]]
df["tags"] = df["tags"].apply(lambda x: " ".join(x).lower())
df["tags"] = df["tags"].apply(stem_tags)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df["tags"] = df["tags"].apply(lambda x: " ".join(x).lower())
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df["tags"] = df["tags"].apply(stem_tags)


In [25]:
df.head()

Unnamed: 0,movie_id,title,tags
0,19995,Avatar,"in the 22nd century, a parapleg marin is dispa..."
1,285,Pirates of the Caribbean: At World's End,"captain barbossa, long believ to be dead, ha c..."
2,206647,Spectre,a cryptic messag from bond’ past send him on a...
3,49026,The Dark Knight Rises,follow the death of district attorney harvey d...
4,49529,John Carter,"john carter is a war-weary, former militari ca..."


In [26]:
vect = CountVectorizer(max_features=5000, stop_words="english")
vectors = vect.fit_transform(df["tags"]).toarray()
sim_matrix = cosine_similarity(vectors)

In [18]:
def recommend_movies(movie, num=5):
    movie_index = df[df["title"] == movie].index[0]
    distances = sim_matrix[movie_index]
    id_list = sorted(list(enumerate(distances)), reverse=True, key=lambda x: x[1])[
        1 : num + 1
    ]
    rec_list = []

    for item in id_list:
        rec_list.append({"movie_id": item[0], "title": df.iloc[item[0]].title})

    return rec_list

In [27]:
movie_list = recommend_movies("Pirates of the Caribbean: At World's End", 10)

In [29]:
import urllib.parse

url_list = []
for item in movie_list:
    name = urllib.parse.quote_plus(item["title"])
    url = f"http://www.omdbapi.com/?apikey={api_key}&t={name}"
    url_list.append(url)

url_list

['http://www.omdbapi.com/?apikey=adb38f28&t=Pirates+of+the+Caribbean%3A+Dead+Man%27s+Chest',
 'http://www.omdbapi.com/?apikey=adb38f28&t=Pirates+of+the+Caribbean%3A+The+Curse+of+the+Black+Pearl',
 'http://www.omdbapi.com/?apikey=adb38f28&t=Pirates+of+the+Caribbean%3A+On+Stranger+Tides',
 'http://www.omdbapi.com/?apikey=adb38f28&t=Life+of+Pi',
 'http://www.omdbapi.com/?apikey=adb38f28&t=20%2C000+Leagues+Under+the+Sea',
 'http://www.omdbapi.com/?apikey=adb38f28&t=Puss+in+Boots',
 'http://www.omdbapi.com/?apikey=adb38f28&t=The+Black+Hole',
 'http://www.omdbapi.com/?apikey=adb38f28&t=The+Pirates%21+In+an+Adventure+with+Scientists%21',
 'http://www.omdbapi.com/?apikey=adb38f28&t=The+Indian+in+the+Cupboard',
 'http://www.omdbapi.com/?apikey=adb38f28&t=The+Chronicles+of+Narnia%3A+The+Voyage+of+the+Dawn+Treader']

In [28]:
movie_list

[{'movie_id': 12, 'title': "Pirates of the Caribbean: Dead Man's Chest"},
 {'movie_id': 199,
  'title': 'Pirates of the Caribbean: The Curse of the Black Pearl'},
 {'movie_id': 17, 'title': 'Pirates of the Caribbean: On Stranger Tides'},
 {'movie_id': 216, 'title': 'Life of Pi'},
 {'movie_id': 3570, 'title': '20,000 Leagues Under the Sea'},
 {'movie_id': 187, 'title': 'Puss in Boots'},
 {'movie_id': 2131, 'title': 'The Black Hole'},
 {'movie_id': 848, 'title': 'The Pirates! In an Adventure with Scientists!'},
 {'movie_id': 1075, 'title': 'The Indian in the Cupboard'},
 {'movie_id': 109,
  'title': 'The Chronicles of Narnia: The Voyage of the Dawn Treader'}]

In [22]:
movies_data = []
for url in url_list:
    res = requests.get(url)
    data = res.json()
    movie_data = []
    data_dict = {
        "Title": data["Title"],
        "Year released": data["Year"],
        "Director": data["Director"],
        "Writer": data["Writer"],
        "Actors": data["Actors"],
        "Rating": data["Rated"],
        "Runtime": data["Runtime"],
        "Genre": data["Genre"],
        "Plot": data["Plot"],
    }
    movie_data.append(data_dict)
    movie_data.append(data["Poster"])
    movies_data.append(movie_data)

In [23]:
movies_data

[[{'Title': 'Aliens vs. Predator: Requiem',
   'Year released': '2007',
   'Director': 'Colin Strause, Greg Strause',
   'Writer': "Shane Salerno, Dan O'Bannon, Ronald Shusett",
   'Actors': 'Reiko Aylesworth, Steven Pasquale, Shareeka Epps',
   'Rating': 'R',
   'Runtime': '94 min',
   'Genre': 'Action, Horror, Sci-Fi',
   'Plot': 'Warring Alien and Predator races descend on a rural Colorado town, where unsuspecting residents must band together for any chance of survival.'},
  'https://m.media-amazon.com/images/M/MV5BMTI5NDY2NDUwM15BMl5BanBnXkFtZTYwNzQxMTA3._V1_SX300.jpg'],
 [{'Title': 'Aliens',
   'Year released': '1986',
   'Director': 'James Cameron',
   'Writer': 'James Cameron, David Giler, Walter Hill',
   'Actors': 'Sigourney Weaver, Michael Biehn, Carrie Henn',
   'Rating': 'R',
   'Runtime': '137 min',
   'Genre': 'Action, Adventure, Sci-Fi',
   'Plot': 'Decades after surviving the Nostromo incident, Ellen Ripley is sent out to re-establish contact with a terraforming colony 

Mike White
