### IMPORTING LIBRARIES

In [183]:
import pandas as pd
import numpy as np
import ast
import json
import requests
import matplotlib.pyplot as plt
%matplotlib inline
import string
import re
import nltk
import warnings
warnings.filterwarnings("ignore", category=DeprecationWarning)
from sklearn.preprocessing import MinMaxScaler
from scipy.sparse import csr_matrix
from scipy.sparse import hstack
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score
from sklearn.metrics.pairwise import cosine_similarity

In [184]:
df1 = pd.read_csv('tmdb_5000_credits.csv')
df2 = pd.read_csv('tmdb_5000_movies.csv')

### DATA CLEANING

In [185]:
# observe Dataset_1
df1.shape

(4803, 4)

In [186]:
# observe Dataset_2
df2.shape

(4803, 20)

In [187]:
# Convert strings to lists of dictionaries
df1["cast"] = df1["cast"].apply(lambda x: ast.literal_eval(x) if isinstance(x, str) else x)

# Create new column in df1 - 'cast_names'
df1["cast_names"] = df1["cast"].apply(lambda x: [d["name"] for d in x])

In [188]:
crew_list = json.loads(df1['crew'][1])
for crew_member in crew_list:
    print(crew_member['job'])

Director of Photography
Director
Producer
Screenplay
Screenplay
Editor
Editor
Original Music Composer
Executive Producer
Producer
Producer
Producer
Producer
Casting
Production Design
Art Direction
Casting
Set Decoration
Costume Design
Costume Design
Music Supervisor
Conceptual Design
Makeup Department Head
Stunts
CGI Supervisor
Script Supervisor
Special Effects Coordinator
Music Editor
Script Supervisor
Music Editor
Script Supervisor
Music Editor


In [189]:
# Apply literal_eval to convert stringified dictionaries to dictionaries
df1["crew"] = df1["crew"].apply(lambda x: ast.literal_eval(x) if isinstance(x, str) else x)

# Extract names of director, producer, and screenplay writer from crew list
df1["director"] = df1["crew"].apply(lambda x: [d["name"] for d in x if d["job"] == "Director"][0] if [d["job"] for d in x if d["job"] == "Director"] else None)
df1["producer"] = df1["crew"].apply(lambda x: [d["name"] for d in x if d["job"] == "Producer"])
df1["screenplay_writer"] = df1["crew"].apply(lambda x: [d["name"] for d in x if d["job"] == "Screenplay"])

Reasons for including the following crew members:

Director: The director has a significant impact on the overall vision and style of a movie, including the tone, pacing, camera work, and performance direction. Movies directed by the same director may have similar themes, visual styles, or narrative techniques, which can be used for recommendation purposes.

Screenplay writer: The screenplay is the foundation of a movie, providing the story, characters, dialogue, and structure. Similarities between movies based on the same source material or with similar themes, genres, or narrative structures can be identified and used for recommendation.

Producer: The producer oversees the financial and logistical aspects of a movie, including casting, hiring, scheduling, and marketing. The production company or studio associated with a movie may have a specific brand or target audience, which can be used for recommendation purposes. Additionally, producers may have a track record of successful movies or collaborations with specific directors or actors, which can also be used as a recommendation feature.

In [190]:
df1.head()

Unnamed: 0,movie_id,title,cast,crew,cast_names,director,producer,screenplay_writer
0,19995,Avatar,"[{'cast_id': 242, 'character': 'Jake Sully', '...","[{'credit_id': '52fe48009251416c750aca23', 'de...","[Sam Worthington, Zoe Saldana, Sigourney Weave...",James Cameron,"[James Cameron, Jon Landau]",[James Cameron]
1,285,Pirates of the Caribbean: At World's End,"[{'cast_id': 4, 'character': 'Captain Jack Spa...","[{'credit_id': '52fe4232c3a36847f800b579', 'de...","[Johnny Depp, Orlando Bloom, Keira Knightley, ...",Gore Verbinski,"[Jerry Bruckheimer, Eric McLeod, Chad Oman, Pe...","[Ted Elliott, Terry Rossio]"
2,206647,Spectre,"[{'cast_id': 1, 'character': 'James Bond', 'cr...","[{'credit_id': '54805967c3a36829b5002c41', 'de...","[Daniel Craig, Christoph Waltz, Léa Seydoux, R...",Sam Mendes,"[Barbara Broccoli, Michael G. Wilson]","[John Logan, Robert Wade, Neal Purvis, Jez But..."
3,49026,The Dark Knight Rises,"[{'cast_id': 2, 'character': 'Bruce Wayne / Ba...","[{'credit_id': '52fe4781c3a36847f81398c3', 'de...","[Christian Bale, Michael Caine, Gary Oldman, A...",Christopher Nolan,"[Charles Roven, Christopher Nolan, Emma Thomas]","[Christopher Nolan, Jonathan Nolan]"
4,49529,John Carter,"[{'cast_id': 5, 'character': 'John Carter', 'c...","[{'credit_id': '52fe479ac3a36847f813eaa3', 'de...","[Taylor Kitsch, Lynn Collins, Samantha Morton,...",Andrew Stanton,"[Colin Wilson, Jim Morris, Lindsey Collins]","[Andrew Stanton, Michael Chabon, Mark Andrews]"


In [191]:
# describe NaN and empty cells in each column
for col in df1.columns:
    nan_count = df1[col].isna().sum()
    empty_count = df1[col].eq('').sum()
    print(f"Column {col}: NaN count = {nan_count}, Empty count = {empty_count}")

Column movie_id: NaN count = 0, Empty count = 0
Column title: NaN count = 0, Empty count = 0
Column cast: NaN count = 0, Empty count = 0
Column crew: NaN count = 0, Empty count = 0
Column cast_names: NaN count = 0, Empty count = 0
Column director: NaN count = 30, Empty count = 0
Column producer: NaN count = 0, Empty count = 0
Column screenplay_writer: NaN count = 0, Empty count = 0


In [192]:
df1[df1['director'].isnull()]['title']

3661                                            Flying By
3670                                      Running Forever
3729                                                  Paa
3977                                   Boynton Beach Club
4068                                            Sharkskin
4105      The Book of Mormon Movie, Volume 1: The Journey
4118                               Hum To Mohabbat Karega
4123                                       Roadside Romeo
4247                                Me You and Five Bucks
4305                            Down & Out With The Dolls
4314                                            Crowsnest
4322                                   Sex With Strangers
4374                                Dream with the Fishes
4401                                  The Helix... Loaded
4405                                    Karachi se Lahore
4458                                  Harrison Montgomery
4504                              Light from the Darkroom
4553          

In [193]:
dict_director = {
    'Flying By': 'Jim Amatulli',
    'Running Forever': 'JAMES CHIU',
    'Paa': 'R. Balki',
    'Boynton Beach Club': 'Susan Seidelman',
    'Sharkskin': 'Dan Perri',
    'The Book of Mormon Movie, Volume 1: The Journey': 'Gary Rogers',
    'Hum To Mohabbat Karega': 'Kundan Shah',
    'Roadside Romeo': 'Jugal Hansraj',
    'Me You and Five Bucks': 'Jaime Zevallos',
    'Down & Out With The Dolls': 'Kurt Voss',
    'Crowsnest': 'Jordan Marder',
    'Sex With Strangers': 'John T. Connor',
    'Dream with the Fishes': 'Finn Taylor',
    'The Helix... Loaded': 'Arika Lisanne Mittman',
    'Karachi se Lahore': 'Wajahat Rauf',
    'Harrison Montgomery': 'Daniel Davila',
    'Light from the Darkroom': 'Lance McDaniel',
    'America Is Still the Place': 'Patrick Gilles',
    'The Little Ponderosa Zoo': 'Luke Dye',
    'Diamond Ruff': 'Alec Asten',
    'Rise of the Entrepreneur: The Search for a Better Way': 'Erin Giles',
    'I Want Your Money': 'Ray Griggs',
    'Fabled': 'Ari Kirschenbaum',
    'Death Calls': 'Ken Del Conte',
    "Amidst the Devil's Wings": 'Daniel Columbie',
    'Teeth and Blood': 'Al Franklin',
    'UnDivided': 'Drew S. Takahashi',
    'Little Big Top': 'Stephen Tracey',
    'Short Cut to Nirvana: Kumbh Mela': 'Maurizio Benazzo, Nick Day',
    'The Blood of My Brother: A Story of Death in Iraq': 'Andrew Berends'
}

In [194]:
for title, director in dict_director.items():
    df1.loc[df1['title'] == title, 'director'] = director

In [195]:
# describe NaN and empty cells in each column
for col in df1.columns:
    nan_count = df1[col].isna().sum()
    empty_count = df1[col].eq('').sum()
    print(f"Column {col}: NaN count = {nan_count}, Empty count = {empty_count}")

Column movie_id: NaN count = 0, Empty count = 0
Column title: NaN count = 0, Empty count = 0
Column cast: NaN count = 0, Empty count = 0
Column crew: NaN count = 0, Empty count = 0
Column cast_names: NaN count = 0, Empty count = 0
Column director: NaN count = 0, Empty count = 0
Column producer: NaN count = 0, Empty count = 0
Column screenplay_writer: NaN count = 0, Empty count = 0


In [196]:
# describe NaN and empty cells in each df2 column
for col in df2.columns:
    nan_count = df2[col].isna().sum()
    empty_count = df2[col].eq('').sum()
    print(f"Column {col}: NaN count = {nan_count}, Empty count = {empty_count}")

Column budget: NaN count = 0, Empty count = 0
Column genres: NaN count = 0, Empty count = 0
Column homepage: NaN count = 3091, Empty count = 0
Column id: NaN count = 0, Empty count = 0
Column keywords: NaN count = 0, Empty count = 0
Column original_language: NaN count = 0, Empty count = 0
Column original_title: NaN count = 0, Empty count = 0
Column overview: NaN count = 31, Empty count = 0
Column popularity: NaN count = 0, Empty count = 0
Column production_companies: NaN count = 0, Empty count = 0
Column production_countries: NaN count = 0, Empty count = 0
Column release_date: NaN count = 1, Empty count = 0
Column revenue: NaN count = 0, Empty count = 0
Column runtime: NaN count = 2, Empty count = 0
Column spoken_languages: NaN count = 0, Empty count = 0
Column status: NaN count = 0, Empty count = 0
Column tagline: NaN count = 844, Empty count = 0
Column title: NaN count = 0, Empty count = 0
Column vote_average: NaN count = 0, Empty count = 0
Column vote_count: NaN count = 0, Empty cou

In [197]:
#Unpacking Genres

# convert the 'genres' column to dtype object
df2['genres'] = df2['genres'].astype(object)

# specify the key to extract
key = 'name'

# create a new column called 'genre_list'
df2['genre_list'] = ''

# loop through the values in the 'genres' column and extract the values for the specified key
for i, genre_list in enumerate(df2['genres']):
    genre_values = []
    # check if the value is a string and convert it to a list of dictionaries if necessary
    if isinstance(genre_list, str):
        genre_list = ast.literal_eval(genre_list)
    for genre_dict in genre_list:
        if genre_dict.get(key):
            genre_values.append(genre_dict[key])
    df2.at[i, 'genre_list'] = genre_values

In [198]:
#Unpacking Keywords

# convert the 'keywords' column to dtype object
df2['keywords'] = df2['keywords'].astype(object)

# specify the key to extract
key = 'name'

# create a new column called 'keywords_unpacked'
df2['keywords_unpacked'] = ''

# loop through the values in the 'keywords' column and extract the values for the specified key
for i, keywords_unpacked in enumerate(df2['keywords']):
    values = []
    # check if the value is a string and convert it to a list of dictionaries if necessary
    if isinstance(keywords_unpacked, str):
        keywords_unpacked = ast.literal_eval(keywords_unpacked)
    for key_dict in keywords_unpacked:
        if key_dict.get(key):
            values.append(key_dict[key])
    df2.at[i, 'keywords_unpacked'] = values

In [199]:
# Movies with no overview
nan_titles = []
for idx, row in df2.iterrows():
    if pd.isna(row['overview']):
        nan_titles.append(row['original_title'])

In [200]:
# Creating a dictionary with movie descriptions for blank movies, manually filled out.
dict1 = {
'The Dark Knight': 'A superhero action-thriller where Batman battles the Joker to save Gotham City. The movie showcases intense fight scenes and explores the dark psychological struggles of the characters.',
'Inside Out': 'An animated adventure movie that explores the emotions and memories of a young girl. The film provides a creative portrayal of complex emotions and has a heartwarming message about growing up.',
'Guardians of the Galaxy': 'A superhero space opera where a group of misfits team up to save the galaxy from a powerful villain. The movie features an eclectic soundtrack and a mix of action, humor, and heart.',
'Interstellar': 'A science fiction movie where a group of astronauts travel through a wormhole in search of a new home for humanity. The film combines stunning visuals with complex theories of space and time.',
'Inception': 'A mind-bending heist movie where a thief steals information by entering people’s dreams. The movie is known for its intricate plot and stunning visual effects that keep the audience on the edge of their seats.',
'The Lord of the Rings: The Fellowship of the Ring': 'An epic fantasy movie where a young hobbit must destroy a powerful ring to save Middle-earth from evil. The movie features breathtaking landscapes, epic battles, and memorable characters.',
'Django Unchained': 'A western drama where a freed slave teams up with a bounty hunter to rescue his wife from a brutal plantation owner. The film is known for its gritty realism and powerful performances.',
'The Wolf of Wall Street': 'A biographical black comedy movie that follows the rise and fall of a corrupt stockbroker. The movie showcases the excess and corruption of Wall Street in the 1990s and features a dynamic performance from Leonardo DiCaprio.',
'The Lord of the Rings: The Return of the King': 'The final installment of the epic fantasy trilogy where the fate of Middle-earth is decided in a climactic battle. The movie provides a satisfying conclusion to the story with stunning action sequences and emotional moments.',
'The Lord of the Rings: The Two Towers': 'The second installment of the epic fantasy trilogy where the fellowship is scattered and faces new challenges. The movie features epic battles and a deeper exploration of the characters and their motivations.',
'The Lion King': 'An animated musical movie where a young lion prince must reclaim his throne from his treacherous uncle. The movie features memorable songs and breathtaking animation that brings the African savannah to life.',
'The Matrix': 'A science fiction action movie where a hacker discovers the truth about reality and leads a rebellion against intelligent machines. The movie features groundbreaking special effects and a thought-provoking exploration of reality and identity.',
'Fight Club': 'A psychological drama movie where an insomniac office worker forms a secret club that evolves into a violent anarchist movement. The movie features an unreliable narrator and a subversive critique of consumer culture.',
'The Green Mile': 'A supernatural drama movie where a prison guard discovers that an inmate on death row has miraculous healing powers. The movie explores themes of justice, morality, and redemption with powerful performances from the cast.',
'Forrest Gump': 'A comedy-drama movie that follows the life of a simple man who unwittingly becomes part of some of the defining moments of the 20th century. The movie features a heartwarming message about the power of kindness and perseverance.',
'Se7en': 'A crime thriller movie where two detectives track down a serial killer who uses the seven deadly sins as his inspiration. The movie is known for its gritty atmosphere and suspenseful storytelling.',
 "Schindler's List": 'A poignant historical drama that depicts the heroic acts of a German businessman who risks everything to save the lives of Jewish refugees during the Holocaust.',
"The Shawshank Redemption": 'An emotionally powerful prison drama depicting the enduring friendship between two inmates, amidst the harsh realities of incarceration.',
"The Empire Strikes Back": 'A thrilling space epic that continues the Star Wars saga, as the rebels face new challenges and the Force is further explored.',
"The Silence of the Lambs": 'A chilling psychological thriller that delves into the mind of a cannibalistic serial killer, as an FBI agent races against time to catch another killer on the loose.',
"Back to the Future": 'A time-traveling adventure-comedy that is full of action, humor, and heart, as a teenager tries to fix the past and secure his future.',
"千と千尋の神隠し": 'A captivating and visually stunning animated film that takes the audience on a magical journey through a mystical world filled with strange creatures and enigmatic spirits.',
"The Imitation Game": 'A gripping historical drama that pays tribute to a brilliant mathematician and codebreaker who played a pivotal role in saving countless lives during World War II.',
"Chiamatemi Francesco - Il Papa della gente": 'An inspiring biographical film that tells the story of a man who dedicated his life to serving others and became a beacon of hope and compassion for millions around the world.',
"The Godfather: Part II": 'A complex and riveting crime drama that explores the rise of a powerful mafia family and the struggles of its heirs to maintain control and protect their loved ones.',
"Star Wars": 'An iconic and imaginative sci-fi adventure that takes the audience on a thrilling ride through a galaxy far, far away, as a group of rebels fight to overthrow an oppressive regime.',
"Pulp Fiction": 'A groundbreaking crime film that blends humor, violence, and pop culture in a way that defies expectations and challenges conventions.',
"The Godfather": 'A masterful crime drama that explores the dark side of power and loyalty, as a patriarch of a powerful mafia family struggles to maintain his authority and protect his loved ones.',
"Whiplash": 'A powerful and intense drama that delves into the obsessive world of music, as a young drummer is pushed to his limits by a demanding and abusive instructor.',
"To Be Frank, Sinatra at 100": 'A fascinating documentary that celebrates the life and legacy of one of the greatest entertainers of all time, as friends, family, and fellow musicians reflect on his impact and influence.',
"Food Chains": 'A thought-provoking documentary that sheds light on the harsh realities of farm labor in America, as workers fight for their rights and fair treatment in a system that often exploits them.'
}

In [201]:
#Filling in empty overview values using corresponding values from dict1 into df2:

# Iterate over keys in dict1
for key in dict1:
    # Check if key matches any value in "title" column of df2
    mask = df2['title'] == key
    if mask.any():
        # Update "overview" column with value from dict1
        df2.loc[mask, 'overview'] = dict1[key]

In [202]:
# Drop one of the 'title' columns based on the duplicate mask
df2 = df2.drop('title', axis=1)

In [203]:
# Combining the dataframes by concatenating them horizontally
df = pd.concat([df1, df2], axis=1)

In [204]:
df.head()

Unnamed: 0,movie_id,title,cast,crew,cast_names,director,producer,screenplay_writer,budget,genres,...,release_date,revenue,runtime,spoken_languages,status,tagline,vote_average,vote_count,genre_list,keywords_unpacked
0,19995,Avatar,"[{'cast_id': 242, 'character': 'Jake Sully', '...","[{'credit_id': '52fe48009251416c750aca23', 'de...","[Sam Worthington, Zoe Saldana, Sigourney Weave...",James Cameron,"[James Cameron, Jon Landau]",[James Cameron],237000000,"[{""id"": 28, ""name"": ""Action""}, {""id"": 12, ""nam...",...,2009-12-10,2787965087,162.0,"[{""iso_639_1"": ""en"", ""name"": ""English""}, {""iso...",Released,Enter the World of Pandora.,7.2,11800,"[Action, Adventure, Fantasy, Science Fiction]","[culture clash, future, space war, space colon..."
1,285,Pirates of the Caribbean: At World's End,"[{'cast_id': 4, 'character': 'Captain Jack Spa...","[{'credit_id': '52fe4232c3a36847f800b579', 'de...","[Johnny Depp, Orlando Bloom, Keira Knightley, ...",Gore Verbinski,"[Jerry Bruckheimer, Eric McLeod, Chad Oman, Pe...","[Ted Elliott, Terry Rossio]",300000000,"[{""id"": 12, ""name"": ""Adventure""}, {""id"": 14, ""...",...,2007-05-19,961000000,169.0,"[{""iso_639_1"": ""en"", ""name"": ""English""}]",Released,"At the end of the world, the adventure begins.",6.9,4500,"[Adventure, Fantasy, Action]","[ocean, drug abuse, exotic island, east india ..."
2,206647,Spectre,"[{'cast_id': 1, 'character': 'James Bond', 'cr...","[{'credit_id': '54805967c3a36829b5002c41', 'de...","[Daniel Craig, Christoph Waltz, Léa Seydoux, R...",Sam Mendes,"[Barbara Broccoli, Michael G. Wilson]","[John Logan, Robert Wade, Neal Purvis, Jez But...",245000000,"[{""id"": 28, ""name"": ""Action""}, {""id"": 12, ""nam...",...,2015-10-26,880674609,148.0,"[{""iso_639_1"": ""fr"", ""name"": ""Fran\u00e7ais""},...",Released,A Plan No One Escapes,6.3,4466,"[Action, Adventure, Crime]","[spy, based on novel, secret agent, sequel, mi..."
3,49026,The Dark Knight Rises,"[{'cast_id': 2, 'character': 'Bruce Wayne / Ba...","[{'credit_id': '52fe4781c3a36847f81398c3', 'de...","[Christian Bale, Michael Caine, Gary Oldman, A...",Christopher Nolan,"[Charles Roven, Christopher Nolan, Emma Thomas]","[Christopher Nolan, Jonathan Nolan]",250000000,"[{""id"": 28, ""name"": ""Action""}, {""id"": 80, ""nam...",...,2012-07-16,1084939099,165.0,"[{""iso_639_1"": ""en"", ""name"": ""English""}]",Released,The Legend Ends,7.6,9106,"[Action, Crime, Drama, Thriller]","[dc comics, crime fighter, terrorist, secret i..."
4,49529,John Carter,"[{'cast_id': 5, 'character': 'John Carter', 'c...","[{'credit_id': '52fe479ac3a36847f813eaa3', 'de...","[Taylor Kitsch, Lynn Collins, Samantha Morton,...",Andrew Stanton,"[Colin Wilson, Jim Morris, Lindsey Collins]","[Andrew Stanton, Michael Chabon, Mark Andrews]",260000000,"[{""id"": 28, ""name"": ""Action""}, {""id"": 12, ""nam...",...,2012-03-07,284139100,132.0,"[{""iso_639_1"": ""en"", ""name"": ""English""}]",Released,"Lost in our world, found in another.",6.1,2124,"[Action, Adventure, Science Fiction]","[based on novel, mars, medallion, space travel..."


In [205]:
# Get a list of all columns in the dataframe
columns = df.columns.tolist()
columns

['movie_id',
 'title',
 'cast',
 'crew',
 'cast_names',
 'director',
 'producer',
 'screenplay_writer',
 'budget',
 'genres',
 'homepage',
 'id',
 'keywords',
 'original_language',
 'original_title',
 'overview',
 'popularity',
 'production_companies',
 'production_countries',
 'release_date',
 'revenue',
 'runtime',
 'spoken_languages',
 'status',
 'tagline',
 'vote_average',
 'vote_count',
 'genre_list',
 'keywords_unpacked']

In [206]:
# Droping unnecessary columns from the dataframe
df = df.drop(['cast', 'crew', 'genres','popularity','release_date','production_companies','production_countries','homepage', 'id', 'keywords','original_language','original_title','revenue','spoken_languages','status'],axis=1)

In [207]:
df.head()

Unnamed: 0,movie_id,title,cast_names,director,producer,screenplay_writer,budget,overview,runtime,tagline,vote_average,vote_count,genre_list,keywords_unpacked
0,19995,Avatar,"[Sam Worthington, Zoe Saldana, Sigourney Weave...",James Cameron,"[James Cameron, Jon Landau]",[James Cameron],237000000,"In the 22nd century, a paraplegic Marine is di...",162.0,Enter the World of Pandora.,7.2,11800,"[Action, Adventure, Fantasy, Science Fiction]","[culture clash, future, space war, space colon..."
1,285,Pirates of the Caribbean: At World's End,"[Johnny Depp, Orlando Bloom, Keira Knightley, ...",Gore Verbinski,"[Jerry Bruckheimer, Eric McLeod, Chad Oman, Pe...","[Ted Elliott, Terry Rossio]",300000000,"Captain Barbossa, long believed to be dead, ha...",169.0,"At the end of the world, the adventure begins.",6.9,4500,"[Adventure, Fantasy, Action]","[ocean, drug abuse, exotic island, east india ..."
2,206647,Spectre,"[Daniel Craig, Christoph Waltz, Léa Seydoux, R...",Sam Mendes,"[Barbara Broccoli, Michael G. Wilson]","[John Logan, Robert Wade, Neal Purvis, Jez But...",245000000,A cryptic message from Bond’s past sends him o...,148.0,A Plan No One Escapes,6.3,4466,"[Action, Adventure, Crime]","[spy, based on novel, secret agent, sequel, mi..."
3,49026,The Dark Knight Rises,"[Christian Bale, Michael Caine, Gary Oldman, A...",Christopher Nolan,"[Charles Roven, Christopher Nolan, Emma Thomas]","[Christopher Nolan, Jonathan Nolan]",250000000,Following the death of District Attorney Harve...,165.0,The Legend Ends,7.6,9106,"[Action, Crime, Drama, Thriller]","[dc comics, crime fighter, terrorist, secret i..."
4,49529,John Carter,"[Taylor Kitsch, Lynn Collins, Samantha Morton,...",Andrew Stanton,"[Colin Wilson, Jim Morris, Lindsey Collins]","[Andrew Stanton, Michael Chabon, Mark Andrews]",260000000,"John Carter is a war-weary, former military ca...",132.0,"Lost in our world, found in another.",6.1,2124,"[Action, Adventure, Science Fiction]","[based on novel, mars, medallion, space travel..."


In [208]:
# Get the number of NaN and empty cells in each column
nan_counts = df.isna().sum()
empty_counts = (df.fillna('') == '').sum()

# Print the results
print("NaN counts:\n", nan_counts)
print("\nEmpty counts:\n", empty_counts)

NaN counts:
 movie_id               0
title                  0
cast_names             0
director               0
producer               0
screenplay_writer      0
budget                 0
overview               1
runtime                2
tagline              844
vote_average           0
vote_count             0
genre_list             0
keywords_unpacked      0
dtype: int64

Empty counts:
 movie_id               0
title                  0
cast_names             0
director               0
producer               0
screenplay_writer      0
budget                 0
overview               1
runtime                2
tagline              844
vote_average           0
vote_count             0
genre_list             0
keywords_unpacked      0
dtype: int64


In [209]:
print(df.dtypes)

movie_id               int64
title                 object
cast_names            object
director              object
producer              object
screenplay_writer     object
budget                 int64
overview              object
runtime              float64
tagline               object
vote_average         float64
vote_count             int64
genre_list            object
keywords_unpacked     object
dtype: object


In [210]:
# Count null values
num_null = df['tagline'].isnull().sum()

# Count empty values
num_empty = df['tagline'].eq('').sum()

# Total number of null and empty values
total_null_empty = num_null + num_empty

print("Number of null values in 'tagline' column:", num_null)
print("Number of empty values in 'tagline' column:", num_empty)
print("Total number of null and empty values in 'tagline' column:", total_null_empty)

Number of null values in 'tagline' column: 844
Number of empty values in 'tagline' column: 0
Total number of null and empty values in 'tagline' column: 844


In [211]:
#filling out empty taglines
empty_tag = {'Superman Returns': 'On June 30, 2006! Look Up In The Sky!', 'Star Trek Beyond': 'The Final Chapter of The Original Trilogy', 'Up': 'Fly Up to Venezuela', 'The Jungle Book': 'The legend will never be the same.', 'Angels & Demons': 'The holiest event of our time. Perfect for their return.', 'G.I. Joe: Retaliation': 'When all else fails, they don’t.', 'Astérix aux Jeux Olympiques': '', 'The Hunchback of Notre Dame': 'Join the party!', '金陵十三釵': 'The emotional epic of the year.', 'The Nutcracker: The Untold Story': 'All you need is a little imagination.', 'Fantasia 2000': 'An extraordinary blend of sights and sounds.', 'Évolution': '', 'Don Gato: El inicio de la pandilla': '', 'Anna and the King': 'The story of a woman who could only be tamed by love.', 'Hollywood Homicide': "When time is running out, one shot is all you've got", 'Ballistic: Ecks vs. Sever': 'One wanted justice, the other wanted revenge.', 'Red Dawn': 'Welcome to the home of the brave', 'Proof of Life': 'Suspense at its highest.', 'Oliver Twist': 'Let the advernture begin', 'The Horse Whisperer': 'Discover the healing power of love.', 'Joy': 'Invention. Success. Yours.', 'Sinbad: Legend of the Seven Seas': 'The greatest voyage is discovering who you are.', '西游记之孙悟空三打白骨精': '', 'The Ridiculous 6': 'The Western spoof you’ve been waiting for', "Captain Corelli's Mandolin": 'Love is a force you cannot resist.', 'Maid in Manhattan': 'This Christmas ... love checks in.', 'The Pursuit of Happyness': 'Inspired by true events', "Mr. Popper's Penguins": 'Family, it is for the birds.', 'Instinct': 'When the mind is the weapon, the body is expendable', 'Thunderbirds': 'Imagine the impossible.', 'The Book of Life': 'From producer Guillermo del Toro comes a vibrant animated film inspired by Mexico’s Day of the Dead.', 'Beverly Hills Cop III': 'He is back in Beverly Hills. And the heat is back on!', 'The Story of Us': 'Can a marriage survive 15 years of marriage?', 'Joan of Arc': '', 'Panic Room': 'It was supposed to be the safest room in the house', 'The Tooth Fairy': 'You cant handle the tooth.', '1947: Earth': '', 'Солярис': '', 'Fat Albert': 'Hey! Hey! Hey! Christmas Day.', 'The Indian in the Cupboard': 'Adventure comes to life', 'The Pledge': 'Some promises are deadly', 'The Producers': "I want everything I've ever seen in the movies!", 'Miracle at St. Anna': 'World War II has its heroes and its miracles.', 'Shadow Conspiracy': 'Trust No One. Believe Nothing. Watch Your Back.', 'The Medallion': 'When a Hong Kong cop is magically transformed into a hero, even he doesnt believe in!', 'Jersey Boys': 'Everybody remembers it how they need to.', 'Kiss of Death': 'From the director of "The Terminator" and "Aliens."', 'Ishtar': 'The entertainment event of the year.', 'Life or Something Like It': 'Destiny is what you make of it.', 'New York Minute': 'Anything can change in a New York minute.', 'Duplex': 'Alex Rose and Nancy Kendricks are about to teach you a thing or two about happily ever after.', 'Soul Men': 'The legends are back.', 'Delgo': 'In a divided land, a troubled youth and some unlikely friends must save the world from itself.', '投名狀': '', 'Un monstre à Paris': 'From the team that brought you "A Monster in Paris."', 'The Last Shot': 'Based on the true story of the greatest movie never made.', 'Baahubali: The Beginning': "India's biggest motion picture.", "The Time Traveler's Wife": "The Time Traveler's Wife - Beyond Time. Beyond Love. Beyond Death.", 'Against the Ropes': 'She gave the boxing world the one-two punch they never saw coming.', "Dragon Nest: Warriors' Dawn": 'The legend begins', 'White Squall': 'Based on a true story of the sea.', 'Texas Rangers': 'An American legend comes to life.', 'Obitaemyy Ostrov': '', 'Head of State': 'The only thing white is the house.', '葉問3': 'The legend continues, the grandmaster returns.', 'There Be Dragons': 'Discover the conflict that shaped the world.', 'TMNT': 'Raising Shell In 2007', 'Malcolm X': 'His father was murdered. His mother was institutionalized. He became a powerful voice for change.', 'Must Love Dogs': 'The hardest trick is making them stay.', 'Midnight in the Garden of Good and Evil': 'A lush crime mystery.', 'Hoffa': "He didn't want law. He wanted justice.", 'Le Hussard sur le toit': '', 'Black Water Transit': '', 'Playing for Keeps': 'This holiday season, what do you really want?', 'A Few Good Men': "In the heart of the nation's capital, in a courthouse of the U.S. government, one man will stop at nothing to keep his honor, and one will stop at nothing to find the truth.", 'The Young and Prodigious T.S. Spivet': 'adventure runs in the family', 'Dreamer: Inspired By a True Story': 'Dream for the fences', 'Bringing Out the Dead': 'Saving a life is a hell of a job', 'Cradle Will Rock': 'Art is never dangerous', 'George and the Dragon': 'The Quest Begins...', 'Criminal': "They're the best there is at what they do", 'Flight': "Lie. Cheat. Steal. All In A Day's Work.", 'Toy Story': 'Hang on for the comedy that goes to infinity and beyond!', 'The Vow': 'Inspired by True Events', 'Down to Earth': 'A story of premature reincarnation', 'Midnight in Paris': 'The past is a present.', 'Secondhand Lions': 'Experience a lifetime of adventure in just one summer', 'Deliver Us from Evil': "You haven't seen true evil", 'The Imaginarium of Doctor Parnassus': 'The man who tried to cheat the devil.', 'The Merchant of Venice': 'The Merchant of Venice', 'Supercross': 'Ride Hard. Live Fast.', 'Le petit Nicolas': 'Joyous, funny, touching: the adventures of a mischievous schoolboy.', 'Konferenz der Tiere': "Brace yourself for the world's greatest migration!", 'Goodbye Bafana': "Based on the true story of Nelson Mandela's prison guard.", 'Grace of Monaco': 'The greatest role Grace Kelly would ever play.', "Ripley's Game": "It's better to be a fake somebody than a real nobody.", 'Cry Freedom': 'The true story of the friendship that shook South Africa and awakened the world.', 'The Rugrats Movie': 'The adventure of a lifetime, the adventure of two lifetimes.', 'キャプテンハーロック': 'The legendary space pirate, Captain Harlock, returns in a stunning new adventure.', '3 Days to Kill': 'The countdown is on.', 'Lost Souls': '', 'Le peuple migrateur': '', 'Kundun': '', 'Alatriste': '', 'Spy Kids: All the Time in the World': 'A 4D experience.', 'Flawless': '', 'The Magic Flute': '', 'Welcome to Mooseport': "May the best loser win. When they put their heads together... it's a no brainer.", 'The Lucky One': '', 'Anacondas: The Hunt for the Blood Orchid': 'The hunters will become the hunted.', 'Dance Flick': '', 'Ice Princess': "From rink to runway... she's got it all!", 'Punch-Drunk Love': '', 'Half Past Dead': 'The worst prison movie ever made.', 'Bright Lights, Big City': 'A story about the times we live in.', "Angela's Ashes": '', 'Un Plan parfait': '', 'Gettysburg': 'Same Land. Same God. Different Dreams.', 'Sheena': '', 'Underclassman': '', "Say It Isn't So": 'Finding love is hard enough. But try telling that to your father.', "The World's Fastest Indian": 'The inspiring true story of a man and his dream.', "King's Ransom": 'It Pays To Be A Winner', 'Blindness': 'Your vision of the world will change', 'Where the Truth Lies': '', 'La véritable histoire du Chat Botté': '', 'Белка и Стрелка. Звёздные собаки': '', 'Waterloo': '', 'Jane Got a Gun': 'Strength, justice, power, vengeance.', 'Think Like a Man Too': '', 'Footloose': 'This is our time', 'Play It to the Bone': 'No one hits as hard as your best friends.', 'The Thief and the Cobbler': 'The Greatest Thief In All The Glittering World of Adventure!', 'The Bridge of San Luis Rey': 'Five Lives Bound By One Fate.', 'The Crew': 'Meet the dirtiest guys in the dirtiest job you will ever see!', 'The Longshots': 'The New Coach Has A Secret Weapon.', 'The End of the Affair': 'The end was just the beginning.', 'Forsaken': 'In a time of war, the ultimate weapon is survival.', 'Chéri': '', 'Shadowlands': 'He thought that magic only existed in books, and then he met her.', 'Mad Money': '', 'Molière': '', 'To Rome with Love': '', 'Away We Go': '', 'Moonlight Mile': 'In life and love, expect the unexpected.', 'Beverly Hills Cop II': 'The Heat is Back On!', 'The Tigger Movie': '', 'The American': '', 'The Bank Job': 'The true story of a heist gone wrong... in all the right ways.', 'The Greatest Story Ever Told': 'The mightiest motion picture ever made... in the telling of its story of the first 33 years of Christ.', 'Bullets Over Broadway': '', 'The Quiet American': 'In war, the most powerful weapon is seduction.', 'Mrs Henderson Presents': 'The show must go on, but the clothes must come off.', 'Everyone Says I Love You': "Written and Directed by Woody Allen - Let's Misbehave", 'Code Name: The Cleaner': '', 'Passchendaele': 'In war, hope is the deadliest weapon', 'Synecdoche, New York': 'Theater director Caden Cotard is mounting a new play. His life catering to suburban blue-hairs at the localcommunity center, while his own psyche teeters on the edge.', 'Bon voyage': 'The romantic comedy thats a total blast!', 'The Oogieloves in the Big Balloon Adventure': '', 'By the Sea': 'Innocence is overrated', 'The Game of Their Lives': 'A true story of the triumph of the human spirit', 'Rapa Nui': 'In a world of breathtaking beauty and brutal savagery, a people and their traditions struggle to survive', 'People I Know': 'Some people know too much.', 'The Tempest': 'Prospero is a director, his magic a movie and his daughter is Ariel.', '三国之见龙卸甲': '', 'ராமானுஜன்': '', 'Dwegons': '', 'Der Baader Meinhof Komplex': 'The children of the Nazi generation vowed fascism would never rule their world again.', 'I Can Do Bad All By Myself': 'Hope is closer than you think.', '風暴': 'A battle between evil and justice.', '逃出生天': '', 'Catch-22': 'The anti-war satire of epic proportions', 'Raging Bull': 'The greatest achievement in Martin Scorsese’s brilliant career!', 'Fame': 'Remember my name.', 'And So It Goes': 'Theres nothing harder than love...except maybe advice.', 'Labor Day': 'From the director of "Juno" and "Up in the Air".', 'Hamlet': "The extraordinary adaptation of Shakespeare's classic tale of vengeance and tragedy.", 'Das weisse Band': 'Every truth hides another.', 'Restoration': 'He was a man who had everything, but nothing he needed.', 'I Come with the Rain': 'Haunted by the memory of a detective whose mission was to catch a serial killer.', 'Madea Goes to Jail': "Madea's about to get more than she bargained for.", 'City of Ghosts': 'Where you are the outsider', 'Larry the Cable Guy: Health Inspector': 'Larrys a tip-top health inspector with an eye for inspection, hoo-wee!', 'How She Move': 'Shes dancing for a better future.', 'Bobby Jones: Stroke of Genius': 'There are many ways to be a champion.', 'Damnation Alley': 'A journey through a post-apocalyptic nightmare', 'The Women': 'Its all about...the cast!', 'White Oleander': 'Where does a mother end and a daughter begin?', 'Radio Days': 'Woody Allen takes you back to when laughter was a way of life', 'The Immigrant': 'The American Dream has a price', 'The White Countess': 'In the sensuality of the night, a cruel fate sets a trap.', "Bienvenue chez les ch'tis": "A heartwarming comedy about Northern France's little corner of the world...", 'Enter the Void': 'Welcome to Tokyo. A neon wonderland where innocence collides with experience.', 'Zulu': 'A story of two men who fought as never before... and a woman who loved as never before.', 'The Homesman': 'The untold story of The West.', 'Juwanna Mann': 'The only way he can stay pro, is to play (like) a girl.', 'Ararat': 'In a land of history, where hope was invented and the past lies waiting to be found... an unforgettable journey of love, loss, and triumph.', 'The Yellow Handkerchief': 'Love doesn’t always happen the way we plan.', '해운대': 'Tsunami, The Biggest Disaster In Korean History.', 'Private Benjamin': "Meet the army's newest recruit. The army will never be the same again!", "Malibu's Most Wanted": 'Fun for the whole family!', 'Lawrence of Arabia': 'A Mighty Motion Picture Of Action And Adventure!', 'Wild': 'Find Your Own Way Home', 'Flicka': 'Some friendships are wilder than others.', 'A Most Wanted Man': "The world's greatest spy returns in the movie event of the year.", "VeggieTales: The Pirates Who Don't Do Anything": 'The Ultimate VeggieTales Adventure', 'Idlewild': 'Idlewild. A place to begin. A chance to start again.', 'De-Lovely': 'A love that would never die and music that would live forever.', "Barney's Great Adventure": 'The Search For The Easter Bunny Begins On Video', 'Here On Earth': 'A love that knows no boundaries.', 'Peeples': 'Meet the Peeples', 'You Will Meet a Tall Dark Stranger': 'Everyone Loves A Happy Ending.', 'The Company': 'Nothing is more dangerous than the truth', 'Crazy in Alabama': 'Sometimes you have to lose your mind to find your freedom', "Felicia's Journey": 'The search for her son was over. The search for her family was just beginning.', '刺客聶隱娘': '', 'Возвращение': '', "All The Queen's Men": 'They took on the Nazi army in a crazy, impossible mission...and won!', 'Bathory': 'The Countess Dracula. She bathed in blood!', 'Dungeons & Dragons: Wrath of the Dragon God': 'In A Time Of Heroes, A Man Will Rise', 'Chiamatemi Francesco - Il Papa della gente': '', 'Pandaemonium': 'The Sex Pistols. The birth of punk. The end of silence.', "Elle s'appelait Sarah": "You never know what you'll uncover", 'Broken Horses': 'Blood binds. Honor divides.', "My Boss's Daughter": "Don't let his daughter's killer leave you in pieces.", 'Dead Man on Campus': 'Roommate Wanted...For a Limited Time Only', 'Tea with Mussolini': "C'est la vie! It all happens in the best of families.", 'New York, New York': 'The musical that became a movie!', 'Crooklyn': 'A story of growing up, and letting go.', 'Little Children': 'Lurking beneath the surface of suburban life.', 'Soul Survivors': 'Every evil has its beginning.', 'Caravans': 'Love Knows No Bounds', 'Mr. Turner': 'Turner. The extraordinary seaman who gave a nation its greatest treasure.', 'Les herbes folles': 'Life can be unexpected.', '剑雨': '', 'The Lucky Ones': 'When you least expect it, expect it.', 'Margaret': 'Discover the untold story of the tragedy', "The Caveman's Valentine": 'A murder mystery like no other.', 'The Last Godfather': "The most dangerous man in the world... and he's never been in a gunfight.", 'Two Can Play That Game': 'Men may come and go, but the game goes on forever.', 'The Astronaut Farmer': "If we don't have our dreams, we have nothing.", 'Light It Up': 'The Revolution is about to begin.', 'Birthday Girl': 'Hold on tight!', 'Resurrecting the Champ': 'Based on a true story that was too good to be true.', 'La veuve de Saint-Pierre': 'The end of innocence.', 'Find Me Guilty': 'Sometimes the best defense. . . is a wisecrack.', 'La Guerre du feu': 'Before our time, beyond our imagination, there was a time when the first tool of man was forged from the iron of the earth, the stone of the mountains and the fire of the sky.', '태극기 휘날리며': 'One Nation, One King, One Legend.', 'Yentl': 'Anshel Brings His Act To A New Stage.', 'The Muppet Christmas Carol': 'He is a Man of Science. She is a Woman of Art. Together They Cook Up a Scheme Thats Certain to Rock the Houses of London!', 'Top Five': 'The hottest comedian of the year is on the brink of disaster.', 'Prophecy': 'The end is near.', "My Baby's Daddy": 'They are going from players to playtime.', 'How to Be a Player': 'A Player Who is About To Be Played.', 'Living Out Loud': 'A new comedy about following your own lead.', 'Rachel Getting Married': 'The fragile bonds of family and friendship.', 'Sorority Boys': 'The only way to become one of the girls... is to become one of them.', '十面埋伏': 'Survival. Honor. Revenge.', 'Cadillac Records': 'If you take the ride, you must pay the price.', 'Screwed': 'One kidnapper. One dead body. The complicating factor is... the maid.', 'For Your Consideration': 'Ready for their close-up.', 'Two Lovers': 'Sometimes we leave everything to find ourselves.', 'Last Orders': 'Four friends, one journey, no return.', 'La grande bellezza': 'Beauty will save the world... and Tragedy will change it forever.', 'The Dangerous Lives of Altar Boys': 'One of the year\'s best movies... "You\'ll laugh, you\'ll cry, you\'ll stand and cheer" - Rolling Stone.', 'Married Life': 'Do you know what really goes on in the mind of the person with whom you sleep?', 'Critical Care': 'Take two interns and call me in the morning.', 'Darling Companion': 'Some friendships go to the dogs.', 'Breakfast of Champions': 'The most twisted, funniest, and unforgettable comedy of the year.', '三枪拍案惊奇': 'From the director of "The Killer" and "Hard Boiled".', '南京!南京!': 'The Rape of Nanking. In 1937, The Japanese army invaded the city of Nanking.', 'Space Battleship Yamato': 'Our hopes are with you, Yamato!', '10 Days in a Madhouse': "A real-life depiction of investigative journalist Nellie Bly's courageous undercover stint in a mental asylum in 1887.", 'まあだだよ': 'MAADADAYO. It means "Not Yet" or "I\'m not finished" A celebration of life, love, and the mysteries of fate.', '三城记': 'In the deadly game of betrayal, one family will survive...', 'High School Musical 3: Senior Year': 'High School Graduation Gets A Musical Twist', 'Dead Man Walking': 'A soul-stirring story. A riveting performance. A film of rare humanity.', 'F.I.S.T.': 'The Fight In The Man. The Man In The Fight.', 'Inside Llewyn Davis': 'What one loves about life are the things that fade.', 'The Molly Maguires': 'They shook the world with their faith and courage!', 'Romance & Cigarettes': 'A down-and-dirty musical love story.', 'Copying Beethoven': 'The Passionate Life Of Ludwig van Beethoven', 'The Blue Butterfly': '', 'There Goes My Baby': 'The summer that changed everything.', 'September Dawn': 'The Untold Story of an American Tragedy.', 'La Famille Bélier': 'A family that will make your heart sing.', "Madea's Family Reunion": 'Learn dignity. Demand respect.', 'Roll Bounce': 'Get your groove on.', 'The Second Best Exotic Marigold Hotel': 'The More Marigold Hotel.', "Mo' Better Blues": 'A Spike Lee Joint', 'Kung Pow: Enter the Fist': 'From The Director Of Ace Ventura: When Nature Calls', 'Mud': "People just don't change overnight.", 'The Goods: Live Hard, Sell Hard': 'Talk them into it. Talk them into it. Sell, sell, sell!', 'My Week with Marilyn': 'The stars are about to align.', 'End of the Spear': 'An epic journey into the heart of adventure and discovery.', 'The Red Violin': 'An instrument of passion. A world of seduction.', 'The Straight Story': "A man's courage and a nation's hope.", 'On the Line': "He's risking his neck to help a friend...and going way over his hair!", "Io sono l'amore": 'Passion. Seduction. Betrayal. The darkest secrets are the ones we keep from ourselves.', 'Bamboozled': "Starring everybody. America's entertainment just got a little more entertaining.", 'Nicholas Nickleby': 'The greatest story Charles Dickens ever told.', 'Illuminata': 'The comedy that glows in the dark.', 'Rien ne va plus': 'Life is a game. The game is a con. The con is on.', 'Rosewater': 'The true story of the imprisonment and torture of a journalist in Iran.', 'La femme de chambre du Titanic': 'On the Titanic, love knows no class.', 'Welcome to the Rileys': 'You never know who is going to be your wake-up call.', 'Cinco de Mayo: La Batalla': 'The battle that changed history.', 'An Alan Smithee Film: Burn, Hollywood, Burn': 'The greatest movie ever made... is not worth watching.', 'The Good Guy': 'Ambition has its price.', 'Motherhood': 'There are no time-outs in... motherhood.', 'Janky Promoters': 'Some guys will do anything for a hit.', 'Eulogy': 'They came. They drank. They sang.', 'Fifty Dead Men Walking': 'When you are undercover, lies are your life.', 'Jungle Shuffle': 'Get your stripes on!', 'Adam Resurrected': 'To live is to suffer. To survive is to find meaning.', 'Hross í oss': 'Love and loss in Iceland.', 'Partition': 'A story of love and sacrifice.', 'Good Intentions': 'Sometimes it takes a criminal mind to solve a crime.', 'The Lost City': 'In a world of conflict, a man of peace broke all the rules.', 'Amour': 'Love is all you need.', 'Kites': 'Some passions never die.', 'Jab Tak Hai Jaan': 'Life is short, live it freely.', 'Mine vaganti': 'Some stories are nott meant to be kept secret.', 'The Best Man': 'The writer and director of The Wood bring you the new face of the modern romantic comedy.', 'The Savages': 'Who says you cant love life, after life?', 'The Way of the Gun': 'Prepare to cross the line.', 'The Ultimate Gift': 'Some things are worth more than money.', 'Gracie': 'The rules of the game are meant to be broken.', 'Glee: The Concert Movie': 'Biggest. Concert. Ever.', 'สุริโยไท': 'The Battle of Sun-Yod', 'Barbecue': 'The seasoning is in the blood.', 'All or Nothing': 'Sometimes the poorest of men are the richest.', 'Opal Dream': 'Some secrets can never be revealed.', 'Flammen & Citronen': 'Two men. One mission. No mercy.', 'La Fille du RER': 'Based on a true story that shocked a nation.', 'Polisse': 'Real-life drama.', 'Star Wars: Clone Wars (Volume 1)': 'Choose your side.', 'Håkon Håkonsen': 'A shipwreck. A boy. A dream. Adventure sets sail.', 'The Haunting in Connecticut 2: Ghosts of Georgia': 'Based on a true story.', '실미도': 'Based on the battle of Myeongryang.', 'Namastey London': 'Let love be your guide.', 'Yeh Jawaani Hai Deewani': 'Experience life. Live love.', 'Modern Problems': 'Poor Charlie. He is got a lot of problems.', 'The City of Your Final Destination': '', 'Enough Said': "Divorced mom Eva may be falling for Albert, a sweet, funny, like-minded divorce. But as their relationship blossoms, Eva befriends Marianne, who is always complaining about her ex-husband. When Eva realizes that Albert is the target of Marianne's rants, she begins to question her own perceptions about first impressions and second chances.", 'Held Up': 'When things get tough...con artists get creative.', 'Howards End': 'Based on the novel by E.M. Forster.', 'Anomalisa': 'Welcome to the Kaufman surreal-neorealism tale in a dull world of sameness.', 'Another Year': 'A touching story of ordinary people living ordinary lives.', 'Restless': 'From the director of "Milk".', 'The Wendell Baker Story': 'Why not call him a hero?', 'Wuthering Heights': 'Love is a force of nature', 'Aloft': 'Hope is found in the darkest places.', 'Maurice Richard': 'He gave a voice to a people, and inspired a nation.', '非常幸运': '', 'Green Street Hooligans: Underground': '', 'Loin des hommes': '', 'Shopgirl': 'Sometimes, love can be found in the most unlikely places.', 'Narc': "Truth. Honor. Loyalty. On the streets, you're only as good as your last hit.", 'Men with Brooms': 'When life gets complicated...hit the curling rink.', 'Outside Bet': '', 'Ta Ra Rum Pum': 'A Race.....Against Time', 'Persepolis': 'In a dangerous world, she held on to hope.', 'The Omega Code': '', 'The Piano': 'A love lost. A secret passion. A world of fear.', 'In Too Deep': 'To bring down a vicious underworld, they had to become one of them.', 'A Single Man': 'Every day is a journey, and the journey itself is home.', 'The Last Temptation of Christ': 'The dual substance of Christ - the yearning, so human, so superhuman, of man to attain God... has always been a deep inscrutable mystery to me. My principle anguish and source of all my joys and sorrows from my youth onward has been the incessant, merciless battle between the spirit and the flesh... And my soul is the arena where these two armies have clashed and met."', "Who's Your Caddy?": "He doesn't play golf... he destroys it.", 'A Dog Of Flanders': "The Power of Love Will Change a Boy's Life Forever", 'We Need to Talk About Kevin': 'There need to be consequences.', 'The Mighty Macs': 'She dared to dream. They dared to believe.', 'Mother and Child': 'There are no perfect arrangements.', 'March or Die': '', 'Somewhere': 'You are the entertainment.', 'I Hope They Serve Beer in Hell': 'Based on a true story... mostly.', 'Gerry': '', 'The Heart of Me': 'Passion has a price.', 'Трудно быть богом': '', 'Ca$h': 'Its not about the money. Its about revenge', 'Z風暴': 'No one is truly good or bad.', 'Alpha and Omega: The Legend of the Saw Tooth Cave': 'A pack divided...', 'High School Musical 2': 'The start of something new', 'Two Lovers and a Bear': 'The north is freedom.', 'Aimee & Jaguar': 'A love larger than death.', 'The Four Seasons': 'A story of love, marriage and friends for life.', 'Friends with Money': 'Money changes everything', 'Shame': 'How deep is too deep?', 'Layer Cake': 'The rules are simple. There are no rules.', 'The Work and the Glory II: American Zion': 'One mans faith. One familys destiny.', 'Aberdeen': 'Discover the life you have been missing.', 'Tracker': 'One mans revenge is another mans redemption.', 'Control': 'The spirit of rock and roll.', 'The Brothers': 'Sometimes, the thing you are looking for is closer than you think...', 'School Daze': 'Are you ready for the revolution?', 'Dolphins and Whales: Tribes of the Ocean': '', 'College': 'The most important four years of your life... don’t go to class.', 'Shattered Glass': 'He would do anything to get a great story. Anything.', 'Novocaine': 'Leave your baggage behind.', 'The Business of Strangers': 'Some things are meant to stay secret.', 'The Wackness': 'A lot can go down between th...', 'Morvern Callar': 'The rave scene has arrived ...', 'Beastmaster 2: Through the Portal of Time': 'One mans journey into mys...', 'La fleur du mal': '', 'The Greatest': 'Life is a leap of faith', 'Snow Flower and the Secret Fan': 'The secret of a womans he...', 'Lucky Break': '', 'Surfer, Dude': 'The Endless Summer continues...', 'Emma': 'Love knows no boundaries.', 'Stiff Upper Lips': '', 'Crossover': 'Cross over to the winning ...', 'Khiladi 786': '', 'Iris': 'Her greatest talent was for...', 'Les Choristes': '', 'Le Havre': '', 'Animals': '', 'A Room for Romeo Brass': 'Theres beauty in everyone. ...', 'Lights Out': '', 'The Return of the Pink Panther': 'That famous French detective...', 'House Party 2': '', 'Still Alice': '', 'Not Easily Broken': '', 'Digimon: The Movie': 'New Monsters. New Battles. ...', 'Saved!': '', 'Force 10 from Navarone': '', 'Standard Operating Procedure': '', 'Redacted': 'Truth is the first casualty ...', 'Fascination': '', 'Area 51': 'There are things in this p...', 'Dead Like Me: Life After Death': '', 'Henry & Me': '', 'We Have Your Husband': '', 'Dying of the Light': '', 'Born Of War': 'The battleground has chan...', 'Running Forever': '', 'Navy Seals vs. Zombies': '', 'Obsluhoval jsem anglického krále': '', 'Soul Kitchen': '', 'Take Shelter': '', 'Driving Lessons': 'A comedy about someone you ...', 'Camping Sauvage': '', 'Without Men': 'No men. No sex. No way out.', 'Dear Frankie': 'The truth heals.', 'All Hat': '', 'Requiem for a Dream': 'From the director of "P...', 'State Fair': '', 'Salvando al Soldado Perez': '', 'Karakter': '', 'Life During Wartime': '', 'Nannerl, la soeur de Mozart': '', '放‧逐': '', 'Lilja 4-Ever': '', 'Fugly': '', "R.L. Stine's Monsterville: The Cabinet of Souls": '', 'Silent Movie': '', 'Anne of Green Gables': '', 'Falcon Rising': '', 'Snabba Cash': '', 'Whale Rider': 'One young girl dared to di...', 'Paa': '', 'Cargo': '', 'Love and Death on Long Island': '', 'The Greatest Show on Earth': 'The Mightiest Motion Pictu...', 'Hansel and Gretel Get Baked': '', 'The Front Page': 'More rib-tickling, eye-...', 'The Jerky Boys': '', 'The Real Cancun': 'This Spring Break, the rea...', 'Love Stinks': '', 'Thumbsucker': 'Everyones got a different...', 'Samsara': 'Discover the story of one word.', 'The Loss of Sexual Innocence': 'A movie about love, sex and growing up.', 'Joe': "Don't try to fight it.", 'Guten Tag, Ramón': 'A story of hope, friendship and survival.', 'Adore': 'Desire can be deadly.', 'Nothing': '', 'Географ глобус пропил': '', 'How to Fall in Love': 'When true love breaks all the rules.', 'The Perfect Wave': "One man's journey in search of the perfect wave.", 'Major Dundee': 'The epic story of the Great Southwest!', 'Annie Get Your Gun': "It's the Show-Stoppen'est Show in Town!", 'Casa de Areia': 'The desert knows no mercy.', 'The Ballad of Cable Hogue': "It's the story of a man who became a legend!", 'In Cold Blood': 'The book that shook a nation now reaches the screen!', 'Gods and Monsters': 'The man who created Frankenstein. The monster who became a legend.', 'El secreto de sus ojos': 'The crime was only the beginning.', 'The Masked Saint': 'Not all heroes wear capes.', 'The Secret': 'Change your life. Change your world.', 'In the Name of the King III': 'The most daring installment yet!', 'August': 'A story about the moments that define us.', 'Dreaming of Joseph Lees': "The story of a woman's first love.", 'Feast': "They're Hungry. You're Dinner.", '归来': 'The story of a hero.', 'A Room with a View': 'Only the greatest novel of the century could spawn so many imitators.', 'Martin Lawrence Live: Runteldat': "You'll never stop laughing!", 'Spaced Invaders': 'Earth will never be the same!', "Dave Chappelle's Block Party": "It's not just a concert, it's a party!", 'Next Day Air': 'It was the wrong day to deliver.', 'Phat Girlz': "Big girls don't cry... They kick ass!", 'Woman Thou Art Loosed': 'Break the silence. Find the truth.', 'Real Women Have Curves': 'This is what real women look like.', 'Water': 'Love and faith in a world of prejudice.', 'Kama Sutra - A Tale of Love': 'In a world ruled by pleasure, love is the ultimate seduction.', 'Please Give': 'Taking care of everyone else is a full-time job.', 'Warlock: The Armageddon': 'The Son of Satan Has Returned.', 'Frances Ha': 'A story of falling down, starting over, and getting lost in the city.', 'DysFunktional Family': 'Nothing but love for ya.', 'Letters to God': 'One boys journey will change the world around him.', 'Compadres': 'Every betrayal begins with trust.', "Love's Abiding Joy": 'Will hardship strengthen their love?', 'Brave New Girl': 'Her talent made her a star, but her courage made her a hero.', "Tim and Eric's Billion Dollar Movie": 'The first film that dares to ask: "Why billion?"', 'Sommersturm': 'The story of a summer that changed everything.', 'Fort McCoy': 'There is a time to stand up and a time to lay low.', 'The Deported': 'Deportation is just the beginning.', 'Tanner Hall': 'Coming of age never looked so beautiful.', 'Open Road': 'Every journey begins with a single step.', 'Never Back Down 2: The Beatdown': 'The fighter is about to become the hunted.', 'Enter the Dangerous Mind': 'Music was his passion. Survival is his masterpiece.', 'Something Wicked': 'Fear is just the beginning.', 'Iguana': 'In the middle of nowhere, anything can happen.', 'Boynton Beach Club': 'A romantic comedy about life, love, and second chances.', "Ulee's Gold": 'The courage to care.', 'Sardaarji': 'A smart, young, fearless man.', 'Rejsen til Saturn': 'The first Danish animated movie ever.', 'De jurk': 'Fashion is murder.', 'Ida': 'The sins of the past will bind them together.', 'Maurice': 'Country life was never so scandalous.', 'Riding Giants': 'Three stories tall and surfing.', '疯狂的赛车': 'Fast, furious, and totally crazy.', 'Timber Falls': 'When the sun goes down, the terror begins.', 'Garden State': 'An adventure too weird for words.', 'Sur le seuil': 'One step away from the edge.', "Jesus' Son": 'Heroin makes him feel like a king.', 'Brick Lane': 'A journey of a woman torn between two worlds.', 'My Life Without Me': 'Live every day as if it were your last.', 'Fuel': 'You cant drive a car with a computer.', 'The Other End of the Line': 'Two countries. Two cultures. One connection.', 'Christmas Mail': 'Delivering love, one letter at a time.', 'Antibirth': 'Some things are better left unborn.', 'Thr3e': 'The only thing more terrifying than insanity is the truth.', 'Go for It!': 'Follow your heart or live to regret it.', 'Redemption Road': 'Some journeys are measured by the distance traveled. Others, by what you leave behind.', 'The Last Sin Eater': 'When the last sinner dies, the first forgiveness will arrive.', 'Do You Believe?': 'Experience the power of the Cross.', 'Impact Point': 'In the blink of an eye, she lost everything.', 'The Valley of Decision': 'Her love made him a king. Her secrets could make him a pawn.', 'Chicken Tikka Masala': 'A feast for the senses!', 'Elling': 'They are packed, and they are off... on the most hilarious adventure of their lives!', 'Mi America': 'Everyone has their own America.', 'Lies in Plain Sight': 'A story of family, love, and betrayal.', 'Sharkskin': 'A love story...', 'The Toxic Avenger Part II': 'Not since the Three Stooges met Frankenstein has there been such a combination of horror and hilarity!', 'Everything You Always Wanted to Know About Sex *But Were Afraid to Ask': 'Woody Allens Everything You Always Wanted to Know About Sex *But Were Afraid to Ask', 'To Kill a Mockingbird': 'The most beloved and widely read Pulitzer Prize Winner now comes vividly alive on the screen!', 'Les triplettes de Belleville': "It's not about winning. It's about not finishing last.", 'Smoke Signals': 'A new film from the heart of Native America.', "Gentleman's Agreement": 'Tenderly Played... Deeply Felt... Unforgettable!', 'Touching the Void': 'A true story of survival.', 'Me and You and Everyone We Know': 'A lonely shoe salesman and an eccentric performance artist struggle to connect in this unique take on contemporary life.', 'Vals Im Bashir': 'He enters the most hidden place on earth, the place of a man without dreams, and becomes witness to his story.', 'In the Shadow of the Moon': 'Remember when the whole world looked up.', 'Dinner Rush': 'One night in the life of a restaurant.', 'Clockwatchers': 'Punch in for laughs.', 'The Virginity Hit': 'They wanted to get laid, they got screwed.', 'House of D': 'See the world a little differently.', 'Hum To Mohabbat Karega': 'If you think you have problems... Think again...', "It's All Gone Pete Tong": 'Meet Frankie Wilde. The legend. The icon. The man.', 'Saint John of Las Vegas': 'What happens in Vegas, stays in Vegas... but sometimes it follows you home.', '24 7: Twenty Four Seven': 'Against all odds, life goes on.', 'Roadside Romeo': 'A love story that is unleashed once in a lifetime.', 'This Thing of Ours': 'The wise guys are about to get a wake-up call.', 'Freeze Frame': 'Every picture tells a story.', 'To Be Frank, Sinatra at 100': 'It was the voice that defined a generation... now hear the untold story behind the legend.', 'Bananas': 'The bananas may be fake, but the laughs are real!', 'Rockaway': 'Their world is about to get a wake-up call.', "No Man's Land: The Rise of Reeker": 'When the dead break free, all hell breaks loose!', 'Small Apartments': 'Not your average apartment.', 'Coffee Town': 'A story about friends, coffee, and the end of the world as we know it.', "Straight A's": 'When you realize getting some means wanting more', 'Slacker Uprising': '', 'Walking with the Dead': 'When the undead take over... who will take them on?', 'Northfork': 'Twin Peaks meets The Odyssey meets The Twilight Zone', 'The Marine 4: Moving Target': "They targeted his family. Now he's out for revenge.", 'Chacun sa nuit': '', 'Abandoned': 'Abandon all hope', 'Brotherly Love': 'Sometimes the hardest person to forgive is yourself', 'Higher Ground': 'The most powerful weapon is faith', 'Deadline - U.S.A.': '', 'Sublime': "You don't choose the spirits, they choose you", "A Beginner's Guide to Snuff": 'A comedy that will take you to pieces', 'Independence Daysaster': "Earth's last stand", 'Dabba': 'The taste of perfection', 'Yes': 'A passionate love affair turns into a life-altering moment of betrayal', 'N-Secure': "Protect what's yours", 'Out of the Dark': 'Fear the darkness. Fight the evil.', 'Ha-Buah': 'Three sisters. One journey.', 'दिल जो भी कहे': '', 'I Love Your Work': "There's no such thing as going too far", 'Kickboxer: Vengeance': 'One hope. One man. One chance.', 'The Gatekeepers': 'Can you ever truly know the enemy?', 'Killing Zoe': 'You only hurt the ones you love.', 'The Believer': 'The shocking true story of a young Jewish man who became a Neo-Nazi.', 'I Want Someone to Eat Cheese With': 'Life is a romantic comedy. Sometimes you just need to be cast in the right part.', 'Mooz-lum': 'One boy, one God, one chance.', 'Road Hard': 'Road Hard. Life Harder.', 'Forty Shades of Blue': 'Love is a game nobody wins.', 'Amigo': 'Freedom. Betrayal. Justice. Friendship. War. Who is your amigo?', 'Wal-Mart: The High Cost of Low Price': 'The high cost of low price.', 'Last I Heard': 'One last hit. One more chance.', 'Closer to the Moon': 'Based on the true story of one of the most incredible heists in history.', 'Mutant World': 'Beyond the apocalypse lies a new beginning.', 'Checkmate': 'Risk everything. Expect nothing.', 'Wind Walkers': 'They came for the treasure. They stayed for the kill.', 'Incident at Loch Ness': 'The truth is stranger than fiction.', 'La chambre bleue': 'In secret, they met. In secret, they loved. In secret, they live.', 'The Ballad of Gregorio Cortez': "The epic story of one man's fight for his rights.", 'Festen': 'Every family has a secret.', 'Trees Lounge': "A story about one man's search... for who knows what.", 'Vượt Sóng': 'The war took everything, except their courage and hope.', 'The Basket': 'Sometimes life can take you off course.', 'Kurmanjan Datka. Queen of the Mountains': 'The true story of a legendary woman who defied an empire.', 'The Hebrew Hammer': 'Part man. Part street. 100% kosher.', 'The 41–Year–Old Virgin Who Knocked Up Sarah Marshall and Felt Superbad About It': 'The ultimate disaster movie.', 'Forget Me Not': 'They were dying to be remembered.', 'Da Sweet Blood of Jesus': 'A Spike Lee joint.', 'Sex, Lies, and Videotape': 'A game of intimacy seduces everyone.', 'Super Troopers': 'Altered State Police.', 'The Algerian': 'The truth will find you.', 'Los insólitos peces gato': 'In the end, we are all a little fishy.', 'You Can Count on Me': "A story about family, loyalty, and other things that don't mix.", 'The Blue Bird': 'A classic fairy tale comes to life!', 'Que Horas Ela Volta?': 'In Brazil, it takes a village to raise a servant.', 'Certifiably Jonathan': 'The worlds greatest funnyman is missing.', 'Q': 'The ultimate secret is the ultimate weapon.', 'La Navaja De Don Juan': 'The razors edge of passion and danger.', 'Crowsnest': "You can't escape whats inside.", 'Bleeding Hearts': 'Sometimes love doesnt die.', 'Sex With Strangers': 'Some people like to watch. Some people like to be watched. And some people just cant look away...', "Dracula: Pages from a Virgin's Diary": 'Sometimes, the only way to get into the movies is to make one.', 'Faith Like Potatoes': 'Inspired by true events.', 'The Sleepwalker': 'A sleepwalking thriller.', "God's Not Dead 2": 'From the college classroom to the courtroom, God is Not Dead.', 'Departure': '', 'Obvious Child': 'For anyone who is ever been "what if-ed".', 'Frozen River': 'The American Dream isnt always black and white.', '20 Feet from Stardom': 'Meet the unsung heroes behind the greatest music of our time.', 'The Broken Hearts Club: A Romantic Comedy': 'Are you game?', 'Brigham City': 'Murder is a secret that should never be shared.', 'Palo Alto': 'Risk being unliked.', 'Ajami': 'Shakespearean in its scope, visceral in its impact.', 'I Origins': 'To believe, is to see.', 'Guiana 1838': 'The beginning of a legacy.', 'Lisa Picard Is Famous': 'A comedy about getting famous and throwing up!', 'A LEGO Brickumentary': 'The ultimate movie for fans of LEGO everywhere.', 'Chocolate: Deep Dark Secrets': 'A sweet treat or a recipe for disaster?', 'The Specials': 'They have superpowers, but they are just as screwed up as the rest of us.', '16 to Life': 'A romantic comedy for the rest of us.', 'Special': 'He wasnt very special until the experiment was over.', 'Sparkler': 'Go out with a bang.', 'The Helix... Loaded': 'When the future collides with the past, there is no time to waste.', 'The Jimmy Show': 'When your job is a joke... any friend is priceless.', 'Karachi se Lahore': 'Life is a journey not a destination.', 'Loving Annabelle': 'A story of first love.', 'Jimmy and Judy': 'They are in love. They kill people. What could go wrong?', 'Frat Party': 'The ultimate college party movie!', "The Party's Over": 'The party has started... and it may never end!', 'Childless': 'A twisted love story.', 'A escondidas': 'Sometimes the most dangerous place to be... is in love.', 'My Last Day Without You': 'One chance encounter can change everything.', 'Steppin: The Movie': 'To make it to the top, you have to step on a few people.', "Doc Holliday's Revenge": 'The wests deadliest dentist.', 'The Pet': 'Some pets should never be domesticated.', 'Fear Clinic': 'Face your fears.', 'Zombie Hunter': 'When the undead rise, every hunter becomes a zombie!', 'And Then Came Love': 'She thinks he is a royal pain. He thinks she is a spoiled princess. And they are both right.', 'Food Chains': 'There is nothing more essential than food.', 'The Horror Network Vol. 1': 'Do you dare watch?', 'Circumstance': 'Love, defiance, sexuality and politics collide.', "Le bonheur d'Elza": 'The past is never far.', '1982': 'Some things are worth fighting for.', 'Windsor Drive': 'Terror can come in any shape.', 'Raising Victor Vargas': 'It takes as much courage to have a crush as it does to be alone in New York City.', 'Die Büchse der Pandora': 'A captivating masterpiece of passion and betrayal.', 'Harrison Montgomery': 'Some people just need to disappear.', 'Cama adentro': 'Love, dignity and survival at a great expense.', 'Deterrence': 'Last night was the final straw. Today, War will know his opponent.', 'The Mudge Boy': 'Sometimes being different is the only way to truly fit in.', 'The Young Unknowns': 'When you are young, you make mistakes. If you are lucky, you can make them right.', 'Not Cool': 'Opposites attract, but should they?', 'Saints and Soldiers': 'Some fought for honor, others fought for love.', 'Vessel': 'She brings life into the world, even when it may not be viable.', 'Iraq for Sale: The War Profiteers': 'The story of what happens to everyday Americans when corporations go to war.', 'Aqua Teen Hunger Force Colon Movie Film for Theaters': '', 'Kill List': 'You dont understand, you werent there.', 'Rize': 'Bring It On meets the streets of L.A.', 'B-Girl': 'Her talent is her weapon.', 'Hav Plenty': 'Can love survive the weekend?', 'Starsuckers': 'The cost of fame has never been higher.', 'The Hadza: Last of the First': '', 'After': 'Your next obsession', 'Mercy Streets': 'Faith, Hope and Redemption', 'Arnolds Park': '', 'Water & Power': '', 'They Will Have to Kill Us First': 'Mali is Music', 'Light from the Darkroom': '', 'Médecin de campagne': '', 'The Harvest (La Cosecha)': '', 'Julija in Alfa Romeo': '', 'Faith Connections': '', 'Benji': 'You will believe in miracles!', 'Mad Hot Ballroom': 'Where the Waltz and Tango Meet the Streets.', 'Wordplay': '', 'Beyond the Mat': 'The movie Vince McMahon would never allow you to see!', 'Civil Brand': '', 'Lonesome Jim': 'There comes a time in life when you need to grow up...this is not that time.', 'Deceptive Practice: The Mysteries and Mentors of Ricky Jay': '', 'Kita svajonių komanda': '', 'Finishing The Game': 'The Race for the Role is On!', 'Kiss the Bride': 'It was the happiest day of their lives, until the night began.', 'The Slaughter Rule': 'Football is a game. Courage is a choice.', 'The Living Wake': 'The Ultimate Celebration of Life', '疯狂的石头': '', 'Scott Walker: 30 Century Man': '', 'Everything Put Together': '', 'Good Kill': 'The fight against terrorism starts at home.', 'The Outrageous Sophie Tucker': '', 'Girls Gone Dead': 'The Ultimate Spring Break Destination!', 'America Is Still the Place': '', 'Subconscious': '', 'El Rey de Najayo': '', 'Rodeo Girl': 'Her strength was her courage.', 'Born to Fly: Elizabeth Streb vs. Gravity': '', 'The Little Ponderosa Zoo': '', 'Straight Out of Brooklyn': 'When the future looks hopeless, you can always look to your dreams.', 'Bloody Sunday': 'On January 30th, 1972, British soldiers murdered 14 civilians.', 'Diamond Ruff': 'The heart of a champion beats in every ghetto.', 'Poultrygeist: Night of the Chicken Dead': 'Fast Food. High Caliber.', '42nd Street': 'The big-time musical hit!', 'Rise of the Entrepreneur: The Search for a Better Way': '', 'Så som i himmelen': '', 'Dogtown and Z-Boys': 'They made the rules... then broke them.', 'Gory Gory Hallelujah': 'A Redneck Rockstar vs. The Devil. Let the Armageddon Begin!', 'Tarnation': 'A family torn apart. A world in flux.', 'Love in the Time of Monsters': '', 'The Dark Hours': '', 'My Beautiful Laundrette': 'An unusual love story with your local launderette as the backdrop.', 'Show Me': 'She knows his secrets. His secrets could get her killed.', 'Trekkies': 'Resistance is Futile!', 'Murderball': 'Get into the game', 'American Ninja 2: The Confrontation': 'First it was teacher to student. Then it was father to son. Now, it is man to man.', 'Rotor DR1': 'Race for Your Life', 'The Big Swap': 'Swing out with the new partner every hour... or the bomb explodes.', 'Old Joy': 'Take the Time', '3 Backyards': 'You never know whats happening on your own street.', 'Pierrot le fou': 'The story of a man who thought he was Paul Pierrot', 'Sisters in Law': 'Justice is a woman', 'Ayurveda: Art of Being': 'An ancient science for modern times', 'Nothing But a Man': 'He wants a woman he can share his life with. She wants a man she can respect. And they both want a life that means something.', 'First Love, Last Rites': 'In every heart there is a room', 'Fighting Tommy Riley': 'One punch can change everything', 'Royal Kill': 'Nothing is what it seems', 'The Looking Glass': 'Can you survive your own mind?', 'Locker 13': 'Every locker has a secret.', 'Bizarre': 'A nightmare of sexual fantasy', 'Lovely & Amazing': 'If it was easy, everyone would do it.', 'Death Calls': 'You Cant Hang Up.', 'The Incredibly True Adventure of Two Girls In Love': 'There is a first time for everything', 'American Desi': 'Once you go desi, you never go back!', 'Love and Other Catastrophes': 'Romance isnt dead. It just smells funny.', 'I Married a Strange Person!': 'Its about compromise... (heh heh heh)', 'November': 'Dont trust anyone', 'Teeth and Blood': 'Evil has a new enemy', 'Sugar Town': 'where the laughs are cheap and the living is easy', 'The Sticky Fingers of Time': 'A time travel story that actually makes sense.', 'Sunday School Musical': 'Believing can be the greatest adventure of all', 'Rust': 'The only way out is to fight.', 'UnDivided': 'In America, there are no native terrorists', 'The Frozen': 'Its not the cold that kills you...', 'Give Me Shelter': 'A film about the animals we eat, and the people trying to save them.', 'The Big Parade': 'THE MOST POWERFUL WAR DRAMA IN THE HISTORY OF THE SCREEN! (original ad - all caps)', 'Little Big Top': 'Some people never go crazy. What truly horrible lives they must lead.', 'Along the Roadside': 'Life isnt always about following the signs', 'Short Cut to Nirvana: Kumbh Mela': 'Experience the worlds largest spiritual gathering', 'Middle of Nowhere': 'Sometimes you have to break a vow to keep a promise', 'Malevolence': 'Evil Will Always Find You.', 'Reality Show': 'In the world of Reality TV, life is not always what it seems...', 'Super Hybrid': 'Dont just watch the road, be the road.', 'Baghead': 'Four friends. One weekend. No escape.', 'Solitude': 'The mind can be a prison.', 'Chats perchés': 'A surreal, virtually plotless series of dreams centered around six middle-class people and their consistently interrupted attempts to have a meal together.', 'Ordet': 'Believe and be saved!', 'The Trials Of Darryl Hunt': 'The story of a wrongful conviction and one man’s search for justice', 'Samantha: An American Girl Holiday': 'Join Samantha on the adventure of a lifetime', 'Yesterday Was a Lie': 'A stylistic, genre-bending noir', 'Theresa Is a Mother': 'Sometimes losing everything leads to finding yourself', 'H.': 'The man behind the legend and the mystery that surrounds him!', 'Archaeology of a Woman': 'A womans search for independence in a mans world', 'Butterfly Girl': 'The extraordinary life of a young girl', 'Lumea e a mea': 'The world is mine', 'Another Earth': 'There is another you out there', 'Perfect Cowboy': 'The hardest thing to leave behind is the last thing you will remember', 'The Woman Chaser': 'One mans struggle for power, sex, and opportunity in the wild west of advertising', 'The Horse Boy': 'An incredible journey to heal his son and himself', 'Heroes of Dirt': 'Sometimes the battle follows you home', 'Antarctic Edge: 70° South': 'Scientists brave the world’s harshest conditions to unlock secrets of our planet', 'Top Spin': 'Following three teenagers as they battle their way through the world of competitive ping pong', 'Roger & Me': 'A new comedy about America', 'An American in Hollywood': 'Making it in L.A. is tougher than you think', 'Sound of My Voice': 'Would you follow her?', 'The Blood of My Brother: A Story of Death in Iraq': 'One familys sacrifice in the face of war', "Dude Where's My Dog?": '', 'Indie Game: The Movie': 'Game developers, their struggles, and their triumps', 'The Past Is a Grotesque Animal': 'An attempt to capture the essence of the animal spirit', 'Peace, Propaganda & the Promised Land': 'How Israel manipulates and distorts American public perceptions', 'Queen Crab': 'She is boiling mad', 'Supporting Characters': 'Do you know who your friends are?', 'The Dirties': 'Two best friends are filming a comedy about getting revenge on bullies. One of them is not joking.', 'Gabriela, Cravo e Canela': '', 'The Naked Ape': 'Do animals act as human beings do?', 'Counting': 'A kaleidoscopic diary film about love, death, and the eternal quest for meaning', 'The Call of Cthulhu': 'The celebrated tale of H.P. Lovecraft', 'The Image Revolution': 'The untold story of Image Comics', 'A True Story': 'Based on actual lies', 'George Washington': 'A story of love, loss, and growing up', 'The Exploding Girl': 'Life is about letting go', "The Legend of God's Gun": 'The west will never be the same', 'Mutual Appreciation': 'A love triangle without the sex', 'Her Cry: La Llorona Investigation': '', 'Funny Ha Ha': 'Love isn’t what it used to be', 'Manito': 'One family, one neighborhood, one day', 'Slacker': 'Putting the "blah blah blah" in indie cinema since 1991', 'Dutch Kills': 'Every con has its pros', 'Flywheel': 'In every mans life, theres a turning point', 'The Puffy Chair': 'A road trip. A relationship. A piece of furniture.', 'Stories of Our Lives': 'Intimacy. Trust. Vulnerability.', 'Breaking Upwards': '', 'Sanctuary: Quite a Conundrum': '', 'Cavite': '', 'Signed, Sealed, Delivered': '', 'My Date with Drew': ''}
empty_tag_2 = {'Asterix at the Olympic Games': 'See you in Athens!',
 'The Flowers of War': 'When the enemy takes all that you have... You fight back.',
 'Evolution': 'Coming to wipe that silly smile off your planet.',
 'Top Cat Begins': 'The Cat is Back!',
 'The Monkey King 2': 'The Legend Begins Anew',
 'The Messenger: The Story of Joan of Arc': 'For the good of all men, and the love of one woman, she fought to uphold justice by breaking the law.',
 'Earth': 'A nature adventure film for all ages.',
 'Solaris': 'Let us take you with us to Solaris, planet of mystery, embodiment of man’s latent conflict with the unknown. Man, face to face with his conscience, and with his past. A film that searches deeper than the eye can see, deeper than the mind can image.',
 'The Warlords': 'Brothers. Warriors. Heroes.',
 'A Monster in Paris': 'Paris had never seen such a monster.',
 'The Inhabited Island': 'Mankind conquered the cosmos. But on Earth they did not know themselves.',
 'Ip Man 3': 'When a band of brutal gangsters led by a crooked property developer make a play to take over a local school, Master Ip is forced to take a stand.',
 'The Horseman on the Roof': 'Passion. Ambition. Butterflies.',
 'Black Water Transit': 'His Family, His City, His Rules.',
 'Little Nicholas': 'Little Nicolas just wants to be a normal kid.',
 'Animals United': 'One day all the animals came together and decided to have a party.',
 'Space Pirate Captain Harlock': 'The classic manga adventure epic!',
 'Lost Souls': 'Deliver us from Evil.',
 'Winged Migration': "A bird epic. The kind of film you've never seen before. The kind of film you'll never forget.",
 'Kundun': 'The destiny of a people lies in the heart of a boy.',
 'Alatriste': 'Knight. Hero. Legend.',
 'Flawless': 'A rock-solid diamond heist thriller.',
 'The Magic Flute': 'The masterpiece adventure of Ingmar Bergman.',
 'The Lucky One': 'Some things are meant to be.',
 'Dance Flick': 'From the guys who brought you "Scary Movie".',
 'Punch-Drunk Love': 'The perfect man. The perfect story. The perfect lie.',
 "Angela's Ashes": 'The unforgettable story of life in Ireland during the 1930s.',
 'Fly Me to the Moon': 'The greatest adventure of all time begins with a small step.',
 'Sheena': 'Protector of the jungle!',
 'Underclassman': "He's undercover...and over the top.",
 'Where the Truth Lies': 'The truth is never overrated.',
 "The True Story of Puss 'n Boots": 'Join The Adventure!',
 'Space Dogs': 'The stars have never been closer.',
 'Waterloo': 'Outnumbered, Outsmarted, Outfought - Only one will be crowned.',
 'Think Like a Man Too': 'The wedding you always dreamed of.',
 'Cheri': 'Youth is wasted on the wrong people.',
 'Mad Money': 'Crime pays... in unexpected ways.',
 'Moliere': 'From rags to riches to one of the greatest writers of his time.',
 'To Rome with Love': 'The Eternal City... A pulsing beat... And one crazy weekend that will change two couples lives forever.',
 'Away We Go': "They're in search of a place to call home.",
 'The Tigger Movie': 'The one-and-only Tigger in his own big movie!',
 'The American': 'In the hidden world of assassins, a lone gunman is out for redemption.',
 'Bullets Over Broadway': 'A killer comedy!',
 'Code Name: The Cleaner': 'Cedric the Entertainer is The Cleaner.',
 'The Oogieloves in the Big Balloon Adventure': 'Get ready to sing, dance, and laugh with the whole family!',
 'Three Kingdoms: Resurrection of the Dragon': 'In an age of war, one man must unite a country to unite his people.',
 'Ramanujan': 'The man who knew infinity.',
 'Dwegons': 'Friendship has never been so magical.',
 'The Baader Meinhof Complex': "The explosive true story of Germany's most notorious domestic terrorist group.",
 'Firestorm': 'The ultimate heist requires the ultimate betrayal.',
 'Out of Inferno': 'When fire breaks out, heroes rise to the occasion.',
 'The White Ribbon': 'Every truth has a consequence.',
 'Welcome to the Sticks': "He's a postman. He's not stupid. Just unlucky.",
 'Tidal Wave': 'Nature unleashes its fury.',
 'The Assassin': 'Trained in the ways of the martial arts, she has become the most dangerous assassin in the land.',
 'The Return': "Ten years ago they vanished without a trace. Now they're back.",
 'Bathory: Countess of Blood': 'Some women would kill for eternal beauty.',
 'Chiamatemi Francesco - Il Papa della gente': 'The humble man who became a pope.',
 "Sarah's Key": 'Uncover the mystery. Discover the past.',
 'Wild Grass': 'In life, as in love, sometimes the unexpected is just what you need.',
 'Reign of Assassins': "One woman's journey to redemption will become the battle of her life.",
 'The Widow of Saint-Pierre': "A woman's passion has the power to change a man's fate.",
 'Quest for Fire': 'The ultimate adventure.',
 'Tae Guk Gi: The Brotherhood of War': 'Brothers at war. One is a hero. The other a soldier.',
 "Def Jam's How to Be a Player": "The ultimate player's guide to love and romance.",
 'House of Flying Daggers': 'Passion burns deep.',
 'The Great Beauty': 'The search for beauty and meaning in an age of excess.',
 'A Woman, a Gun and a Noodle Shop': 'Love, murder, and noodles in a small town in China.',
 'City of Life and Death': "When survival is all that's left, love is all that matters.",
 'Madadayo': 'A life in full.',
 'A Tale of Three Cities': 'Three lives, two cities, one destiny.',
 'The Blue Butterfly': 'Adventure can change your life forever.',
 'The Bélier Family': 'A heartwarming family comedy about finding your voice.',
 'I Am Love': 'Passion. Seduction. Betrayal. The private life of a family in the midst of changing times.',
 'Krrish': 'The ultimate superhero.',
 'The Swindle': 'The ultimate con.',
 'The Chambermaid on the Titanic': 'A love story that will sweep you off your feet',
 'Of Horses and Men': 'A poetic and unforgettable journey',
 'Loose Cannons': 'A heartwarming tale of family and acceptance',
 'The Legend of Suriyothai': 'A sweeping epic of love and war',
 'Flame & Citron': 'A thrilling story of courage and betrayal',
 'The Girl on the Train': 'A gripping mystery of obsession and deceit',
 'Star Wars: Clone Wars: Volume 1': 'The action-packed prequel to the Star Wars saga',
 'Shipwrecked': 'A thrilling adventure on the high seas',
 'Silmido': 'The true story of the toughest special forces unit in history',
 'The City of Your Final Destination': 'A journey of discovery and self-discovery',
 'The Rocket: The Legend of Rocket Richard': 'A tale of hockey, perseverance, and hope',
 'My Lucky Star': 'A romantic comedy that will steal your heart',
 'Green Street Hooligans: Underground': 'The underground world of football hooliganism',
 'Far from Men': 'A haunting and powerful drama of war and humanity',
 'Outside Bet': 'A heartwarming comedy about a group of friends',
 'The Omega Code': 'The ultimate battle between good and evil',
 'March or Die': 'A story of courage and honor in the face of danger',
 'Gerry': 'A mesmerizing journey into the unknown',
 'Hard to Be a God': 'A mind-bending journey into a bizarre and brutal world',
 'Z Storm': 'A thrilling tale of corruption and justice',
 'Dolphins and Whales: Tribes of the Ocean': 'A breathtaking journey into the world of marine life',
 'The Flower of Evil': 'A dark and twisted tale of family secrets and betrayal',
 'Lucky Break': 'A hilarious prison break comedy with heart',
 'Stiff Upper Lips': 'A hilarious send-up of Victorian England',
 'Khiladi 786': 'A high-octane Bollywood action-comedy',
 'The Chorus': 'A heartwarming story of music, hope, and redemption',
 'Le Havre': 'A charming and whimsical tale of kindness and community',
 'Animals': 'A raw and honest look at the highs and lows of youth',
 'Lights Out': 'A terrifying horror movie that will keep you on the edge of your seat',
 'House Party 2': 'A wild and crazy party with some of hip-hop’s biggest stars',
 'Still Alice': 'A heart-wrenching story of love and loss',
 'Not Easily Broken': 'A powerful and uplifting story of faith and perseverance',
 'Saved!': 'A hilarious satire of religious fundamentalism',
 'Force 10 from Navarone': 'A thrilling World War II adventure',
 'Standard Operating Procedure': 'A shocking exposé of the abuses of power in the military',
 'Fascination': 'A stylish and seductive thriller',
 'Dead Like Me: Life After Death': 'A darkly comic tale of life, death, and the afterlife',
 'Henry & Me': 'A heartwarming animated movie about the power of baseball',
 'We Have Your Husband': 'A gripping true story of love and desperation',
 'Dying of the Light': 'A tense and thrilling action movie with a great cast',
 'Running Forever': 'An inspiring story about never giving up',
 'Navy Seals vs. Zombies': 'Heroes never die, they just reload',
 'I Served the King of England': 'From a waiter to a millionaire, one man’s journey',
 'Soul Kitchen': 'A place where the food is hot and the music is cool',
 'Take Shelter': 'Prepare for the storm',
 'Camping Sauvage': 'Getting lost is the best way to find yourself',
 'All Hat': 'When the going gets tough, the tough get even',
 'State Fair': 'A celebration of agriculture, competition, and love',
 'Saving Private Perez': "He's not heavy, he's my brother",
 'Character': 'Power, passion, and revenge in a world gone mad',
 'Life During Wartime': 'The past is never far away',
 "Mozart's Sister": 'A woman ahead of her time',
 'Exiled': 'Family, loyalty, betrayal. Welcome to the underworld',
 'Lilya 4-ever': 'Some dreams are better than reality',
 'Fugly': 'The journey to find love and acceptance',
 "R.L. Stine's Monsterville: The Cabinet of Souls": 'The Halloween adventure you’ll never forget',
 'Silent Movie': 'Laugh-out-loud comedy without saying a word',
 'Anne of Green Gables': 'A timeless classic about love and family',
 'Falcon Rising': 'Justice has a new name',
 'Easy Money': 'Money, power, and crime in the Swedish underworld',
 'Paa': 'A heartwarming story about family and love',
 'Cargo': 'Survival is a mother’s instinct',
 'Love and Death on Long Island': 'A story of unlikely friendship and love',
 'Hansel and Gretel Get Baked': 'Some fairy tales are not for kids',
 'The Jerky Boys': 'The outrageous comedy that started it all',
 'Love Stinks': 'The comedy that proves love hurts',
 'Nothing': 'Sometimes nothing can be a real cool hand',
 'The Geographer Drank His Globe Away': 'A road trip for the soul',
 'The House of Sand': 'A story of love and survival in the Brazilian desert',
 'The Secret in Their Eyes': 'Justice has no expiration date',
 'Coming Home': 'The most unforgettable love story',
 'Summer Storm': 'A coming-of-age story with a twist',
 'Journey to Saturn': 'A comedy that is out of this world',
 'The Dress': 'A thriller that will keep you on the edge of your seat',
 'Silver Medalist': 'Life is not about winning, it’s about surviving',
 'Evil Words': 'The truth can be deadly',
 'The Triplets of Belleville': 'A wild and crazy ride through the City of Light',
 'Waltz with Bashir': 'Memory is a powerful weapon',
 'Slacker Uprising': 'Freedom is not free',
 'The Walking Deceased': 'Zombie apocalypse meets parody',
 'One to Another': 'A French thriller about passion and murder',
 'Deadline - U.S.A.': 'The greatest newspaper movie ever made',
 'The Lunchbox': 'A delicious love story from India',
 'Caramel': 'Life, love, and sweet moments',
 'The Bubble': 'Love knows no borders',
 'Dil Jo Bhi Kahey...': 'A family drama with a cross-cultural love story',
 'Kids': 'A raw and unflinching look at teenage life',
 'The Blue Room': 'A steamy thriller about passion and murder',
 'The Celebration': 'Family secrets can be deadly',
 'Journey from the Fall': 'The emotional journey of a family torn apart by war and reunited by destiny.',
 'Queen of the Mountains': 'She climbed the heights, and conquered her destiny.',
 'The Amazing Catfish': 'Sometimes, life needs to swim against the current.',
 'The Second Mother': "A mother's love knows no bounds.",
 'Desire': 'What do you really want?',
 'The Blade of Don Juan': 'He wields the blade of justice, but is it enough?',
 'Departure': 'Sometimes, the journey is more important than the destination.',
 'Hidden Away': 'Some secrets are meant to be kept hidden.',
 'Elza': 'A journey to the roots of the soul.',
 "Pandora's Box": 'Once opened, there is no going back.',
 'Live-In Maid': 'A relationship beyond the confines of employment.',
 'Aqua Teen Hunger Force Colon Movie Film for Theaters': 'Fast food meets fast-paced action in this animated comedy.',
 'The Hadza: Last of the First': 'A vanishing tribe and the fight to preserve their way of life.',
 'Carousel of Revenge': 'Revenge is a dish best served cold.',
 'Water & Power': 'A tale of two brothers, one dream, and the price of loyalty.',
 'Light from the Darkroom': 'In the darkest moments, there is still light to be found.',
 'The Country Doctor': "A doctor's oath to serve his community.",
 'The Harvest (La Cosecha)': 'The hidden faces of child labor in America.',
 'Juliet and Alfa Romeo': 'A love story that defies all odds.',
 'Faith Connections': 'Faith knows no borders.',
 'Wordplay': 'The art and joy of words.',
 'Civil Brand': 'Injustice breeds rebellion.',
 'Deceptive Practice: The Mysteries and Mentors of Ricky Jay': 'The art of deception, revealed.',
 'The Other Dream Team': 'Basketball and the fight for freedom.',
 'Crazy Stone': 'The chase for a priceless jade stone leads to madness.',
 'Scott Walker: 30 Century Man': 'The unconventional career of an enigmatic musician.',
 'Everything Put Together': "When life falls apart, it's up to us to put it back together.",
 'The Outrageous Sophie Tucker': 'The rags-to-riches story of a legendary performer.',
 'America Is Still the Place': 'The American Dream, still alive and well.',
 'Subconscious': 'The mind is a battlefield.',
 'El Rey de Najayo': "A man's quest for redemption in a world of corruption.",
 'Born to Fly: Elizabeth Streb vs. Gravity': 'Dancing with danger, soaring with grace.',
 'The Little Ponderosa Zoo': "A zoo that's more than just a collection of animals.",
 'Rise of the Entrepreneur: The Search for a Better Way': 'A journey into the world of entrepreneurship.',
 'As It Is in Heaven': 'A choir that lifts the soul to new heights.',
 'Love in the Time of Monsters': 'Zombies, monsters, and true love.',
 'The Dark Hours': 'The darkness within us all.',
 'Pierrot le Fou': 'A wild and crazy love story.',
 'American Beast': 'The beast within us all.',
 'The Case of the Grinning Cat': 'A cat, a mystery, and a city on the brink of change.',
 'The World Is Mine': 'When power corrupts, who will stand up for the people?',
 "Dude Where's My Dog?": 'A dog, a mystery, and a race against time.',
 'Gabriela': 'A love that defies convention.',
 'Her Cry: La Llorona Investigation': 'A chilling investigation into a legendary ghost story.',
 'Breaking Upwards': "Breaking up is hard to do, but sometimes it's the only way to move forward.",
 'The Circle': 'Privacy, transparency, or freedom? Choose wisely.',
 'Sanctuary: Quite a Conundrum': 'A twisted tale of love, betrayal, and murder.',
 'Cavite': 'A pulse-pounding thriller that will keep you on the edge of your seat.',
 'Signed, Sealed, Delivered': 'A love story delivered straight to your heart.',
 'My Date with Drew': "One man's quest to win the heart of his dream girl.",
 'The Hadza: Last of the First': 'Discover the last tribe of hunter-gatherers in Africa - The Hadza, and witness their struggle to survive in a rapidly changing world.'}

In [212]:
merged_dict = empty_tag.copy()

for key, value in empty_tag_2.items():
    if key in merged_dict:
        merged_dict[key] += value
    else:
        merged_dict[key] = value

In [213]:
#filling out empty taglines
def fill_empty_tagline(row):
    if row['title'] in merged_dict:
        row['tagline'] = merged_dict[row['title']]
    return row

df = df.apply(fill_empty_tagline, axis=1)
df['tagline'].fillna('', inplace=True)

In [214]:
# Count empty values
num_empty = df['tagline'].eq('').sum()
print("Number of empty values in 'tagline' column:", num_empty)

Number of empty values in 'tagline' column: 1


In [215]:
df.head()

Unnamed: 0,movie_id,title,cast_names,director,producer,screenplay_writer,budget,overview,runtime,tagline,vote_average,vote_count,genre_list,keywords_unpacked
0,19995,Avatar,"[Sam Worthington, Zoe Saldana, Sigourney Weave...",James Cameron,"[James Cameron, Jon Landau]",[James Cameron],237000000,"In the 22nd century, a paraplegic Marine is di...",162.0,Enter the World of Pandora.,7.2,11800,"[Action, Adventure, Fantasy, Science Fiction]","[culture clash, future, space war, space colon..."
1,285,Pirates of the Caribbean: At World's End,"[Johnny Depp, Orlando Bloom, Keira Knightley, ...",Gore Verbinski,"[Jerry Bruckheimer, Eric McLeod, Chad Oman, Pe...","[Ted Elliott, Terry Rossio]",300000000,"Captain Barbossa, long believed to be dead, ha...",169.0,"At the end of the world, the adventure begins.",6.9,4500,"[Adventure, Fantasy, Action]","[ocean, drug abuse, exotic island, east india ..."
2,206647,Spectre,"[Daniel Craig, Christoph Waltz, Léa Seydoux, R...",Sam Mendes,"[Barbara Broccoli, Michael G. Wilson]","[John Logan, Robert Wade, Neal Purvis, Jez But...",245000000,A cryptic message from Bond’s past sends him o...,148.0,A Plan No One Escapes,6.3,4466,"[Action, Adventure, Crime]","[spy, based on novel, secret agent, sequel, mi..."
3,49026,The Dark Knight Rises,"[Christian Bale, Michael Caine, Gary Oldman, A...",Christopher Nolan,"[Charles Roven, Christopher Nolan, Emma Thomas]","[Christopher Nolan, Jonathan Nolan]",250000000,Following the death of District Attorney Harve...,165.0,The Legend Ends,7.6,9106,"[Action, Crime, Drama, Thriller]","[dc comics, crime fighter, terrorist, secret i..."
4,49529,John Carter,"[Taylor Kitsch, Lynn Collins, Samantha Morton,...",Andrew Stanton,"[Colin Wilson, Jim Morris, Lindsey Collins]","[Andrew Stanton, Michael Chabon, Mark Andrews]",260000000,"John Carter is a war-weary, former military ca...",132.0,"Lost in our world, found in another.",6.1,2124,"[Action, Adventure, Science Fiction]","[based on novel, mars, medallion, space travel..."


### DATA PREPROCESSING

In [216]:
# Combining columns vote_averge and vote_count
df['rating'] = df['vote_average'] * df['vote_count']

#categorizing them by creating bins
df['rating'] = pd.qcut(df['rating'], q=3, labels=['badrating', 'okayrating', 'goodrating'])

In [217]:
# Create a function to categorize the runtime
def categorize_runtime(runtime):
    if runtime < 90:
        return 'Shortmovie'
    elif runtime <= 150:
        return 'Mediummovie'
    else:
        return 'Longmovie'

# Apply the categorize_runtime function to create a new column
df['runtime_cat'] = np.vectorize(categorize_runtime)(df['runtime'])

  outputs = ufunc(*inputs)


In [218]:
def list_to_string(val):
    if isinstance(val, list):
        return ', '.join(val)
    return str(val)

# Apply the function to the specified columns
columns_to_convert = ['director', 'cast_names', 'producer', 'screenplay_writer', 'genre_list', 'keywords_unpacked']
for column in columns_to_convert:
    df[column] = df[column].apply(list_to_string)

In [219]:
def combine_words(df, columns, new_column_suffix='_combined'):
    for column in columns:
        new_column_name = column + new_column_suffix
        df[new_column_name] = df[column].str.replace(' ', '').str.replace(',', ', ')

# Specify the columns to combine words
columns_to_combine = ['title', 'director', 'cast_names', 'producer', 'screenplay_writer', 'genre_list', 'keywords_unpacked']

# Apply the function
combine_words(df, columns_to_combine, new_column_suffix='_1')

# Display the updated dataframe
df.head()

Unnamed: 0,movie_id,title,cast_names,director,producer,screenplay_writer,budget,overview,runtime,tagline,...,keywords_unpacked,rating,runtime_cat,title_1,director_1,cast_names_1,producer_1,screenplay_writer_1,genre_list_1,keywords_unpacked_1
0,19995,Avatar,"Sam Worthington, Zoe Saldana, Sigourney Weaver...",James Cameron,"James Cameron, Jon Landau",James Cameron,237000000,"In the 22nd century, a paraplegic Marine is di...",162.0,Enter the World of Pandora.,...,"culture clash, future, space war, space colony...",goodrating,Longmovie,Avatar,JamesCameron,"SamWorthington, ZoeSaldana, SigourneyWeaver, S...","JamesCameron, JonLandau",JamesCameron,"Action, Adventure, Fantasy, ScienceFiction","cultureclash, future, spacewar, spacecolony, s..."
1,285,Pirates of the Caribbean: At World's End,"Johnny Depp, Orlando Bloom, Keira Knightley, S...",Gore Verbinski,"Jerry Bruckheimer, Eric McLeod, Chad Oman, Pet...","Ted Elliott, Terry Rossio",300000000,"Captain Barbossa, long believed to be dead, ha...",169.0,"At the end of the world, the adventure begins.",...,"ocean, drug abuse, exotic island, east india t...",goodrating,Longmovie,PiratesoftheCaribbean:AtWorld'sEnd,GoreVerbinski,"JohnnyDepp, OrlandoBloom, KeiraKnightley, Stel...","JerryBruckheimer, EricMcLeod, ChadOman, PeterK...","TedElliott, TerryRossio","Adventure, Fantasy, Action","ocean, drugabuse, exoticisland, eastindiatradi..."
2,206647,Spectre,"Daniel Craig, Christoph Waltz, Léa Seydoux, Ra...",Sam Mendes,"Barbara Broccoli, Michael G. Wilson","John Logan, Robert Wade, Neal Purvis, Jez Butt...",245000000,A cryptic message from Bond’s past sends him o...,148.0,A Plan No One Escapes,...,"spy, based on novel, secret agent, sequel, mi6...",goodrating,Mediummovie,Spectre,SamMendes,"DanielCraig, ChristophWaltz, LéaSeydoux, Ralph...","BarbaraBroccoli, MichaelG.Wilson","JohnLogan, RobertWade, NealPurvis, JezButterworth","Action, Adventure, Crime","spy, basedonnovel, secretagent, sequel, mi6, b..."
3,49026,The Dark Knight Rises,"Christian Bale, Michael Caine, Gary Oldman, An...",Christopher Nolan,"Charles Roven, Christopher Nolan, Emma Thomas","Christopher Nolan, Jonathan Nolan",250000000,Following the death of District Attorney Harve...,165.0,The Legend Ends,...,"dc comics, crime fighter, terrorist, secret id...",goodrating,Longmovie,TheDarkKnightRises,ChristopherNolan,"ChristianBale, MichaelCaine, GaryOldman, AnneH...","CharlesRoven, ChristopherNolan, EmmaThomas","ChristopherNolan, JonathanNolan","Action, Crime, Drama, Thriller","dccomics, crimefighter, terrorist, secretident..."
4,49529,John Carter,"Taylor Kitsch, Lynn Collins, Samantha Morton, ...",Andrew Stanton,"Colin Wilson, Jim Morris, Lindsey Collins","Andrew Stanton, Michael Chabon, Mark Andrews",260000000,"John Carter is a war-weary, former military ca...",132.0,"Lost in our world, found in another.",...,"based on novel, mars, medallion, space travel,...",goodrating,Mediummovie,JohnCarter,AndrewStanton,"TaylorKitsch, LynnCollins, SamanthaMorton, Wil...","ColinWilson, JimMorris, LindseyCollins","AndrewStanton, MichaelChabon, MarkAndrews","Action, Adventure, ScienceFiction","basedonnovel, mars, medallion, spacetravel, pr..."


In [220]:
# Define the function to convert list values to a string separated by commas
def list_to_string(val):
    if isinstance(val, list):
        return ', '.join(val)
    return str(val)

# Apply the function to the specified columns and create the "named_text" column
df['named_text'] = df.apply(lambda row: ', '.join([
    list_to_string(row['title_1']),
    list_to_string(row['director_1']),
    list_to_string(row['cast_names_1']),
    list_to_string(row['producer_1']),
    list_to_string(row['screenplay_writer_1']),
    list_to_string(row['genre_list_1']),
    list_to_string(row['keywords_unpacked_1']),
    list_to_string(row['runtime_cat']),
    list_to_string(row['rating'])
]), axis=1)

# Apply the function to the specified columns and create the "context_based_text" column
df['context_based_text'] = df.apply(lambda row: ', '.join([
    list_to_string(row['tagline']),
    list_to_string(row['overview']),
]), axis=1)

In [221]:
df.head()

Unnamed: 0,movie_id,title,cast_names,director,producer,screenplay_writer,budget,overview,runtime,tagline,...,runtime_cat,title_1,director_1,cast_names_1,producer_1,screenplay_writer_1,genre_list_1,keywords_unpacked_1,named_text,context_based_text
0,19995,Avatar,"Sam Worthington, Zoe Saldana, Sigourney Weaver...",James Cameron,"James Cameron, Jon Landau",James Cameron,237000000,"In the 22nd century, a paraplegic Marine is di...",162.0,Enter the World of Pandora.,...,Longmovie,Avatar,JamesCameron,"SamWorthington, ZoeSaldana, SigourneyWeaver, S...","JamesCameron, JonLandau",JamesCameron,"Action, Adventure, Fantasy, ScienceFiction","cultureclash, future, spacewar, spacecolony, s...","Avatar, JamesCameron, SamWorthington, ZoeSalda...","Enter the World of Pandora., In the 22nd centu..."
1,285,Pirates of the Caribbean: At World's End,"Johnny Depp, Orlando Bloom, Keira Knightley, S...",Gore Verbinski,"Jerry Bruckheimer, Eric McLeod, Chad Oman, Pet...","Ted Elliott, Terry Rossio",300000000,"Captain Barbossa, long believed to be dead, ha...",169.0,"At the end of the world, the adventure begins.",...,Longmovie,PiratesoftheCaribbean:AtWorld'sEnd,GoreVerbinski,"JohnnyDepp, OrlandoBloom, KeiraKnightley, Stel...","JerryBruckheimer, EricMcLeod, ChadOman, PeterK...","TedElliott, TerryRossio","Adventure, Fantasy, Action","ocean, drugabuse, exoticisland, eastindiatradi...","PiratesoftheCaribbean:AtWorld'sEnd, GoreVerbin...","At the end of the world, the adventure begins...."
2,206647,Spectre,"Daniel Craig, Christoph Waltz, Léa Seydoux, Ra...",Sam Mendes,"Barbara Broccoli, Michael G. Wilson","John Logan, Robert Wade, Neal Purvis, Jez Butt...",245000000,A cryptic message from Bond’s past sends him o...,148.0,A Plan No One Escapes,...,Mediummovie,Spectre,SamMendes,"DanielCraig, ChristophWaltz, LéaSeydoux, Ralph...","BarbaraBroccoli, MichaelG.Wilson","JohnLogan, RobertWade, NealPurvis, JezButterworth","Action, Adventure, Crime","spy, basedonnovel, secretagent, sequel, mi6, b...","Spectre, SamMendes, DanielCraig, ChristophWalt...","A Plan No One Escapes, A cryptic message from ..."
3,49026,The Dark Knight Rises,"Christian Bale, Michael Caine, Gary Oldman, An...",Christopher Nolan,"Charles Roven, Christopher Nolan, Emma Thomas","Christopher Nolan, Jonathan Nolan",250000000,Following the death of District Attorney Harve...,165.0,The Legend Ends,...,Longmovie,TheDarkKnightRises,ChristopherNolan,"ChristianBale, MichaelCaine, GaryOldman, AnneH...","CharlesRoven, ChristopherNolan, EmmaThomas","ChristopherNolan, JonathanNolan","Action, Crime, Drama, Thriller","dccomics, crimefighter, terrorist, secretident...","TheDarkKnightRises, ChristopherNolan, Christia...","The Legend Ends, Following the death of Distri..."
4,49529,John Carter,"Taylor Kitsch, Lynn Collins, Samantha Morton, ...",Andrew Stanton,"Colin Wilson, Jim Morris, Lindsey Collins","Andrew Stanton, Michael Chabon, Mark Andrews",260000000,"John Carter is a war-weary, former military ca...",132.0,"Lost in our world, found in another.",...,Mediummovie,JohnCarter,AndrewStanton,"TaylorKitsch, LynnCollins, SamanthaMorton, Wil...","ColinWilson, JimMorris, LindseyCollins","AndrewStanton, MichaelChabon, MarkAndrews","Action, Adventure, ScienceFiction","basedonnovel, mars, medallion, spacetravel, pr...","JohnCarter, AndrewStanton, TaylorKitsch, LynnC...","Lost in our world, found in another., John Car..."


In [222]:
df['named_text'][0]

'Avatar, JamesCameron, SamWorthington, ZoeSaldana, SigourneyWeaver, StephenLang, MichelleRodriguez, GiovanniRibisi, JoelDavidMoore, CCHPounder, WesStudi, LazAlonso, DileepRao, MattGerald, SeanAnthonyMoran, JasonWhyte, ScottLawrence, KellyKilgour, JamesPatrickPitt, SeanPatrickMurphy, PeterDillon, KevinDorman, KelsonHenderson, DavidVanHorn, JacobTomuri, MichaelBlain-Rozgay, JonCurry, LukeHawker, WoodySchultz, PeterMensah, SoniaYee, JahnelCurfman, IlramChoi, KylaWarren, LisaRoumain, DebraWilson, ChrisMala, TaylorKibby, JodieLandau, JulieLamm, CullenB.Madden, JosephBradyMadden, FrankieTorres, AustinWilson, SaraWilson, TamicaWashington-Miller, LucyBriant, NathanMeister, GerryBlair, MatthewChamberlain, PaulYates, WrayWilson, JamesGaylyn, MelvinLenoClarkIII, CarvonFutrell, BrandonJelkes, MicahMoch, HanniyahMuhammad, ChristopherNolen, ChristaOliver, AprilMarieThomas, BravitaA.Threatt, ColinBleasdale, MikeBodnar, MattClayton, NicoleDionne, JamieHarrison, AllanHenry, AnthonyIngruber, AshleyJeffe

In [223]:
df['context_based_text'][0]

'Enter the World of Pandora., In the 22nd century, a paraplegic Marine is dispatched to the moon Pandora on a unique mission, but becomes torn between following orders and protecting an alien civilization.'

In [224]:
# Remove repeated values in the columns
df['named_text'] = df['named_text'].apply(lambda x: ', '.join(list(set(x.split(', ')))))

In [225]:
df['named_text'][0]

'future, GarethRuck, JosephMika-Hunt, spacetravel, TamicaWashington-Miller, CarvonFutrell, ChristaOliver, Fantasy, GerryBlair, StuartPollock, Adventure, soldier, MatthewChamberlain, Longmovie, society, PeterDillon, WoodySchultz, SaraWilson, ScienceFiction, AprilMarieThomas, AliciaVela-Bailey, HanniyahMuhammad, JulieLamm, MicahMoch, powerrelations, PeterMensah, DavidVanHorn, MattClayton, KellyKilgour, AshleyJeffery, JasonWhyte, NicoleDionne, WesStudi, T.J.Storm, KaiPantano, ColinBleasdale, AustinWilson, MattGerald, Avatar, MikeBodnar, cgi, mindandsoul, MichelleRodriguez, BravitaA.Threatt, space, ZoeSaldana, CullenB.Madden, BrandonJelkes, KelsonHenderson, JamesGaylyn, LisaRoumain, ScottLawrence, LucyBriant, AllanHenry, alien, alienplanet, ChrisMala, NathanMeister, DeanKnowsley, TaylorKibby, JodieLandau, IlramChoi, NikieZambo, SigourneyWeaver, SeanAnthonyMoran, JahnelCurfman, JonLandau, WrayWilson, battle, marine, loveaffair, DebraWilson, DileepRao, ChristopherNolen, JacobTomuri, JamesCam

In [226]:
# Create function to clean text
def clean_text(text):
    if isinstance(text, str):
        # Remove punctuation and non-alphanumeric characters using regex
        text_clean = re.sub(r'[^a-zA-Z0-9\s]', '', text)
        # Remove extra whitespace
        text_clean = re.sub(r'\s+', ' ', text_clean).strip()
    else:
        text_clean = ""
    return text_clean

# Clean the overview column
df['named_text_clean'] = df['named_text'].apply(lambda x: clean_text(x))
df['context_based_text_clean'] = df['context_based_text'].apply(lambda x: clean_text(x))
df.head()

Unnamed: 0,movie_id,title,cast_names,director,producer,screenplay_writer,budget,overview,runtime,tagline,...,director_1,cast_names_1,producer_1,screenplay_writer_1,genre_list_1,keywords_unpacked_1,named_text,context_based_text,named_text_clean,context_based_text_clean
0,19995,Avatar,"Sam Worthington, Zoe Saldana, Sigourney Weaver...",James Cameron,"James Cameron, Jon Landau",James Cameron,237000000,"In the 22nd century, a paraplegic Marine is di...",162.0,Enter the World of Pandora.,...,JamesCameron,"SamWorthington, ZoeSaldana, SigourneyWeaver, S...","JamesCameron, JonLandau",JamesCameron,"Action, Adventure, Fantasy, ScienceFiction","cultureclash, future, spacewar, spacecolony, s...","future, GarethRuck, JosephMika-Hunt, spacetrav...","Enter the World of Pandora., In the 22nd centu...",future GarethRuck JosephMikaHunt spacetravel T...,Enter the World of Pandora In the 22nd century...
1,285,Pirates of the Caribbean: At World's End,"Johnny Depp, Orlando Bloom, Keira Knightley, S...",Gore Verbinski,"Jerry Bruckheimer, Eric McLeod, Chad Oman, Pet...","Ted Elliott, Terry Rossio",300000000,"Captain Barbossa, long believed to be dead, ha...",169.0,"At the end of the world, the adventure begins.",...,GoreVerbinski,"JohnnyDepp, OrlandoBloom, KeiraKnightley, Stel...","JerryBruckheimer, EricMcLeod, ChadOman, PeterK...","TedElliott, TerryRossio","Adventure, Fantasy, Action","ocean, drugabuse, exoticisland, eastindiatradi...","AndyBeckwith, ocean, ChristopherS.Capp, GregEl...","At the end of the world, the adventure begins....",AndyBeckwith ocean ChristopherSCapp GregEllis ...,At the end of the world the adventure begins C...
2,206647,Spectre,"Daniel Craig, Christoph Waltz, Léa Seydoux, Ra...",Sam Mendes,"Barbara Broccoli, Michael G. Wilson","John Logan, Robert Wade, Neal Purvis, Jez Butt...",245000000,A cryptic message from Bond’s past sends him o...,148.0,A Plan No One Escapes,...,SamMendes,"DanielCraig, ChristophWaltz, LéaSeydoux, Ralph...","BarbaraBroccoli, MichaelG.Wilson","JohnLogan, RobertWade, NealPurvis, JezButterworth","Action, Adventure, Crime","spy, basedonnovel, secretagent, sequel, mi6, b...","TenochHuerta, MarcZinga, KimAdis, SadaoUeda, D...","A Plan No One Escapes, A cryptic message from ...",TenochHuerta MarcZinga KimAdis SadaoUeda Derek...,A Plan No One Escapes A cryptic message from B...
3,49026,The Dark Knight Rises,"Christian Bale, Michael Caine, Gary Oldman, An...",Christopher Nolan,"Charles Roven, Christopher Nolan, Emma Thomas","Christopher Nolan, Jonathan Nolan",250000000,Following the death of District Attorney Harve...,165.0,The Legend Ends,...,ChristopherNolan,"ChristianBale, MichaelCaine, GaryOldman, AnneH...","CharlesRoven, ChristopherNolan, EmmaThomas","ChristopherNolan, JonathanNolan","Action, Crime, Drama, Thriller","dccomics, crimefighter, terrorist, secretident...","KirstenRoeters, AlonAboutboul, PatrickLeahy, K...","The Legend Ends, Following the death of Distri...",KirstenRoeters AlonAboutboul PatrickLeahy Kyle...,The Legend Ends Following the death of Distric...
4,49529,John Carter,"Taylor Kitsch, Lynn Collins, Samantha Morton, ...",Andrew Stanton,"Colin Wilson, Jim Morris, Lindsey Collins","Andrew Stanton, Michael Chabon, Mark Andrews",260000000,"John Carter is a war-weary, former military ca...",132.0,"Lost in our world, found in another.",...,AndrewStanton,"TaylorKitsch, LynnCollins, SamanthaMorton, Wil...","ColinWilson, JimMorris, LindseyCollins","AndrewStanton, MichaelChabon, MarkAndrews","Action, Adventure, ScienceFiction","basedonnovel, mars, medallion, spacetravel, pr...","mars, PippaNixon, martian, DarylSabara, PollyW...","Lost in our world, found in another., John Car...",mars PippaNixon martian DarylSabara PollyWalke...,Lost in our world found in another John Carter...


In [227]:
# Create function to tokenize and lowercase data
def tokenize(text):
    # W+ means that either a word character (A-Za-z0-9_) or a dash (-) can go there.
    tokens = re.split('\W+', text)
    return tokens

# Tokenize and lowercase data 
df['named_text_tokenized'] = df['named_text_clean'].apply(lambda x: tokenize(x.lower()))
df['context_based_text_tokenized'] = df['context_based_text_clean'].apply(lambda x: tokenize(x.lower()))
df.head()

Unnamed: 0,movie_id,title,cast_names,director,producer,screenplay_writer,budget,overview,runtime,tagline,...,producer_1,screenplay_writer_1,genre_list_1,keywords_unpacked_1,named_text,context_based_text,named_text_clean,context_based_text_clean,named_text_tokenized,context_based_text_tokenized
0,19995,Avatar,"Sam Worthington, Zoe Saldana, Sigourney Weaver...",James Cameron,"James Cameron, Jon Landau",James Cameron,237000000,"In the 22nd century, a paraplegic Marine is di...",162.0,Enter the World of Pandora.,...,"JamesCameron, JonLandau",JamesCameron,"Action, Adventure, Fantasy, ScienceFiction","cultureclash, future, spacewar, spacecolony, s...","future, GarethRuck, JosephMika-Hunt, spacetrav...","Enter the World of Pandora., In the 22nd centu...",future GarethRuck JosephMikaHunt spacetravel T...,Enter the World of Pandora In the 22nd century...,"[future, garethruck, josephmikahunt, spacetrav...","[enter, the, world, of, pandora, in, the, 22nd..."
1,285,Pirates of the Caribbean: At World's End,"Johnny Depp, Orlando Bloom, Keira Knightley, S...",Gore Verbinski,"Jerry Bruckheimer, Eric McLeod, Chad Oman, Pet...","Ted Elliott, Terry Rossio",300000000,"Captain Barbossa, long believed to be dead, ha...",169.0,"At the end of the world, the adventure begins.",...,"JerryBruckheimer, EricMcLeod, ChadOman, PeterK...","TedElliott, TerryRossio","Adventure, Fantasy, Action","ocean, drugabuse, exoticisland, eastindiatradi...","AndyBeckwith, ocean, ChristopherS.Capp, GregEl...","At the end of the world, the adventure begins....",AndyBeckwith ocean ChristopherSCapp GregEllis ...,At the end of the world the adventure begins C...,"[andybeckwith, ocean, christopherscapp, gregel...","[at, the, end, of, the, world, the, adventure,..."
2,206647,Spectre,"Daniel Craig, Christoph Waltz, Léa Seydoux, Ra...",Sam Mendes,"Barbara Broccoli, Michael G. Wilson","John Logan, Robert Wade, Neal Purvis, Jez Butt...",245000000,A cryptic message from Bond’s past sends him o...,148.0,A Plan No One Escapes,...,"BarbaraBroccoli, MichaelG.Wilson","JohnLogan, RobertWade, NealPurvis, JezButterworth","Action, Adventure, Crime","spy, basedonnovel, secretagent, sequel, mi6, b...","TenochHuerta, MarcZinga, KimAdis, SadaoUeda, D...","A Plan No One Escapes, A cryptic message from ...",TenochHuerta MarcZinga KimAdis SadaoUeda Derek...,A Plan No One Escapes A cryptic message from B...,"[tenochhuerta, marczinga, kimadis, sadaoueda, ...","[a, plan, no, one, escapes, a, cryptic, messag..."
3,49026,The Dark Knight Rises,"Christian Bale, Michael Caine, Gary Oldman, An...",Christopher Nolan,"Charles Roven, Christopher Nolan, Emma Thomas","Christopher Nolan, Jonathan Nolan",250000000,Following the death of District Attorney Harve...,165.0,The Legend Ends,...,"CharlesRoven, ChristopherNolan, EmmaThomas","ChristopherNolan, JonathanNolan","Action, Crime, Drama, Thriller","dccomics, crimefighter, terrorist, secretident...","KirstenRoeters, AlonAboutboul, PatrickLeahy, K...","The Legend Ends, Following the death of Distri...",KirstenRoeters AlonAboutboul PatrickLeahy Kyle...,The Legend Ends Following the death of Distric...,"[kirstenroeters, alonaboutboul, patrickleahy, ...","[the, legend, ends, following, the, death, of,..."
4,49529,John Carter,"Taylor Kitsch, Lynn Collins, Samantha Morton, ...",Andrew Stanton,"Colin Wilson, Jim Morris, Lindsey Collins","Andrew Stanton, Michael Chabon, Mark Andrews",260000000,"John Carter is a war-weary, former military ca...",132.0,"Lost in our world, found in another.",...,"ColinWilson, JimMorris, LindseyCollins","AndrewStanton, MichaelChabon, MarkAndrews","Action, Adventure, ScienceFiction","basedonnovel, mars, medallion, spacetravel, pr...","mars, PippaNixon, martian, DarylSabara, PollyW...","Lost in our world, found in another., John Car...",mars PippaNixon martian DarylSabara PollyWalke...,Lost in our world found in another John Carter...,"[mars, pippanixon, martian, darylsabara, polly...","[lost, in, our, world, found, in, another, joh..."


In [228]:
nltk.download('stopwords')

# Define stop words list
stopwords = nltk.corpus.stopwords.words('english')     # All English Stopwords
addn_stop = ['actor', 'actress', 'adaptation', 'award', 'box office', 'budget', 'cameo', 'cast', 'character', 'cinema', 'credits', 'crew', 'director', 'entertainment', 'film', 'genre', 'Hollywood', 'indie', 'movie', 'music', 'plot', 'premiere', 'production', 'rating', 'review', 'scene', 'screenplay', 'sequel', 'sound', 'special effects', 'story', 'subtitle', 'trailer', 'visual effects', 'audience', 'blockbuster', 'camera', 'cameo appearance', 'cinematography', 'climax', 'costume', 'dialogue', "director's cut", 'distribution', 'editing', 'ensemble cast', 'film festival', 'filmography', 'filmmaker', 'flashback', 'flop', 'frame', 'genre-bending', 'green screen', 'leading actor/actress', 'lighting', 'location', 'motion picture', 'movie set', 'MPAA rating', 'on-screen', 'opening weekend', 'performance', 'producer', 'project', 'protagonist', 'rating system', 'remake', 'resolution', 'running time', 'screenwriter', 'shot', 'sound effects', 'soundtrack', 'take', 'title', 'trailer', 'voice-over', 'widescreen', 'film', 'films', 'movie', 'movies']
stopwords.extend(addn_stop)                           # Add custom stop words

# Create function to remove stopwords
def remove_stopwords(tokenized_list):
    text = [word for word in tokenized_list if word.lower() not in stopwords]
    return text

# Remove stop words from data
df['context_based_text_nostop'] = df['context_based_text_tokenized'].apply(lambda x: remove_stopwords(x))
df.head()

[nltk_data] Downloading package stopwords to
[nltk_data]     C:\Users\mariy\AppData\Roaming\nltk_data...
[nltk_data]   Package stopwords is already up-to-date!


Unnamed: 0,movie_id,title,cast_names,director,producer,screenplay_writer,budget,overview,runtime,tagline,...,screenplay_writer_1,genre_list_1,keywords_unpacked_1,named_text,context_based_text,named_text_clean,context_based_text_clean,named_text_tokenized,context_based_text_tokenized,context_based_text_nostop
0,19995,Avatar,"Sam Worthington, Zoe Saldana, Sigourney Weaver...",James Cameron,"James Cameron, Jon Landau",James Cameron,237000000,"In the 22nd century, a paraplegic Marine is di...",162.0,Enter the World of Pandora.,...,JamesCameron,"Action, Adventure, Fantasy, ScienceFiction","cultureclash, future, spacewar, spacecolony, s...","future, GarethRuck, JosephMika-Hunt, spacetrav...","Enter the World of Pandora., In the 22nd centu...",future GarethRuck JosephMikaHunt spacetravel T...,Enter the World of Pandora In the 22nd century...,"[future, garethruck, josephmikahunt, spacetrav...","[enter, the, world, of, pandora, in, the, 22nd...","[enter, world, pandora, 22nd, century, paraple..."
1,285,Pirates of the Caribbean: At World's End,"Johnny Depp, Orlando Bloom, Keira Knightley, S...",Gore Verbinski,"Jerry Bruckheimer, Eric McLeod, Chad Oman, Pet...","Ted Elliott, Terry Rossio",300000000,"Captain Barbossa, long believed to be dead, ha...",169.0,"At the end of the world, the adventure begins.",...,"TedElliott, TerryRossio","Adventure, Fantasy, Action","ocean, drugabuse, exoticisland, eastindiatradi...","AndyBeckwith, ocean, ChristopherS.Capp, GregEl...","At the end of the world, the adventure begins....",AndyBeckwith ocean ChristopherSCapp GregEllis ...,At the end of the world the adventure begins C...,"[andybeckwith, ocean, christopherscapp, gregel...","[at, the, end, of, the, world, the, adventure,...","[end, world, adventure, begins, captain, barbo..."
2,206647,Spectre,"Daniel Craig, Christoph Waltz, Léa Seydoux, Ra...",Sam Mendes,"Barbara Broccoli, Michael G. Wilson","John Logan, Robert Wade, Neal Purvis, Jez Butt...",245000000,A cryptic message from Bond’s past sends him o...,148.0,A Plan No One Escapes,...,"JohnLogan, RobertWade, NealPurvis, JezButterworth","Action, Adventure, Crime","spy, basedonnovel, secretagent, sequel, mi6, b...","TenochHuerta, MarcZinga, KimAdis, SadaoUeda, D...","A Plan No One Escapes, A cryptic message from ...",TenochHuerta MarcZinga KimAdis SadaoUeda Derek...,A Plan No One Escapes A cryptic message from B...,"[tenochhuerta, marczinga, kimadis, sadaoueda, ...","[a, plan, no, one, escapes, a, cryptic, messag...","[plan, one, escapes, cryptic, message, bonds, ..."
3,49026,The Dark Knight Rises,"Christian Bale, Michael Caine, Gary Oldman, An...",Christopher Nolan,"Charles Roven, Christopher Nolan, Emma Thomas","Christopher Nolan, Jonathan Nolan",250000000,Following the death of District Attorney Harve...,165.0,The Legend Ends,...,"ChristopherNolan, JonathanNolan","Action, Crime, Drama, Thriller","dccomics, crimefighter, terrorist, secretident...","KirstenRoeters, AlonAboutboul, PatrickLeahy, K...","The Legend Ends, Following the death of Distri...",KirstenRoeters AlonAboutboul PatrickLeahy Kyle...,The Legend Ends Following the death of Distric...,"[kirstenroeters, alonaboutboul, patrickleahy, ...","[the, legend, ends, following, the, death, of,...","[legend, ends, following, death, district, att..."
4,49529,John Carter,"Taylor Kitsch, Lynn Collins, Samantha Morton, ...",Andrew Stanton,"Colin Wilson, Jim Morris, Lindsey Collins","Andrew Stanton, Michael Chabon, Mark Andrews",260000000,"John Carter is a war-weary, former military ca...",132.0,"Lost in our world, found in another.",...,"AndrewStanton, MichaelChabon, MarkAndrews","Action, Adventure, ScienceFiction","basedonnovel, mars, medallion, spacetravel, pr...","mars, PippaNixon, martian, DarylSabara, PollyW...","Lost in our world, found in another., John Car...",mars PippaNixon martian DarylSabara PollyWalke...,Lost in our world found in another John Carter...,"[mars, pippanixon, martian, darylsabara, polly...","[lost, in, our, world, found, in, another, joh...","[lost, world, found, another, john, carter, wa..."


In [229]:
import nltk
nltk.download('wordnet')

wn = nltk.WordNetLemmatizer()
# Create function to apply lematizer
def lemmatizing(tokenized_text):
    text = [wn.lemmatize(word) for word in tokenized_text]
    return text

# Apply lemmatizer
df['context_based_text_lemmatized'] = df['context_based_text_nostop'].apply(lambda x: lemmatizing(x))
df.head(10)

[nltk_data] Downloading package wordnet to
[nltk_data]     C:\Users\mariy\AppData\Roaming\nltk_data...
[nltk_data]   Package wordnet is already up-to-date!


Unnamed: 0,movie_id,title,cast_names,director,producer,screenplay_writer,budget,overview,runtime,tagline,...,genre_list_1,keywords_unpacked_1,named_text,context_based_text,named_text_clean,context_based_text_clean,named_text_tokenized,context_based_text_tokenized,context_based_text_nostop,context_based_text_lemmatized
0,19995,Avatar,"Sam Worthington, Zoe Saldana, Sigourney Weaver...",James Cameron,"James Cameron, Jon Landau",James Cameron,237000000,"In the 22nd century, a paraplegic Marine is di...",162.0,Enter the World of Pandora.,...,"Action, Adventure, Fantasy, ScienceFiction","cultureclash, future, spacewar, spacecolony, s...","future, GarethRuck, JosephMika-Hunt, spacetrav...","Enter the World of Pandora., In the 22nd centu...",future GarethRuck JosephMikaHunt spacetravel T...,Enter the World of Pandora In the 22nd century...,"[future, garethruck, josephmikahunt, spacetrav...","[enter, the, world, of, pandora, in, the, 22nd...","[enter, world, pandora, 22nd, century, paraple...","[enter, world, pandora, 22nd, century, paraple..."
1,285,Pirates of the Caribbean: At World's End,"Johnny Depp, Orlando Bloom, Keira Knightley, S...",Gore Verbinski,"Jerry Bruckheimer, Eric McLeod, Chad Oman, Pet...","Ted Elliott, Terry Rossio",300000000,"Captain Barbossa, long believed to be dead, ha...",169.0,"At the end of the world, the adventure begins.",...,"Adventure, Fantasy, Action","ocean, drugabuse, exoticisland, eastindiatradi...","AndyBeckwith, ocean, ChristopherS.Capp, GregEl...","At the end of the world, the adventure begins....",AndyBeckwith ocean ChristopherSCapp GregEllis ...,At the end of the world the adventure begins C...,"[andybeckwith, ocean, christopherscapp, gregel...","[at, the, end, of, the, world, the, adventure,...","[end, world, adventure, begins, captain, barbo...","[end, world, adventure, begin, captain, barbos..."
2,206647,Spectre,"Daniel Craig, Christoph Waltz, Léa Seydoux, Ra...",Sam Mendes,"Barbara Broccoli, Michael G. Wilson","John Logan, Robert Wade, Neal Purvis, Jez Butt...",245000000,A cryptic message from Bond’s past sends him o...,148.0,A Plan No One Escapes,...,"Action, Adventure, Crime","spy, basedonnovel, secretagent, sequel, mi6, b...","TenochHuerta, MarcZinga, KimAdis, SadaoUeda, D...","A Plan No One Escapes, A cryptic message from ...",TenochHuerta MarcZinga KimAdis SadaoUeda Derek...,A Plan No One Escapes A cryptic message from B...,"[tenochhuerta, marczinga, kimadis, sadaoueda, ...","[a, plan, no, one, escapes, a, cryptic, messag...","[plan, one, escapes, cryptic, message, bonds, ...","[plan, one, escape, cryptic, message, bond, pa..."
3,49026,The Dark Knight Rises,"Christian Bale, Michael Caine, Gary Oldman, An...",Christopher Nolan,"Charles Roven, Christopher Nolan, Emma Thomas","Christopher Nolan, Jonathan Nolan",250000000,Following the death of District Attorney Harve...,165.0,The Legend Ends,...,"Action, Crime, Drama, Thriller","dccomics, crimefighter, terrorist, secretident...","KirstenRoeters, AlonAboutboul, PatrickLeahy, K...","The Legend Ends, Following the death of Distri...",KirstenRoeters AlonAboutboul PatrickLeahy Kyle...,The Legend Ends Following the death of Distric...,"[kirstenroeters, alonaboutboul, patrickleahy, ...","[the, legend, ends, following, the, death, of,...","[legend, ends, following, death, district, att...","[legend, end, following, death, district, atto..."
4,49529,John Carter,"Taylor Kitsch, Lynn Collins, Samantha Morton, ...",Andrew Stanton,"Colin Wilson, Jim Morris, Lindsey Collins","Andrew Stanton, Michael Chabon, Mark Andrews",260000000,"John Carter is a war-weary, former military ca...",132.0,"Lost in our world, found in another.",...,"Action, Adventure, ScienceFiction","basedonnovel, mars, medallion, spacetravel, pr...","mars, PippaNixon, martian, DarylSabara, PollyW...","Lost in our world, found in another., John Car...",mars PippaNixon martian DarylSabara PollyWalke...,Lost in our world found in another John Carter...,"[mars, pippanixon, martian, darylsabara, polly...","[lost, in, our, world, found, in, another, joh...","[lost, world, found, another, john, carter, wa...","[lost, world, found, another, john, carter, wa..."
5,559,Spider-Man 3,"Tobey Maguire, Kirsten Dunst, James Franco, Th...",Sam Raimi,"Laura Ziskin, Avi Arad, Grant Curtis","Sam Raimi, Alvin Sargent, Ivan Raimi",258000000,The seemingly invincible Spider-Man goes up ag...,139.0,The battle within.,...,"Fantasy, Action, Adventure","dualidentity, amnesia, sandstorm, loveofone'sl...","ShaunPatrickFlynn, wretch, JimmyStar, TajnaTan...","The battle within., The seemingly invincible S...",ShaunPatrickFlynn wretch JimmyStar TajnaTanovi...,The battle within The seemingly invincible Spi...,"[shaunpatrickflynn, wretch, jimmystar, tajnata...","[the, battle, within, the, seemingly, invincib...","[battle, within, seemingly, invincible, spider...","[battle, within, seemingly, invincible, spider..."
6,38757,Tangled,"Zachary Levi, Mandy Moore, Donna Murphy, Ron P...",Byron Howard,Roy Conli,Dan Fogelman,260000000,When the kingdom's most wanted-and most charmi...,100.0,They're taking adventure to new lengths.,...,"Animation, Family","hostage, magic, horse, fairytale, musical, pri...","healingpower, ByronHoward, duringcreditsstinge...","They're taking adventure to new lengths., When...",healingpower ByronHoward duringcreditsstinger ...,Theyre taking adventure to new lengths When th...,"[healingpower, byronhoward, duringcreditssting...","[theyre, taking, adventure, to, new, lengths, ...","[theyre, taking, adventure, new, lengths, king...","[theyre, taking, adventure, new, length, kingd..."
7,99861,Avengers: Age of Ultron,"Robert Downey Jr., Chris Hemsworth, Mark Ruffa...",Joss Whedon,Kevin Feige,,280000000,When Tony Stark tries to jumpstart a dormant p...,141.0,A New Age Has Come.,...,"Action, Adventure, ScienceFiction","marvelcomic, sequel, superhero, basedoncomicbo...",", MathapeloSeptember, marvelcinematicuniverse,...","A New Age Has Come., When Tony Stark tries to ...",MathapeloSeptember marvelcinematicuniverse Con...,A New Age Has Come When Tony Stark tries to ju...,"[mathapeloseptember, marvelcinematicuniverse, ...","[a, new, age, has, come, when, tony, stark, tr...","[new, age, come, tony, stark, tries, jumpstart...","[new, age, come, tony, stark, try, jumpstart, ..."
8,767,Harry Potter and the Half-Blood Prince,"Daniel Radcliffe, Rupert Grint, Emma Watson, T...",David Yates,"David Heyman, David Barron, Tim Lewis",Steve Kloves,250000000,"As Harry begins his sixth year at Hogwarts, he...",153.0,Dark Secrets Revealed,...,"Adventure, Fantasy, Family","witch, magic, broom, schoolofwitchcraft, wizar...","DavidThewlis, EmmaWatson, RobbieColtrane, Davi...","Dark Secrets Revealed, As Harry begins his six...",DavidThewlis EmmaWatson RobbieColtrane DavidBr...,Dark Secrets Revealed As Harry begins his sixt...,"[davidthewlis, emmawatson, robbiecoltrane, dav...","[dark, secrets, revealed, as, harry, begins, h...","[dark, secrets, revealed, harry, begins, sixth...","[dark, secret, revealed, harry, begin, sixth, ..."
9,209112,Batman v Superman: Dawn of Justice,"Ben Affleck, Henry Cavill, Gal Gadot, Amy Adam...",Zack Snyder,"Charles Roven, Deborah Snyder","David S. Goyer, Chris Terrio",250000000,Fearing the actions of a god-like Super Hero l...,151.0,Justice or revenge,...,"Action, Adventure, Fantasy","dccomics, vigilante, superhero, basedoncomicbo...","RipleySobo, TaoOkamoto, LauraAtwood, PatrickLe...","Justice or revenge, Fearing the actions of a g...",RipleySobo TaoOkamoto LauraAtwood PatrickLeahy...,Justice or revenge Fearing the actions of a go...,"[ripleysobo, taookamoto, lauraatwood, patrickl...","[justice, or, revenge, fearing, the, actions, ...","[justice, revenge, fearing, actions, godlike, ...","[justice, revenge, fearing, action, godlike, s..."


In [230]:
# Combine values from 'named_text_tokenized' and 'context_based_text_lemmatized' into a new column
df['combined_text'] = df['named_text_tokenized'].apply(lambda x: ' '.join(x)) + ' ' + df['context_based_text_lemmatized'].apply(lambda x: ' '.join(x))

In [231]:
# Apply vectorization on the combined text
vectorizer = TfidfVectorizer()
tfidf_counts = vectorizer.fit_transform(df['combined_text'])

In [232]:
# Compute pairwise cosine similarity on tf-idf matrix
cosine_sim = cosine_similarity(tfidf_counts, tfidf_counts)

# Print similarity matrix
print(cosine_sim)

[[1.00000000e+00 1.16818781e-02 4.45421798e-03 ... 6.50453694e-03
  1.54054459e-03 0.00000000e+00]
 [1.16818781e-02 1.00000000e+00 1.58060272e-02 ... 9.50917617e-03
  5.35237703e-03 0.00000000e+00]
 [4.45421798e-03 1.58060272e-02 1.00000000e+00 ... 5.29030228e-03
  3.20649507e-04 3.31434131e-03]
 ...
 [6.50453694e-03 9.50917617e-03 5.29030228e-03 ... 1.00000000e+00
  9.20569701e-03 1.73620099e-02]
 [1.54054459e-03 5.35237703e-03 3.20649507e-04 ... 9.20569701e-03
  1.00000000e+00 8.22147751e-03]
 [0.00000000e+00 0.00000000e+00 3.31434131e-03 ... 1.73620099e-02
  8.22147751e-03 1.00000000e+00]]


#Create a dataframe that contains only the title and combined_text columns from your original dataframe

In [233]:
df_movies = df[['title', 'combined_text']]

#Define a function that takes a movie title as input, and returns the top 5 most similar movies based on cosine similarity.

In [234]:
def get_movie_recomendation(title):
    # Get index of the input movie
    index = df_movies[df_movies['title'] == title].index[0]

    # Calculate cosine similarity between the input movie and all other movies
    cosine_similarities = cosine_similarity(tfidf_counts[index], tfidf_counts)

    # Get the top 5 most similar movies
    similar_movies_indices = cosine_similarities.argsort()[0][-6:-1][::-1]
    similar_movies = df_movies.iloc[similar_movies_indices]['title']

    return similar_movies.tolist()

In [235]:
get_movie_recomendation ('The Dark Knight')

['Batman Returns',
 'The Dark Knight Rises',
 'Batman',
 'Batman Begins',
 'Batman: The Dark Knight Returns, Part 2']

In [236]:
get_movie_recomendation ('The Shawshank Redemption')

['The Green Mile', 'Civil Brand', 'Prison', 'The Majestic', 'Fortress']

In [237]:
get_movie_recomendation ('Frozen')

['Big Hero 6',
 'Wreck-It Ralph',
 'The Snow Queen',
 'Enchanted',
 'Mr. Peabody & Sherman']