In [1]:
import pandas as pd
import json

In [2]:
# Metadata for 81,741 movies, extracted from the Noverber 4, 2012 dump of Freebase.  Tab-separated; columns:

# 1. Wikipedia movie ID
# 2. Freebase movie ID
# 3. Movie name
# 4. Movie release date
# 5. Movie box office revenue
# 6. Movie runtime
# 7. Movie languages (Freebase ID:name tuples)
# 8. Movie countries (Freebase ID:name tuples)
# 9. Movie genres (Freebase ID:name tuples)
metadata_cols = ["wiki_id", "freebase_id", "name", "release_date", "revenue", "runtime", "languages", "countries", "genres"]
metadata_df = pd.read_csv('movie.metadata.tsv', sep='\t', names=metadata_cols)
metadata_df = metadata_df[['wiki_id', 'name', 'release_date', 'genres']]
metadata_df['wiki_id'] = metadata_df['wiki_id'].astype(int)

In [27]:
plot_df = pd.read_csv('plot_summaries.txt', sep='\n', names=['plot'])
plot_df = plot_df['plot'].str.split('\t', expand=True)
plot_df.columns = ['wiki_id', 'plot']
plot_df['wiki_id'] = plot_df['wiki_id'].astype(int)

In [28]:
plot_df

Unnamed: 0,wiki_id,plot
0,23890098,"Shlykov, a hard-working taxi driver and Lyosha..."
1,31186339,The nation of Panem consists of a wealthy Capi...
2,20663735,Poovalli Induchoodan is sentenced for six yea...
3,2231378,"The Lemon Drop Kid , a New York City swindler,..."
4,595909,Seventh-day Adventist Church pastor Michael Ch...
...,...,...
42301,34808485,"The story is about Reema , a young Muslim scho..."
42302,1096473,"In 1928 Hollywood, director Leo Andreyev look..."
42303,35102018,American Luthier focuses on Randy Parsons’ tra...
42304,8628195,"Abdur Rehman Khan , a middle-aged dry fruit se..."


In [51]:
metadata_df

Unnamed: 0,wiki_id,name,release_date,genres
0,975900,Ghosts of Mars,2001-08-24,"{""/m/01jfsb"": ""Thriller"", ""/m/06n90"": ""Science..."
1,3196793,Getting Away with Murder: The JonBenét Ramsey ...,2000-02-16,"{""/m/02n4kr"": ""Mystery"", ""/m/03bxz7"": ""Biograp..."
2,28463795,Brun bitter,1988,"{""/m/0lsxr"": ""Crime Fiction"", ""/m/07s9rl0"": ""D..."
3,9363483,White Of The Eye,1987,"{""/m/01jfsb"": ""Thriller"", ""/m/0glj9q"": ""Erotic..."
4,261236,A Woman in Flames,1983,"{""/m/07s9rl0"": ""Drama""}"
...,...,...,...,...
81736,35228177,Mermaids: The Body Found,2011-03-19,"{""/m/07s9rl0"": ""Drama""}"
81737,34980460,Knuckle,2011-01-21,"{""/m/03bxz7"": ""Biographical film"", ""/m/07s9rl0..."
81738,9971909,Another Nice Mess,1972-09-22,"{""/m/06nbt"": ""Satire"", ""/m/01z4y"": ""Comedy""}"
81739,913762,The Super Dimension Fortress Macross II: Lover...,1992-05-21,"{""/m/06n90"": ""Science Fiction"", ""/m/0gw5n2f"": ..."


In [71]:
movie_df = pd.merge(metadata_df, plot_df, on='wiki_id', how='inner')
# Drop na valued roles
movie_df.dropna(inplace=True)
# Format release_date
movie_df['release_date'] = movie_df['release_date'].map(lambda date: date[:4]).astype(int)

movie_df

Unnamed: 0,wiki_id,name,release_date,genres,plot
0,975900,Ghosts of Mars,2001,"{""/m/01jfsb"": ""Thriller"", ""/m/06n90"": ""Science...","Set in the second half of the 22nd century, th..."
1,9363483,White Of The Eye,1987,"{""/m/01jfsb"": ""Thriller"", ""/m/0glj9q"": ""Erotic...",A series of murders of rich young women throug...
2,261236,A Woman in Flames,1983,"{""/m/07s9rl0"": ""Drama""}","Eva, an upper class housewife, becomes frustra..."
3,18998739,The Sorcerer's Apprentice,2002,"{""/m/0hqxf"": ""Family Film"", ""/m/01hmnh"": ""Fant...","Every hundred years, the evil Morgana returns..."
4,6631279,Little city,1997,"{""/m/06cvj"": ""Romantic comedy"", ""/m/0hj3n0w"": ...","Adam, a San Francisco-based artist who works a..."
...,...,...,...,...,...
42202,23851782,The Ghost Train,1941,"{""/m/0lsxr"": ""Crime Fiction"", ""/m/01jfsb"": ""Th...",{{plot}} The film opens with a Great Western e...
42203,35228177,Mermaids: The Body Found,2011,"{""/m/07s9rl0"": ""Drama""}",Two former National Oceanic Atmospheric Admini...
42204,34980460,Knuckle,2011,"{""/m/03bxz7"": ""Biographical film"", ""/m/07s9rl0...",{{No plot}} This film follows 12 years in the ...
42205,913762,The Super Dimension Fortress Macross II: Lover...,1992,"{""/m/06n90"": ""Science Fiction"", ""/m/0gw5n2f"": ...","The story takes place in the year 2092,The Sup..."


In [72]:
# Extract moives released in 1990s
movie_df = movie_df[(movie_df['release_date'] >= 1990) & (movie_df['release_date'] <= 1999)]

In [73]:
movie_df

Unnamed: 0,wiki_id,name,release_date,genres,plot
4,6631279,Little city,1997,"{""/m/06cvj"": ""Romantic comedy"", ""/m/0hj3n0w"": ...","Adam, a San Francisco-based artist who works a..."
17,26067101,Siam Sunset,1999,"{""/m/06cvj"": ""Romantic comedy"", ""/m/02l7c8"": ""...",Perry is an English chemist working for a pain...
19,9548445,Archie: To Riverdale and Back Again,1990,"{""/m/01z4y"": ""Comedy""}","Archie Andrews, fifteen years after graduating..."
20,2487170,Troops,1997,"{""/m/068twy"": ""Fan film"", ""/m/02hmvc"": ""Short ...",An example of the film's comedic tone comes in...
27,29198000,Chandra Mukhi,1993,"{""/m/02l7c8"": ""Romance Film"", ""/m/07s9rl0"": ""D...","Chandra Mukhi , a princess of a heavenly kingd..."
...,...,...,...,...,...
42184,1191380,Wilde,1997,"{""/m/0hn10"": ""LGBT"", ""/m/017fp"": ""Biography"", ...",The film opens with Oscar Wilde's 1882 visit t...
42194,7761830,Spaced Invaders,1990,"{""/m/0hj3mt0"": ""Alien Film"", ""/m/06n90"": ""Scie...","The space armada from Mars, known as the Imper..."
42198,664006,Guilty as Sin,1993,"{""/m/01jfsb"": ""Thriller"", ""/m/0glj9q"": ""Erotic...",Jennifer Haines is an up-and-coming Chicago a...
42200,15394941,Gopi Kishan,1994,"{""/m/07s9rl0"": ""Drama"", ""/m/02kdv5l"": ""Action""...",Kishan killed a man in his childhood when the...


In [75]:
def filter_romance_genre(genre_str):
    genre_dict = json.loads(genre_str)
    genres = genre_dict.values()
    for genre in genres:
        genre_lower = genre.lower()
        if 'romance' in genre_lower or 'romantic' in genre_lower:
            return 'Romance'
    return 'Non-romance'
    
movie_df['genres'] = movie_df['genres'].map(lambda s: filter_romance_genre(s))

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  movie_df['genres'] = movie_df['genres'].map(lambda s: filter_romance_genre(s))


In [76]:
# Extract only romance genre
final_df = movie_df[movie_df['genres'] == 'Romance']

In [77]:
final_df.shape

(1109, 5)

In [78]:
final_df

Unnamed: 0,wiki_id,name,release_date,genres,plot
4,6631279,Little city,1997,Romance,"Adam, a San Francisco-based artist who works a..."
17,26067101,Siam Sunset,1999,Romance,Perry is an English chemist working for a pain...
27,29198000,Chandra Mukhi,1993,Romance,"Chandra Mukhi , a princess of a heavenly kingd..."
70,1072373,Minsaara Kanavu,1997,Romance,Priya Amal Raj loses her mother at a very you...
80,1031231,"Black Cat, White Cat",1998,Romance,"Matko Destanov, a small-time Roma smuggler and..."
...,...,...,...,...,...
42006,699336,Nine Months,1995,Romance,Child psychologist Samuel Faulkner's ideal ro...
42027,15687214,A Business Affair,1994,Romance,The film is centred around the life of Kate Sw...
42162,11823946,Twelfth Night: Or What You Will,1996,Romance,Viola and Sebastian are young twins and ente...
42179,30553937,Confessions of a Sexist Pig,1998,Romance,A daytime soap opera star has to deal with his...
