Maps minifigs to the actors who played the matching movie characters, using the minifigs, cast_in, movies, and movie_cast datasets.

In [None]:


import pandas as pd
import numpy as np
pd.set_option('display.max_colwidth', None)

# Folder that holds every project CSV. Change this one constant to run the
# notebook against a different location.
DATA_DIR = "/content/drive/My Drive/550_project"

# Every column is read as a string (dtype=str); ID columns are cast where needed later.
# usecols keeps only the leading columns of each file that this notebook uses.
minifigs = pd.read_csv(DATA_DIR + "/minifigs.csv", dtype=str)
cast_in = pd.read_csv(DATA_DIR + "/cast_in.csv", dtype=str, usecols=[0, 1, 2])
movies = pd.read_csv(DATA_DIR + "/movies.csv", dtype=str, usecols=[0, 1])
actors = pd.read_csv(DATA_DIR + "/movie_cast.csv", dtype=str, usecols=[0, 1])

In [None]:
# Preview the first rows of the raw minifig table (names are not cleaned yet at this point)
minifigs.head()

Unnamed: 0.1,Unnamed: 0,fig_num,name,num_parts,image_url
0,0,fig-000001,Toy Store Employee,4,https://cdn.rebrickable.com/media/sets/fig-000001/63692.jpg
1,1,fig-000002,Customer Kid,4,https://cdn.rebrickable.com/media/sets/fig-000002/63691.jpg
2,2,fig-000003,"Assassin Droid, White",8,https://cdn.rebrickable.com/media/sets/fig-000003/56170.jpg
3,3,fig-000004,Basic Figure,4,https://cdn.rebrickable.com/media/sets/fig-000004/60741.jpg
4,4,fig-000005,Captain America with Short Legs,3,https://cdn.rebrickable.com/media/sets/fig-000005/55964.jpg


In [None]:
# Preview the first rows of cast_in (MOVIE_ID, CAST_ID, CHARAC columns)
cast_in.head()

Unnamed: 0,MOVIE_ID,CAST_ID,CHARAC
0,862,31,Woody (voice)
1,862,12898,Buzz Lightyear (voice)
2,862,7167,Mr. Potato Head (voice)
3,862,12899,Slinky Dog (voice)
4,862,12900,Rex (voice)


In [None]:
# Spot-check that movie ID 862 (seen in the cast_in preview) resolves to a title.
# The comparison is done on the string form of MOVIE_ID so this cell gives the
# same result whether it runs before or after MOVIE_ID is cast to int64 in the
# mapping cell further down.
print(movies.loc[movies['MOVIE_ID'].astype(str) == '862'])

      MOVIE_ID      TITLE
15893      862  Toy Story


In [None]:
# Preview the first rows of the actor lookup table (ID, NAME columns)
actors.head()

Unnamed: 0,ID,NAME
0,104117,Maria Harper
1,234026,Marie-Ginette Guay
2,554832,Pauline Martin
3,1296769,Robert Reynaert
4,142775,Marika Lhoumeau


In [None]:
def clean_minifig_name(name):
  """Reduce a minifig name to the bare character name used for matching.

  Everything from the first comma, the first " in " and the first " with "
  onward is dropped (in that order), the word "Professor" is removed, and
  whitespace is normalised.

  Args:
    name: Raw minifig name. Non-string values (for example the NaN pandas
      uses for a missing name) are returned unchanged instead of raising.

  Returns:
    The cleaned name, or `name` itself when it is not a string.
  """
  # Missing names arrive as float NaN even with dtype=str; pass them through
  if not isinstance(name, str):
    return name

  # Drop the descriptive suffix introduced by each separator, first match only
  for sep in (',', ' in ', ' with '):
    name = name.split(sep, 1)[0]

  # Remove word "Professor"
  name = name.replace('Professor', '')

  # Trim the ends and collapse the double space left behind when "Professor"
  # sat in the middle of the name, so exact-equality matching still works
  return ' '.join(name.split())

def clean_minifigs(minifigs):
  """Normalise the "name" column of the minifig table.

  Each name is passed through clean_minifig_name. The column is overwritten
  on the frame that was passed in, and that same frame is returned.
  """
  cleaned_names = minifigs["name"].map(clean_minifig_name)
  minifigs["name"] = cleaned_names
  return minifigs


In [None]:
# Franchise keywords: a cast row is kept only when its movie title contains
# one of these substrings (case-sensitive).
# NOTE(review): because this is a plain substring test, "Ghost Busters" will
# not match a title spelled "Ghostbusters" - confirm the spelling in movies.csv.
valid_movies = {"Ghost Busters", "Harry Potter", "Indiana Jones", "Jurassic World", "Pirates of the Caribbean", "Star Wars", "Stranger Things"}

# Minifig names that are never mapped (presumably too generic to attribute to
# a single actor - TODO confirm).
excluded_names = {"Stormtrooper", "Woman"}

minifigs = clean_minifigs(minifigs)

# Both frames were loaded as strings; cast the join key to int64 before merging
cast_in['MOVIE_ID']=cast_in['MOVIE_ID'].astype(np.int64)
movies['MOVIE_ID']=movies['MOVIE_ID'].astype(np.int64)

# One row per (movie, cast member, character) with the movie title attached
movie_characters = cast_in.merge(movies, on='MOVIE_ID', how='inner')

# Build the character-name -> cast-ID lookup in a single pass, keeping only
# rows from the selected franchises. Row order is preserved, so the resulting
# mapping comes out in the same order as scanning the frame once per minifig.
# Rows whose title is missing (not a string) are skipped rather than raising.
cast_by_character = {}
for charac, cast, title in zip(movie_characters["CHARAC"],
                               movie_characters["CAST_ID"],
                               movie_characters["TITLE"]):
  if isinstance(title, str) and any(movie in title for movie in valid_movies):
    cast_by_character.setdefault(charac, []).append(cast)

cast_ids = []
fig_nums = []

# (cast_id, fig_num) pairs already emitted, so each pair appears only once
used_pairs = set()

for name, fig_num in zip(minifigs["name"], minifigs["fig_num"]):
  if name in excluded_names:
    continue
  # Exact match between the cleaned minifig name and the credited character name
  for cast in cast_by_character.get(name, ()):
    if (cast, fig_num) not in used_pairs:
      cast_ids.append(cast)
      fig_nums.append(fig_num)
      used_pairs.add((cast, fig_num))

data = {"cast_id": cast_ids, "fig_num": fig_nums}

minifig_cast_mapping = pd.DataFrame(data)

minifig_cast_mapping.head()

Unnamed: 0,cast_id,fig_num
0,10980,fig-000029
1,186151,fig-000055
2,1427935,fig-000058
3,1770474,fig-000110
4,24343,fig-000124


In [None]:
# Persist the minifig -> cast mapping next to the input CSVs.
# NOTE(review): to_csv writes the DataFrame index as an unnamed first column by
# default; pass index=False if downstream consumers do not need it.
minifig_cast_mapping.to_csv("/content/drive/My Drive/550_project/minifig_cast_mapping.csv")

In [None]:
# Sanity check, step 1: attach each mapped minifig's details (name, part
# count, image) to the mapping via an inner join on fig_num.
temp = pd.merge(minifig_cast_mapping, minifigs, how="inner", on="fig_num")
temp.head()

Unnamed: 0.1,cast_id,fig_num,Unnamed: 0,name,num_parts,image_url
0,10980,fig-000029,28,Harry Potter,4,https://cdn.rebrickable.com/media/sets/fig-000029/60572.jpg
1,186151,fig-000055,54,Itchy,3,https://cdn.rebrickable.com/media/sets/fig-000055/65305.jpg
2,1427935,fig-000058,57,Clone Trooper,4,https://cdn.rebrickable.com/media/sets/fig-000058/66020.jpg
3,1770474,fig-000110,108,Mermaid,4,https://cdn.rebrickable.com/media/sets/fig-000110/71931.jpg
4,24343,fig-000124,121,Chewbacca,3,https://cdn.rebrickable.com/media/sets/fig-000124/58717.jpg


In [None]:
# Sanity check, step 2: resolve each cast_id to the actor's name with an inner
# join against the actor table, then eyeball the first 300 pairings.
final = pd.merge(temp, actors, how="inner", left_on="cast_id", right_on="ID")
final.head(300)

Unnamed: 0.1,cast_id,fig_num,Unnamed: 0,name,num_parts,image_url,ID,NAME
0,10980,fig-000029,28,Harry Potter,4,https://cdn.rebrickable.com/media/sets/fig-000029/60572.jpg,10980,Daniel Radcliffe
1,10980,fig-000276,270,Harry Potter,4,https://cdn.rebrickable.com/media/sets/fig-000276/60755.jpg,10980,Daniel Radcliffe
2,10980,fig-000457,449,Harry Potter,4,https://cdn.rebrickable.com/media/sets/fig-000457/60621.jpg,10980,Daniel Radcliffe
3,10980,fig-000471,463,Harry Potter,5,https://cdn.rebrickable.com/media/sets/fig-000471/60211.jpg,10980,Daniel Radcliffe
4,10980,fig-000595,587,Harry Potter,5,https://cdn.rebrickable.com/media/sets/fig-000595/60218.jpg,10980,Daniel Radcliffe
...,...,...,...,...,...,...,...,...
295,6,fig-003509,3459,C-3PO,3,https://cdn.rebrickable.com/media/sets/fig-003509/58548.jpg,6,Anthony Daniels
296,6,fig-004046,3970,C-3PO,3,https://cdn.rebrickable.com/media/sets/fig-004046/65475.jpg,6,Anthony Daniels
297,6,fig-004737,4616,C-3PO,4,https://cdn.rebrickable.com/media/sets/fig-004737/59236.jpg,6,Anthony Daniels
298,10990,fig-000593,585,Hermione Granger,4,https://cdn.rebrickable.com/media/sets/fig-000593/60802.jpg,10990,Emma Watson
