In [1]:
import pandas as pd

In [2]:
movie_df = pd.read_csv("../backend/data/movies_2020_new.csv")
principal_df = pd.read_csv("../backend/data/new_title_principals.csv")
name_df = pd.read_csv("../backend/data/new_name_basics.csv")

In [3]:
# left join movie_df with principal_df
movie_df = movie_df.merge(principal_df, on="tconst", how="left")
# left join movie_df with name_df
movie_df = movie_df.merge(name_df, left_on="nconst", right_on="nconst", how="left")

In [4]:
movie_df.columns

Index(['tconst', 'titleType', 'primaryTitle', 'originalTitle', 'isAdult',
       'startYear', 'runtimeMinutes', 'genres', 'posterPath', 'backdropPath',
       'trailerPath', 'description', 'ordering', 'nconst', 'category', 'job',
       'characters', 'primaryName', 'birthYear', 'deathYear',
       'primaryProfession', 'knownForTitles'],
      dtype='object')

In [5]:
movie_director_df = movie_df[(movie_df["category"] == "director")].reset_index(drop=True)

In [6]:
movie_director_df["primaryName"].fillna("Unknown", inplace=True)

The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  movie_director_df["primaryName"].fillna("Unknown", inplace=True)


In [7]:
movie_director_list_df = movie_director_df.groupby("tconst")["primaryName"].apply(lambda x: ', '.join(x)).reset_index()
movie_df = pd.read_csv("../backend/data/movies_2020_new.csv")
movie_director_df = movie_df.merge(movie_director_list_df, on="tconst", how="left")
# movie_director_df = movie_df.drop(columns=["nconst", "category"]).merge(movie_director_list_df, on="tconst", how="left")
movie_director_df.shape

(16046, 13)

In [8]:
movie_director_df.columns

Index(['tconst', 'titleType', 'primaryTitle', 'originalTitle', 'isAdult',
       'startYear', 'runtimeMinutes', 'genres', 'posterPath', 'backdropPath',
       'trailerPath', 'description', 'primaryName'],
      dtype='object')

In [9]:
movie_director_df.rename(columns={"primaryName": "directors"}, inplace=True)

In [10]:
movie_director_df = movie_director_df[["tconst", "primaryTitle", "startYear", "genres", "posterPath", "description", "directors"]]

In [11]:
movie_director_df["description"] = movie_director_df["description"].str.replace(r"\\n", " ", regex=True)
movie_director_df["description"] = movie_director_df["description"].str.replace(r"\\t", " ", regex=True)
movie_director_df["description"] = movie_director_df["description"].str.replace(r"\\r", " ", regex=True)
movie_director_df["description"] = movie_director_df["description"].str.replace(r"\\", " ", regex=True)
movie_director_df["description"] = movie_director_df["description"].str.replace(r"\n", " ", regex=True)
movie_director_df["description"] = movie_director_df["description"].str.replace(r"\t", " ", regex=True)
movie_director_df["description"] = movie_director_df["description"].str.replace(r"\r", " ", regex=True)
movie_director_df["description"] = movie_director_df["description"].str.replace(r"\\", " ", regex=True)

In [12]:
movie_director_df.tail()

Unnamed: 0,tconst,primaryTitle,startYear,genres,posterPath,description,directors
16041,tt9898844,The Hunting,2021,"['Horror', 'Mystery', 'Thriller']",https://image.tmdb.org/t/p/w500/kvhrltQIRp1u84...,When a mysterious animal attack leaves a mutil...,Mark Andrew Hamer
16042,tt9900940,Scrapper,2021,"['Crime', 'Drama', 'Thriller']",https://image.tmdb.org/t/p/w500/ydnOAvlBzdqMyB...,"Ex-con turned ""scrapper"" Jake leads a quiet li...",Bari Kang
16043,tt9904648,The Contrast,2022,['Comedy'],https://image.tmdb.org/t/p/w500/sioXCmW4Q2VXLY...,On the day before her wedding to a hipster pla...,"Sean Dube, Presley Paras"
16044,tt9907782,The Cursed,2021,"['Fantasy', 'Horror', 'Mystery']",https://image.tmdb.org/t/p/w500/bewmBcjJxHeipS...,"In the late 19th century, a brutal land baron ...",Sean Ellis
16045,tt9914972,Blind Ambition,2021,['Documentary'],https://image.tmdb.org/t/p/w500/dVTPl57aAkGeui...,The inspiring story of four Zimbabwean men who...,"Warwick Ross, Robert Coe"


In [13]:
movie_director_df.to_csv("../backend/data/movie_director.csv", index=False, encoding="utf-8", sep="\t")