In [None]:
import pandas as pd

def merge_person_names(df_credits, df_names, id_column):
    """Return one row per person ID found in ``df_credits[id_column]``.

    Each cell of ``id_column`` is expected to hold a comma-separated string
    of person IDs. The IDs are split out, de-duplicated and left-joined to
    ``df_names`` on its ``nconst`` column.

    Args:
        df_credits: DataFrame containing ``id_column``.
        df_names: DataFrame with at least the columns ``nconst``,
            ``primaryName`` and ``knownForTitles``.
        id_column: Name of the column in ``df_credits`` holding the
            comma-separated IDs (e.g. ``'directors'``).

    Returns:
        A new DataFrame with the columns ``[id_column, 'primaryName',
        'knownForTitles']``, in order of first appearance of each ID. IDs
        without a match in ``df_names`` are kept with missing name/titles.
        Neither input frame is modified.
    """
    ids = (
        df_credits[id_column]
        .dropna()            # missing cells would break a plain str.split
        .astype(str)
        .str.split(',')
        .explode()
        .str.strip()         # "nm1, nm2" must still match nconst "nm2"
    )
    # Drop empty tokens (e.g. from a trailing comma) and repeated IDs
    # *before* the join so the names table is matched once per person.
    ids = ids[ids != ''].drop_duplicates()

    merged = ids.to_frame(name=id_column).merge(
        df_names[['nconst', 'primaryName', 'knownForTitles']],
        left_on=id_column,
        right_on='nconst',
        how='left',
    )

    # Lists are unhashable; flatten them to strings so drop_duplicates works
    # even if the caller passes already-parsed knownForTitles.
    merged['knownForTitles'] = merged['knownForTitles'].apply(
        lambda x: ','.join(x) if isinstance(x, list) else x
    )

    columns = [id_column, 'primaryName', 'knownForTitles']
    merged = merged.drop_duplicates(subset=columns)
    return merged[columns].reset_index(drop=True)


# The guard keeps the file I/O out of plain imports; in a notebook cell or
# when run as a script __name__ is "__main__", so this still executes.
if __name__ == "__main__":
    # Load the data
    df_oscars = pd.read_csv("oscars_merged.csv")
    df_names = pd.read_csv("name_basics_data.tsv", sep='\t')

    # Extract the relevant columns from the Oscars data
    df_oscars = df_oscars[['directors', 'writers']]

    # Split the ID lists, merge with the names on nconst, remove duplicates
    merged_directors = merge_person_names(df_oscars, df_names, 'directors')
    merged_writers = merge_person_names(df_oscars, df_names, 'writers')

    # Save the merged data in new files
    merged_directors.to_csv("directors_merged.csv", index=False)
    merged_writers.to_csv("writers_merged.csv", index=False)


