        
# Extraction, Transformation and Loading Process (ETL)

# STEP 1: Loading and viewing the data

In [177]:
# Import the necessary libraries for data analysis and manipulation
import pandas as pd # Import the pandas library for data analysis and manipulation
import numpy as np # Import the numpy library for numerical operations and array manipulation
import ast # Import the ast library for literal evaluation of strings

In [178]:
# Load the movies dataset into a DataFrame straight from a Google Drive share link.
# The file id is the second-to-last path segment of the share link
# (.../file/d/<file id>/view?...); it is appended to the `uc?id=` URL that pandas reads.
share_link = 'https://drive.google.com/file/d/1xYjFUQ0-seDttd6Dj0J1x0E5PIHyR_Oy/view?usp=sharing'
file_id = share_link.split('/')[-2]
url = 'https://drive.google.com/uc?id=' + file_id

# low_memory=False parses the file in one pass instead of chunk by chunk
df_movies = pd.read_csv(url, low_memory=False)

# Alternative: read a local CSV instead of downloading
# df_movies = pd.read_csv('../data/movies_dataset.csv', low_memory=False)

  

In [179]:
# Show every column when a DataFrame is displayed (None removes the column limit)
pd.set_option('display.max_columns', None)

# To go back to the default column limit, run:
# pd.reset_option('display.max_columns')

In [180]:
# Preview the first 5 rows to get an initial idea of the columns and their raw values
df_movies.head()

Unnamed: 0,adult,belongs_to_collection,budget,genres,homepage,id,imdb_id,original_language,original_title,overview,popularity,poster_path,production_companies,production_countries,release_date,revenue,runtime,spoken_languages,status,tagline,title,video,vote_average,vote_count
0,False,"{'id': 10194, 'name': 'Toy Story Collection', ...",30000000,"[{'id': 16, 'name': 'Animation'}, {'id': 35, '...",http://toystory.disney.com/toy-story,862,tt0114709,en,Toy Story,"Led by Woody, Andy's toys live happily in his ...",21.946943,/rhIRbceoE9lR4veEXuwCC2wARtG.jpg,"[{'name': 'Pixar Animation Studios', 'id': 3}]","[{'iso_3166_1': 'US', 'name': 'United States o...",1995-10-30,373554033.0,81.0,"[{'iso_639_1': 'en', 'name': 'English'}]",Released,,Toy Story,False,7.7,5415.0
1,False,,65000000,"[{'id': 12, 'name': 'Adventure'}, {'id': 14, '...",,8844,tt0113497,en,Jumanji,When siblings Judy and Peter discover an encha...,17.015539,/vzmL6fP7aPKNKPRTFnZmiUfciyV.jpg,"[{'name': 'TriStar Pictures', 'id': 559}, {'na...","[{'iso_3166_1': 'US', 'name': 'United States o...",1995-12-15,262797249.0,104.0,"[{'iso_639_1': 'en', 'name': 'English'}, {'iso...",Released,Roll the dice and unleash the excitement!,Jumanji,False,6.9,2413.0
2,False,"{'id': 119050, 'name': 'Grumpy Old Men Collect...",0,"[{'id': 10749, 'name': 'Romance'}, {'id': 35, ...",,15602,tt0113228,en,Grumpier Old Men,A family wedding reignites the ancient feud be...,11.7129,/6ksm1sjKMFLbO7UY2i6G1ju9SML.jpg,"[{'name': 'Warner Bros.', 'id': 6194}, {'name'...","[{'iso_3166_1': 'US', 'name': 'United States o...",1995-12-22,0.0,101.0,"[{'iso_639_1': 'en', 'name': 'English'}]",Released,Still Yelling. Still Fighting. Still Ready for...,Grumpier Old Men,False,6.5,92.0
3,False,,16000000,"[{'id': 35, 'name': 'Comedy'}, {'id': 18, 'nam...",,31357,tt0114885,en,Waiting to Exhale,"Cheated on, mistreated and stepped on, the wom...",3.859495,/16XOMpEaLWkrcPqSQqhTmeJuqQl.jpg,[{'name': 'Twentieth Century Fox Film Corporat...,"[{'iso_3166_1': 'US', 'name': 'United States o...",1995-12-22,81452156.0,127.0,"[{'iso_639_1': 'en', 'name': 'English'}]",Released,Friends are the people who let you be yourself...,Waiting to Exhale,False,6.1,34.0
4,False,"{'id': 96871, 'name': 'Father of the Bride Col...",0,"[{'id': 35, 'name': 'Comedy'}]",,11862,tt0113041,en,Father of the Bride Part II,Just when George Banks has recovered from his ...,8.387519,/e64sOI48hQXyru7naBFyssKFxVd.jpg,"[{'name': 'Sandollar Productions', 'id': 5842}...","[{'iso_3166_1': 'US', 'name': 'United States o...",1995-02-10,76578911.0,106.0,"[{'iso_639_1': 'en', 'name': 'English'}]",Released,Just When His World Is Back To Normal... He's ...,Father of the Bride Part II,False,5.7,173.0


In [181]:
# Summarize the DataFrame: column names, non-null counts and dtypes
df_movies.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 45466 entries, 0 to 45465
Data columns (total 24 columns):
 #   Column                 Non-Null Count  Dtype  
---  ------                 --------------  -----  
 0   adult                  45466 non-null  object 
 1   belongs_to_collection  4494 non-null   object 
 2   budget                 45466 non-null  object 
 3   genres                 45466 non-null  object 
 4   homepage               7782 non-null   object 
 5   id                     45466 non-null  object 
 6   imdb_id                45449 non-null  object 
 7   original_language      45455 non-null  object 
 8   original_title         45466 non-null  object 
 9   overview               44512 non-null  object 
 10  popularity             45461 non-null  object 
 11  poster_path            45080 non-null  object 
 12  production_companies   45463 non-null  object 
 13  production_countries   45463 non-null  object 
 14  release_date           45379 non-null  object 
 15  re

In [182]:
# Count the missing (null) values in each column
print(df_movies.isnull().sum())

adult                        0
belongs_to_collection    40972
budget                       0
genres                       0
homepage                 37684
id                           0
imdb_id                     17
original_language           11
original_title               0
overview                   954
popularity                   5
poster_path                386
production_companies         3
production_countries         3
release_date                87
revenue                      6
runtime                    263
spoken_languages             6
status                      87
tagline                  25054
title                        6
video                        6
vote_average                 6
vote_count                   6
dtype: int64


---------------

# STEP 2: Unnest the column 'belongs_to_collection'

In [183]:
 # Function to convert the stringified column to its corresponding dictionary format
def parse_col(col):
    """Parse a stringified Python literal (e.g. a dict) into the object it represents.

    Parameters
    ----------
    col : object
        A single cell value, normally a string such as "{'id': 1, 'name': 'X'}".

    Returns
    -------
    object or None
        The evaluated literal, or None when the value cannot be parsed
        (missing value, truncated text, non-literal content).
    """
    try:
        return ast.literal_eval(col)
    except (ValueError, SyntaxError, TypeError):
        # literal_eval raises ValueError for non-string input such as NaN and
        # SyntaxError for text that is not valid Python; both mean "unparseable",
        # so one malformed cell must not abort the whole .apply().
        return None

# Turn each stringified collection entry into a Python object (None when unparseable)
df_movies['belongs_to_collection'] = df_movies['belongs_to_collection'].apply(parse_col)

# Expand every parsed entry into its own columns, prefixed with the source column name
collection_fields = (
    df_movies['belongs_to_collection']
    .apply(pd.Series)
    .add_prefix('belongs_to_collection_')
)

# Replace the nested column with the expanded ones (rows are aligned on the index)
df_movies = pd.concat(
    [df_movies.drop(columns=['belongs_to_collection']), collection_fields],
    axis=1,
)

# Preview the first 5 rows to check the change
df_movies.head()

Unnamed: 0,adult,budget,genres,homepage,id,imdb_id,original_language,original_title,overview,popularity,poster_path,production_companies,production_countries,release_date,revenue,runtime,spoken_languages,status,tagline,title,video,vote_average,vote_count,belongs_to_collection_id,belongs_to_collection_name,belongs_to_collection_poster_path,belongs_to_collection_backdrop_path,belongs_to_collection_0
0,False,30000000,"[{'id': 16, 'name': 'Animation'}, {'id': 35, '...",http://toystory.disney.com/toy-story,862,tt0114709,en,Toy Story,"Led by Woody, Andy's toys live happily in his ...",21.946943,/rhIRbceoE9lR4veEXuwCC2wARtG.jpg,"[{'name': 'Pixar Animation Studios', 'id': 3}]","[{'iso_3166_1': 'US', 'name': 'United States o...",1995-10-30,373554033.0,81.0,"[{'iso_639_1': 'en', 'name': 'English'}]",Released,,Toy Story,False,7.7,5415.0,10194.0,Toy Story Collection,/7G9915LfUQ2lVfwMEEhDsn3kT4B.jpg,/9FBwqcd9IRruEDUrTdcaafOMKUq.jpg,
1,False,65000000,"[{'id': 12, 'name': 'Adventure'}, {'id': 14, '...",,8844,tt0113497,en,Jumanji,When siblings Judy and Peter discover an encha...,17.015539,/vzmL6fP7aPKNKPRTFnZmiUfciyV.jpg,"[{'name': 'TriStar Pictures', 'id': 559}, {'na...","[{'iso_3166_1': 'US', 'name': 'United States o...",1995-12-15,262797249.0,104.0,"[{'iso_639_1': 'en', 'name': 'English'}, {'iso...",Released,Roll the dice and unleash the excitement!,Jumanji,False,6.9,2413.0,,,,,
2,False,0,"[{'id': 10749, 'name': 'Romance'}, {'id': 35, ...",,15602,tt0113228,en,Grumpier Old Men,A family wedding reignites the ancient feud be...,11.7129,/6ksm1sjKMFLbO7UY2i6G1ju9SML.jpg,"[{'name': 'Warner Bros.', 'id': 6194}, {'name'...","[{'iso_3166_1': 'US', 'name': 'United States o...",1995-12-22,0.0,101.0,"[{'iso_639_1': 'en', 'name': 'English'}]",Released,Still Yelling. Still Fighting. Still Ready for...,Grumpier Old Men,False,6.5,92.0,119050.0,Grumpy Old Men Collection,/nLvUdqgPgm3F85NMCii9gVFUcet.jpg,/hypTnLot2z8wpFS7qwsQHW1uV8u.jpg,
3,False,16000000,"[{'id': 35, 'name': 'Comedy'}, {'id': 18, 'nam...",,31357,tt0114885,en,Waiting to Exhale,"Cheated on, mistreated and stepped on, the wom...",3.859495,/16XOMpEaLWkrcPqSQqhTmeJuqQl.jpg,[{'name': 'Twentieth Century Fox Film Corporat...,"[{'iso_3166_1': 'US', 'name': 'United States o...",1995-12-22,81452156.0,127.0,"[{'iso_639_1': 'en', 'name': 'English'}]",Released,Friends are the people who let you be yourself...,Waiting to Exhale,False,6.1,34.0,,,,,
4,False,0,"[{'id': 35, 'name': 'Comedy'}]",,11862,tt0113041,en,Father of the Bride Part II,Just when George Banks has recovered from his ...,8.387519,/e64sOI48hQXyru7naBFyssKFxVd.jpg,"[{'name': 'Sandollar Productions', 'id': 5842}...","[{'iso_3166_1': 'US', 'name': 'United States o...",1995-02-10,76578911.0,106.0,"[{'iso_639_1': 'en', 'name': 'English'}]",Released,Just When His World Is Back To Normal... He's ...,Father of the Bride Part II,False,5.7,173.0,96871.0,Father of the Bride Collection,/nts4iOmNnq7GNicycMJ9pSAn204.jpg,/7qwE57OVZmMJChBpLEbJEmzUydk.jpg,


2.1 Remove column 'belongs_to_collection_0' as it is not needed

In [184]:
# Drop 'belongs_to_collection_0', a by-product column of the unnesting step that is not needed
df_movies = df_movies.drop(columns=['belongs_to_collection_0'])

---

# STEP 3: Treatment of null values

In [185]:
# Missing 'revenue' and 'budget' values are replaced with 0
for money_col in ('revenue', 'budget'):
    df_movies[money_col] = df_movies[money_col].fillna(0)

In [186]:
# Drop the rows whose 'release_date' is missing
df_movies = df_movies.dropna(subset=['release_date'])

In [187]:
# Check for erroneous values in 'release_date'.
# The column still holds strings (dtype object), so the sort is lexicographic and
# malformed entries show up at the two ends of the sorted output.
df_movies['release_date'].sort_values()

19730             1
29503            12
34940    1874-12-09
34937    1878-06-14
41602    1883-11-19
            ...    
38130    2018-04-25
30402    2018-11-07
38885    2018-12-31
26559    2020-12-16
35587            22
Name: release_date, Length: 45379, dtype: object

In [188]:
# Drop the rows whose 'release_date' is not a valid YYYY-MM-DD date.
# The rows are selected by parsing the values rather than by hard-coded index labels:
# fixed labels only match one particular version of the file, and once the index has
# been reset they would point at unrelated, valid rows if this cell were run again.
valid_release_date = pd.to_datetime(
    df_movies['release_date'], format='%Y-%m-%d', errors='coerce'
).notna()
df_movies = df_movies.drop(index=df_movies.index[~valid_release_date])

In [189]:
# Renumber the rows 0..n-1 after the drops above; the old index is discarded
df_movies = df_movies.reset_index(drop=True)

In [190]:
# Re-check 'release_date' after the clean-up: both ends of the (lexicographic) sort
# should now be well-formed dates
df_movies['release_date'].sort_values()

34895    1874-12-09
34892    1878-06-14
41533    1883-11-19
34888    1887-08-18
34889    1888-01-01
            ...    
44453    2018-04-04
38074    2018-04-25
30368    2018-11-07
38827    2018-12-31
26529    2020-12-16
Name: release_date, Length: 45376, dtype: object

---

# STEP 4: Format the dates and extract the year from the release date and add a new column called 'release_year'

In [191]:
# Parse the release dates once; values that cannot be parsed become NaT
release_dt = pd.to_datetime(df_movies['release_date'], errors='coerce')

# Store the dates back as 'YYYY-mm-dd' strings (assigned through .loc)
df_movies.loc[:, 'release_date'] = release_dt.dt.strftime('%Y-%m-%d')

# 'release_year' holds the year as an integer; rows without a valid date get 0
df_movies['release_year'] = release_dt.dt.year.fillna(0).astype(int)

# Display the DataFrame with the new 'release_year' column
df_movies.head()

Unnamed: 0,adult,budget,genres,homepage,id,imdb_id,original_language,original_title,overview,popularity,poster_path,production_companies,production_countries,release_date,revenue,runtime,spoken_languages,status,tagline,title,video,vote_average,vote_count,belongs_to_collection_id,belongs_to_collection_name,belongs_to_collection_poster_path,belongs_to_collection_backdrop_path,release_year
0,False,30000000,"[{'id': 16, 'name': 'Animation'}, {'id': 35, '...",http://toystory.disney.com/toy-story,862,tt0114709,en,Toy Story,"Led by Woody, Andy's toys live happily in his ...",21.946943,/rhIRbceoE9lR4veEXuwCC2wARtG.jpg,"[{'name': 'Pixar Animation Studios', 'id': 3}]","[{'iso_3166_1': 'US', 'name': 'United States o...",1995-10-30,373554033.0,81.0,"[{'iso_639_1': 'en', 'name': 'English'}]",Released,,Toy Story,False,7.7,5415.0,10194.0,Toy Story Collection,/7G9915LfUQ2lVfwMEEhDsn3kT4B.jpg,/9FBwqcd9IRruEDUrTdcaafOMKUq.jpg,1995
1,False,65000000,"[{'id': 12, 'name': 'Adventure'}, {'id': 14, '...",,8844,tt0113497,en,Jumanji,When siblings Judy and Peter discover an encha...,17.015539,/vzmL6fP7aPKNKPRTFnZmiUfciyV.jpg,"[{'name': 'TriStar Pictures', 'id': 559}, {'na...","[{'iso_3166_1': 'US', 'name': 'United States o...",1995-12-15,262797249.0,104.0,"[{'iso_639_1': 'en', 'name': 'English'}, {'iso...",Released,Roll the dice and unleash the excitement!,Jumanji,False,6.9,2413.0,,,,,1995
2,False,0,"[{'id': 10749, 'name': 'Romance'}, {'id': 35, ...",,15602,tt0113228,en,Grumpier Old Men,A family wedding reignites the ancient feud be...,11.7129,/6ksm1sjKMFLbO7UY2i6G1ju9SML.jpg,"[{'name': 'Warner Bros.', 'id': 6194}, {'name'...","[{'iso_3166_1': 'US', 'name': 'United States o...",1995-12-22,0.0,101.0,"[{'iso_639_1': 'en', 'name': 'English'}]",Released,Still Yelling. Still Fighting. Still Ready for...,Grumpier Old Men,False,6.5,92.0,119050.0,Grumpy Old Men Collection,/nLvUdqgPgm3F85NMCii9gVFUcet.jpg,/hypTnLot2z8wpFS7qwsQHW1uV8u.jpg,1995
3,False,16000000,"[{'id': 35, 'name': 'Comedy'}, {'id': 18, 'nam...",,31357,tt0114885,en,Waiting to Exhale,"Cheated on, mistreated and stepped on, the wom...",3.859495,/16XOMpEaLWkrcPqSQqhTmeJuqQl.jpg,[{'name': 'Twentieth Century Fox Film Corporat...,"[{'iso_3166_1': 'US', 'name': 'United States o...",1995-12-22,81452156.0,127.0,"[{'iso_639_1': 'en', 'name': 'English'}]",Released,Friends are the people who let you be yourself...,Waiting to Exhale,False,6.1,34.0,,,,,1995
4,False,0,"[{'id': 35, 'name': 'Comedy'}]",,11862,tt0113041,en,Father of the Bride Part II,Just when George Banks has recovered from his ...,8.387519,/e64sOI48hQXyru7naBFyssKFxVd.jpg,"[{'name': 'Sandollar Productions', 'id': 5842}...","[{'iso_3166_1': 'US', 'name': 'United States o...",1995-02-10,76578911.0,106.0,"[{'iso_639_1': 'en', 'name': 'English'}]",Released,Just When His World Is Back To Normal... He's ...,Father of the Bride Part II,False,5.7,173.0,96871.0,Father of the Bride Collection,/nts4iOmNnq7GNicycMJ9pSAn204.jpg,/7qwE57OVZmMJChBpLEbJEmzUydk.jpg,1995


---

# STEP 5: Convert non-numeric values to NaN and create the new column 'return'

In [192]:
# Coerce 'budget' to numbers; anything that is not numeric becomes NaN and then 0
budget_numeric = pd.to_numeric(df_movies['budget'], errors='coerce')
df_movies['budget'] = budget_numeric.fillna(0)

# Return on investment = revenue / budget.
# 0 / 0 yields NaN, which is replaced with 0 here; a positive revenue over a zero
# budget yields inf, which is still present after this cell.
df_movies['return'] = df_movies['revenue'].div(df_movies['budget']).fillna(0)

# Display the DataFrame with the new 'return' column
df_movies.head()


Unnamed: 0,adult,budget,genres,homepage,id,imdb_id,original_language,original_title,overview,popularity,poster_path,production_companies,production_countries,release_date,revenue,runtime,spoken_languages,status,tagline,title,video,vote_average,vote_count,belongs_to_collection_id,belongs_to_collection_name,belongs_to_collection_poster_path,belongs_to_collection_backdrop_path,release_year,return
0,False,30000000,"[{'id': 16, 'name': 'Animation'}, {'id': 35, '...",http://toystory.disney.com/toy-story,862,tt0114709,en,Toy Story,"Led by Woody, Andy's toys live happily in his ...",21.946943,/rhIRbceoE9lR4veEXuwCC2wARtG.jpg,"[{'name': 'Pixar Animation Studios', 'id': 3}]","[{'iso_3166_1': 'US', 'name': 'United States o...",1995-10-30,373554033.0,81.0,"[{'iso_639_1': 'en', 'name': 'English'}]",Released,,Toy Story,False,7.7,5415.0,10194.0,Toy Story Collection,/7G9915LfUQ2lVfwMEEhDsn3kT4B.jpg,/9FBwqcd9IRruEDUrTdcaafOMKUq.jpg,1995,12.451801
1,False,65000000,"[{'id': 12, 'name': 'Adventure'}, {'id': 14, '...",,8844,tt0113497,en,Jumanji,When siblings Judy and Peter discover an encha...,17.015539,/vzmL6fP7aPKNKPRTFnZmiUfciyV.jpg,"[{'name': 'TriStar Pictures', 'id': 559}, {'na...","[{'iso_3166_1': 'US', 'name': 'United States o...",1995-12-15,262797249.0,104.0,"[{'iso_639_1': 'en', 'name': 'English'}, {'iso...",Released,Roll the dice and unleash the excitement!,Jumanji,False,6.9,2413.0,,,,,1995,4.043035
2,False,0,"[{'id': 10749, 'name': 'Romance'}, {'id': 35, ...",,15602,tt0113228,en,Grumpier Old Men,A family wedding reignites the ancient feud be...,11.7129,/6ksm1sjKMFLbO7UY2i6G1ju9SML.jpg,"[{'name': 'Warner Bros.', 'id': 6194}, {'name'...","[{'iso_3166_1': 'US', 'name': 'United States o...",1995-12-22,0.0,101.0,"[{'iso_639_1': 'en', 'name': 'English'}]",Released,Still Yelling. Still Fighting. Still Ready for...,Grumpier Old Men,False,6.5,92.0,119050.0,Grumpy Old Men Collection,/nLvUdqgPgm3F85NMCii9gVFUcet.jpg,/hypTnLot2z8wpFS7qwsQHW1uV8u.jpg,1995,0.0
3,False,16000000,"[{'id': 35, 'name': 'Comedy'}, {'id': 18, 'nam...",,31357,tt0114885,en,Waiting to Exhale,"Cheated on, mistreated and stepped on, the wom...",3.859495,/16XOMpEaLWkrcPqSQqhTmeJuqQl.jpg,[{'name': 'Twentieth Century Fox Film Corporat...,"[{'iso_3166_1': 'US', 'name': 'United States o...",1995-12-22,81452156.0,127.0,"[{'iso_639_1': 'en', 'name': 'English'}]",Released,Friends are the people who let you be yourself...,Waiting to Exhale,False,6.1,34.0,,,,,1995,5.09076
4,False,0,"[{'id': 35, 'name': 'Comedy'}]",,11862,tt0113041,en,Father of the Bride Part II,Just when George Banks has recovered from his ...,8.387519,/e64sOI48hQXyru7naBFyssKFxVd.jpg,"[{'name': 'Sandollar Productions', 'id': 5842}...","[{'iso_3166_1': 'US', 'name': 'United States o...",1995-02-10,76578911.0,106.0,"[{'iso_639_1': 'en', 'name': 'English'}]",Released,Just When His World Is Back To Normal... He's ...,Father of the Bride Part II,False,5.7,173.0,96871.0,Father of the Bride Collection,/nts4iOmNnq7GNicycMJ9pSAn204.jpg,/7qwE57OVZmMJChBpLEbJEmzUydk.jpg,1995,inf


5.1 Replace the infinite values in the 'return' column with zero

In [193]:
# Replace infinite values (produced by dividing by a zero budget) with zeros in the 'return' column
df_movies['return'] = df_movies['return'].replace([np.inf, -np.inf], 0)

# Display the DataFrame with the corrected 'return' column
df_movies.head()

Unnamed: 0,adult,budget,genres,homepage,id,imdb_id,original_language,original_title,overview,popularity,poster_path,production_companies,production_countries,release_date,revenue,runtime,spoken_languages,status,tagline,title,video,vote_average,vote_count,belongs_to_collection_id,belongs_to_collection_name,belongs_to_collection_poster_path,belongs_to_collection_backdrop_path,release_year,return
0,False,30000000,"[{'id': 16, 'name': 'Animation'}, {'id': 35, '...",http://toystory.disney.com/toy-story,862,tt0114709,en,Toy Story,"Led by Woody, Andy's toys live happily in his ...",21.946943,/rhIRbceoE9lR4veEXuwCC2wARtG.jpg,"[{'name': 'Pixar Animation Studios', 'id': 3}]","[{'iso_3166_1': 'US', 'name': 'United States o...",1995-10-30,373554033.0,81.0,"[{'iso_639_1': 'en', 'name': 'English'}]",Released,,Toy Story,False,7.7,5415.0,10194.0,Toy Story Collection,/7G9915LfUQ2lVfwMEEhDsn3kT4B.jpg,/9FBwqcd9IRruEDUrTdcaafOMKUq.jpg,1995,12.451801
1,False,65000000,"[{'id': 12, 'name': 'Adventure'}, {'id': 14, '...",,8844,tt0113497,en,Jumanji,When siblings Judy and Peter discover an encha...,17.015539,/vzmL6fP7aPKNKPRTFnZmiUfciyV.jpg,"[{'name': 'TriStar Pictures', 'id': 559}, {'na...","[{'iso_3166_1': 'US', 'name': 'United States o...",1995-12-15,262797249.0,104.0,"[{'iso_639_1': 'en', 'name': 'English'}, {'iso...",Released,Roll the dice and unleash the excitement!,Jumanji,False,6.9,2413.0,,,,,1995,4.043035
2,False,0,"[{'id': 10749, 'name': 'Romance'}, {'id': 35, ...",,15602,tt0113228,en,Grumpier Old Men,A family wedding reignites the ancient feud be...,11.7129,/6ksm1sjKMFLbO7UY2i6G1ju9SML.jpg,"[{'name': 'Warner Bros.', 'id': 6194}, {'name'...","[{'iso_3166_1': 'US', 'name': 'United States o...",1995-12-22,0.0,101.0,"[{'iso_639_1': 'en', 'name': 'English'}]",Released,Still Yelling. Still Fighting. Still Ready for...,Grumpier Old Men,False,6.5,92.0,119050.0,Grumpy Old Men Collection,/nLvUdqgPgm3F85NMCii9gVFUcet.jpg,/hypTnLot2z8wpFS7qwsQHW1uV8u.jpg,1995,0.0
3,False,16000000,"[{'id': 35, 'name': 'Comedy'}, {'id': 18, 'nam...",,31357,tt0114885,en,Waiting to Exhale,"Cheated on, mistreated and stepped on, the wom...",3.859495,/16XOMpEaLWkrcPqSQqhTmeJuqQl.jpg,[{'name': 'Twentieth Century Fox Film Corporat...,"[{'iso_3166_1': 'US', 'name': 'United States o...",1995-12-22,81452156.0,127.0,"[{'iso_639_1': 'en', 'name': 'English'}]",Released,Friends are the people who let you be yourself...,Waiting to Exhale,False,6.1,34.0,,,,,1995,5.09076
4,False,0,"[{'id': 35, 'name': 'Comedy'}]",,11862,tt0113041,en,Father of the Bride Part II,Just when George Banks has recovered from his ...,8.387519,/e64sOI48hQXyru7naBFyssKFxVd.jpg,"[{'name': 'Sandollar Productions', 'id': 5842}...","[{'iso_3166_1': 'US', 'name': 'United States o...",1995-02-10,76578911.0,106.0,"[{'iso_639_1': 'en', 'name': 'English'}]",Released,Just When His World Is Back To Normal... He's ...,Father of the Bride Part II,False,5.7,173.0,96871.0,Father of the Bride Collection,/nts4iOmNnq7GNicycMJ9pSAn204.jpg,/7qwE57OVZmMJChBpLEbJEmzUydk.jpg,1995,0.0


---

# STEP 6: Extracting Specific Values from Dictionary Columns

In [194]:
def convert_to_dict(value):
    """Convert a stringified Python literal into the object it represents.

    Parameters
    ----------
    value : object
        A cell value: normally a string such as "[{'id': 16, 'name': 'Animation'}]",
        a missing value, or an already-parsed list/dict.

    Returns
    -------
    object or None
        None for missing values, the value itself when it is already a list or dict,
        otherwise the result of ``ast.literal_eval(value)``.
    """
    # Already parsed (e.g. the cell was run before): nothing to do.
    # This check comes first because pd.isna() on a list returns an array, not a bool.
    if isinstance(value, (list, dict)):
        return value

    # Missing value (None / NaN)
    if pd.isna(value):
        return None

    return ast.literal_eval(value)


def extract_values(df='', col='', key_name=''):
    """Extract, for every row, the values stored under ``key_name`` in a column of dict lists.

    Side effect: ``df[col]`` is overwritten with the parsed objects returned by
    ``convert_to_dict``.

    Parameters
    ----------
    df : pandas.DataFrame
        DataFrame that contains the column to process.
    col : str
        Name of the column holding (stringified) lists of dictionaries.
    key_name : str
        Dictionary key whose values are collected.

    Returns
    -------
    list of list
        One inner list per row, holding the ``key_name`` value of every dictionary
        that has that key. Rows whose parsed value is not a list (for example
        missing values) yield an empty list.
    """
    # Parse the column and keep the parsed objects in the DataFrame
    df[col] = df[col].apply(convert_to_dict)

    return [
        [item[key_name] for item in entry if isinstance(item, dict) and key_name in item]
        if isinstance(entry, list)
        else []
        for entry in df[col].to_list()
    ]

---

# STEP 7: Unnesting and Merging Relevant Columns

In [195]:
# Define the columns to process and the keys to extract from the dictionaries in each column
columns = ['genres', 'production_companies', 'production_countries', 'spoken_languages']
key_list = ['name', 'name','name', 'name']

# Build the auxiliary dataframe in one step: one column of extracted value lists per
# source column. Reusing df_movies' index keeps every row tied to the row it came from
# instead of relying on df_movies having a default 0..n-1 index.
df_movies_aux = pd.DataFrame(
    {
        column: extract_values(df=df_movies, col=column, key_name=key)
        for column, key in zip(columns, key_list)
    },
    index=df_movies.index,
)

# Add the 'id' column from the original dataframe to the auxiliary dataframe
df_movies_aux['id'] = df_movies['id']

# Display the dataframe on screen
df_movies.head()


Unnamed: 0,adult,budget,genres,homepage,id,imdb_id,original_language,original_title,overview,popularity,poster_path,production_companies,production_countries,release_date,revenue,runtime,spoken_languages,status,tagline,title,video,vote_average,vote_count,belongs_to_collection_id,belongs_to_collection_name,belongs_to_collection_poster_path,belongs_to_collection_backdrop_path,release_year,return
0,False,30000000,"[{'id': 16, 'name': 'Animation'}, {'id': 35, '...",http://toystory.disney.com/toy-story,862,tt0114709,en,Toy Story,"Led by Woody, Andy's toys live happily in his ...",21.946943,/rhIRbceoE9lR4veEXuwCC2wARtG.jpg,"[{'name': 'Pixar Animation Studios', 'id': 3}]","[{'iso_3166_1': 'US', 'name': 'United States o...",1995-10-30,373554033.0,81.0,"[{'iso_639_1': 'en', 'name': 'English'}]",Released,,Toy Story,False,7.7,5415.0,10194.0,Toy Story Collection,/7G9915LfUQ2lVfwMEEhDsn3kT4B.jpg,/9FBwqcd9IRruEDUrTdcaafOMKUq.jpg,1995,12.451801
1,False,65000000,"[{'id': 12, 'name': 'Adventure'}, {'id': 14, '...",,8844,tt0113497,en,Jumanji,When siblings Judy and Peter discover an encha...,17.015539,/vzmL6fP7aPKNKPRTFnZmiUfciyV.jpg,"[{'name': 'TriStar Pictures', 'id': 559}, {'na...","[{'iso_3166_1': 'US', 'name': 'United States o...",1995-12-15,262797249.0,104.0,"[{'iso_639_1': 'en', 'name': 'English'}, {'iso...",Released,Roll the dice and unleash the excitement!,Jumanji,False,6.9,2413.0,,,,,1995,4.043035
2,False,0,"[{'id': 10749, 'name': 'Romance'}, {'id': 35, ...",,15602,tt0113228,en,Grumpier Old Men,A family wedding reignites the ancient feud be...,11.7129,/6ksm1sjKMFLbO7UY2i6G1ju9SML.jpg,"[{'name': 'Warner Bros.', 'id': 6194}, {'name'...","[{'iso_3166_1': 'US', 'name': 'United States o...",1995-12-22,0.0,101.0,"[{'iso_639_1': 'en', 'name': 'English'}]",Released,Still Yelling. Still Fighting. Still Ready for...,Grumpier Old Men,False,6.5,92.0,119050.0,Grumpy Old Men Collection,/nLvUdqgPgm3F85NMCii9gVFUcet.jpg,/hypTnLot2z8wpFS7qwsQHW1uV8u.jpg,1995,0.0
3,False,16000000,"[{'id': 35, 'name': 'Comedy'}, {'id': 18, 'nam...",,31357,tt0114885,en,Waiting to Exhale,"Cheated on, mistreated and stepped on, the wom...",3.859495,/16XOMpEaLWkrcPqSQqhTmeJuqQl.jpg,[{'name': 'Twentieth Century Fox Film Corporat...,"[{'iso_3166_1': 'US', 'name': 'United States o...",1995-12-22,81452156.0,127.0,"[{'iso_639_1': 'en', 'name': 'English'}]",Released,Friends are the people who let you be yourself...,Waiting to Exhale,False,6.1,34.0,,,,,1995,5.09076
4,False,0,"[{'id': 35, 'name': 'Comedy'}]",,11862,tt0113041,en,Father of the Bride Part II,Just when George Banks has recovered from his ...,8.387519,/e64sOI48hQXyru7naBFyssKFxVd.jpg,"[{'name': 'Sandollar Productions', 'id': 5842}...","[{'iso_3166_1': 'US', 'name': 'United States o...",1995-02-10,76578911.0,106.0,"[{'iso_639_1': 'en', 'name': 'English'}]",Released,Just When His World Is Back To Normal... He's ...,Father of the Bride Part II,False,5.7,173.0,96871.0,Father of the Bride Collection,/nts4iOmNnq7GNicycMJ9pSAn204.jpg,/7qwE57OVZmMJChBpLEbJEmzUydk.jpg,1995,0.0


---

# STEP 8: Make a backup of the original data frame

In [196]:
# Make a backup copy so as not to lose the data from the original dataframe.
# .copy() is required: a plain assignment only creates a second name for the same
# object, so the in-place drops applied to the copy would also change df_movies.
df_movies_copy = df_movies.copy()

---

# STEP 9: Remove unnecessary columns

In [197]:
# Columns to discard: the four nested columns (their values were extracted into
# df_movies_aux) plus other columns that will not be used
columns_to_drop = [
    'genres', 'production_companies', 'production_countries', 'spoken_languages',
    'belongs_to_collection_id', 'belongs_to_collection_poster_path',
    'belongs_to_collection_backdrop_path', 'video', 'imdb_id', 'adult',
    'original_title', 'poster_path', 'homepage',
]
df_movies_copy.drop(columns=columns_to_drop, inplace=True)

# Display the dataframe on screen
df_movies_copy.head()


Unnamed: 0,budget,id,original_language,overview,popularity,release_date,revenue,runtime,status,tagline,title,vote_average,vote_count,belongs_to_collection_name,release_year,return
0,30000000,862,en,"Led by Woody, Andy's toys live happily in his ...",21.946943,1995-10-30,373554033.0,81.0,Released,,Toy Story,7.7,5415.0,Toy Story Collection,1995,12.451801
1,65000000,8844,en,When siblings Judy and Peter discover an encha...,17.015539,1995-12-15,262797249.0,104.0,Released,Roll the dice and unleash the excitement!,Jumanji,6.9,2413.0,,1995,4.043035
2,0,15602,en,A family wedding reignites the ancient feud be...,11.7129,1995-12-22,0.0,101.0,Released,Still Yelling. Still Fighting. Still Ready for...,Grumpier Old Men,6.5,92.0,Grumpy Old Men Collection,1995,0.0
3,16000000,31357,en,"Cheated on, mistreated and stepped on, the wom...",3.859495,1995-12-22,81452156.0,127.0,Released,Friends are the people who let you be yourself...,Waiting to Exhale,6.1,34.0,,1995,5.09076
4,0,11862,en,Just when George Banks has recovered from his ...,8.387519,1995-02-10,76578911.0,106.0,Released,Just When His World Is Back To Normal... He's ...,Father of the Bride Part II,5.7,173.0,Father of the Bride Collection,1995,0.0


----

# STEP 10: Remove duplicates in the 'df_movies_copy' dataframe

In [198]:
# Count the rows of 'df_movies_copy' whose 'id' repeats an earlier row
print(df_movies_copy.duplicated(subset='id').sum())

30


In [199]:
# Number of rows before removing duplicates
len(df_movies_copy['id'])

45376

In [200]:
# Remove rows with a repeated 'id' in place (the first occurrence is kept by default)
df_movies_copy.drop_duplicates(subset='id', inplace=True)

In [201]:
# Renumber the rows 0..n-1 in place after removing duplicates; the old index is discarded
df_movies_copy.reset_index(drop=True, inplace=True)

In [202]:
# Number of rows after removing duplicates
len(df_movies_copy['id'])

45346

In [203]:
# Confirm that no duplicated 'id' values remain (expected: 0)
print(df_movies_copy.duplicated(subset='id').sum())


0


----

# STEP 11: Join 'df_movies_copy' with 'df_movies_aux'

In [204]:
# Check the data type of the 'id' column in 'df_movies' before converting it
print(df_movies['id'].dtypes)

object


In [205]:
# Cast 'id' to integer in both dataframes so the merge key has the same type on each side
for frame in (df_movies_copy, df_movies_aux):
    frame['id'] = frame['id'].astype(int)

In [206]:
# Check the result: both 'id' columns should now report an integer dtype
print(df_movies_copy['id'].dtypes)
print(df_movies_aux['id'].dtypes)

int32
int32


In [207]:
# Remove rows with a repeated 'id' from df_movies_aux in place (first occurrence kept by default)
df_movies_aux.drop_duplicates(subset='id', inplace=True)

In [208]:
# Renumber the rows of df_movies_aux 0..n-1 in place; the old index is discarded
df_movies_aux.reset_index(drop=True, inplace=True)

In [209]:
# Merge 'df_movies_copy' and 'df_movies_aux' on the 'id' column.
# how='inner' keeps only the ids that are present in both dataframes.
df_movies_combined = df_movies_copy.merge(df_movies_aux, how='inner', on='id')

In [210]:
# View the resulting dataframe 'df_movies_combined' (scalar columns plus the extracted list columns)
df_movies_combined

Unnamed: 0,budget,id,original_language,overview,popularity,release_date,revenue,runtime,status,tagline,title,vote_average,vote_count,belongs_to_collection_name,release_year,return,genres,production_companies,production_countries,spoken_languages
0,30000000,862,en,"Led by Woody, Andy's toys live happily in his ...",21.946943,1995-10-30,373554033.0,81.0,Released,,Toy Story,7.7,5415.0,Toy Story Collection,1995,12.451801,"[Animation, Comedy, Family]",[Pixar Animation Studios],[United States of America],[English]
1,65000000,8844,en,When siblings Judy and Peter discover an encha...,17.015539,1995-12-15,262797249.0,104.0,Released,Roll the dice and unleash the excitement!,Jumanji,6.9,2413.0,,1995,4.043035,"[Adventure, Fantasy, Family]","[TriStar Pictures, Teitler Film, Interscope Co...",[United States of America],"[English, Français]"
2,0,15602,en,A family wedding reignites the ancient feud be...,11.7129,1995-12-22,0.0,101.0,Released,Still Yelling. Still Fighting. Still Ready for...,Grumpier Old Men,6.5,92.0,Grumpy Old Men Collection,1995,0.000000,"[Romance, Comedy]","[Warner Bros., Lancaster Gate]",[United States of America],[English]
3,16000000,31357,en,"Cheated on, mistreated and stepped on, the wom...",3.859495,1995-12-22,81452156.0,127.0,Released,Friends are the people who let you be yourself...,Waiting to Exhale,6.1,34.0,,1995,5.090760,"[Comedy, Drama, Romance]",[Twentieth Century Fox Film Corporation],[United States of America],[English]
4,0,11862,en,Just when George Banks has recovered from his ...,8.387519,1995-02-10,76578911.0,106.0,Released,Just When His World Is Back To Normal... He's ...,Father of the Bride Part II,5.7,173.0,Father of the Bride Collection,1995,0.000000,[Comedy],"[Sandollar Productions, Touchstone Pictures]",[United States of America],[English]
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
45341,0,30840,en,"Yet another version of the classic epic, with ...",5.683753,1991-05-13,0.0,104.0,Released,,Robin Hood,5.7,26.0,,1991,0.000000,"[Drama, Action, Romance]","[Westdeutscher Rundfunk (WDR), Working Title F...","[Canada, Germany, United Kingdom, United State...",[English]
45342,0,111109,tl,An artist struggles to finish his work while a...,0.178241,2011-11-17,0.0,360.0,Released,,Century of Birthing,9.0,3.0,,2011,0.000000,[Drama],[Sine Olivia],[Philippines],[]
45343,0,67758,en,"When one of her hits goes wrong, a professiona...",0.903007,2003-08-01,0.0,90.0,Released,A deadly game of wits.,Betrayal,3.8,6.0,,2003,0.000000,"[Action, Drama, Thriller]",[American World Pictures],[United States of America],[English]
45344,0,227506,en,"In a small town live two brothers, one a minis...",0.003503,1917-10-21,0.0,87.0,Released,,Satan Triumphant,0.0,0.0,,1917,0.000000,[],[Yermoliev],[Russia],[]


----

# STEP 12: Working with 'credits.csv'

In [211]:
# Load the credits dataset from the local data folder into 'df_credits'
credits_csv_path = '../data/credits.csv'
df_credits = pd.read_csv(credits_csv_path)

In [212]:
# Count how many rows of 'df_credits' repeat an 'id' that already appeared earlier
print(df_credits['id'].duplicated().sum())

44


In [213]:
# Keep only the first row for each 'id' in 'df_credits'.
# The result is reassigned instead of using inplace=True, so the cell reads as an
# explicit "new frame from old frame" step and is safe to re-run.
df_credits = df_credits.drop_duplicates(subset='id')

In [214]:
# Renumber the rows of 'df_credits' from 0 and discard the old index
# (drop=True prevents the old index from being added as a column).
# Reassignment is used instead of inplace=True to avoid hidden in-place mutation.
df_credits = df_credits.reset_index(drop=True)

In [215]:
# Confirm that no repeated 'id' values remain in 'df_credits' (expected count: 0)
print(df_credits['id'].duplicated().sum())

0


# STEP 13: Transformation and extraction of data in the 'crew' and 'cast' columns

In [216]:
# Convert the stringified 'crew' records into lists of dictionaries.
# Values that are already lists are kept unchanged: without this guard, re-running
# the cell would treat the parsed lists as "not a string" and silently replace
# every row with an empty list.
def _parse_crew(value):
    """Return `value` as parsed data: evaluate strings, keep lists, default to []."""
    if isinstance(value, list):
        return value
    if isinstance(value, str):
        return ast.literal_eval(value)
    # Anything else (for example a missing value) is treated as "no crew"
    return []

df_credits['crew'] = df_credits['crew'].apply(_parse_crew)

In [217]:
# Reduce each 'crew' list to the names of the members whose job is 'Director'
def _director_names(crew_members):
    """Collect the 'name' of every crew entry whose 'job' equals 'Director'."""
    names = []
    for member in crew_members:
        if member['job'] == 'Director':
            names.append(member['name'])
    return names

df_credits['crew'] = df_credits['crew'].apply(_director_names)

In [218]:
# Inspect 'df_credits' after the 'crew' column has been reduced to director names
df_credits

Unnamed: 0,cast,crew,id
0,"[{'cast_id': 14, 'character': 'Woody (voice)',...",[John Lasseter],862
1,"[{'cast_id': 1, 'character': 'Alan Parrish', '...",[Joe Johnston],8844
2,"[{'cast_id': 2, 'character': 'Max Goldman', 'c...",[Howard Deutch],15602
3,"[{'cast_id': 1, 'character': ""Savannah 'Vannah...",[Forest Whitaker],31357
4,"[{'cast_id': 1, 'character': 'George Banks', '...",[Charles Shyer],11862
...,...,...,...
45427,"[{'cast_id': 0, 'character': '', 'credit_id': ...",[Hamid Nematollah],439050
45428,"[{'cast_id': 1002, 'character': 'Sister Angela...",[Lav Diaz],111109
45429,"[{'cast_id': 6, 'character': 'Emily Shaw', 'cr...",[Mark L. Lester],67758
45430,"[{'cast_id': 2, 'character': '', 'credit_id': ...",[Yakov Protazanov],227506


In [219]:
# Convert the stringified 'cast' records into lists of dictionaries.
# Values that are already lists are kept unchanged: without this guard, re-running
# the cell would treat the parsed lists as "not a string" and silently replace
# every row with an empty list.
def _parse_cast(value):
    """Return `value` as parsed data: evaluate strings, keep lists, default to []."""
    if isinstance(value, list):
        return value
    if isinstance(value, str):
        return ast.literal_eval(value)
    # Anything else (for example a missing value) is treated as "no cast"
    return []

df_credits['cast'] = df_credits['cast'].apply(_parse_cast)

In [220]:
# Reduce each 'cast' list to the actors' names, skipping entries with an empty name
def _cast_names(cast_members):
    """Collect every non-empty 'name' from a list of cast entries."""
    names = []
    for member in cast_members:
        if member['name']:
            names.append(member['name'])
    return names

df_credits['cast'] = df_credits['cast'].apply(_cast_names)

In [221]:
# Inspect 'df_credits' after the 'cast' column has been reduced to actor names
df_credits

Unnamed: 0,cast,crew,id
0,"[Tom Hanks, Tim Allen, Don Rickles, Jim Varney...",[John Lasseter],862
1,"[Robin Williams, Jonathan Hyde, Kirsten Dunst,...",[Joe Johnston],8844
2,"[Walter Matthau, Jack Lemmon, Ann-Margret, Sop...",[Howard Deutch],15602
3,"[Whitney Houston, Angela Bassett, Loretta Devi...",[Forest Whitaker],31357
4,"[Steve Martin, Diane Keaton, Martin Short, Kim...",[Charles Shyer],11862
...,...,...,...
45427,"[Leila Hatami, Kourosh Tahami, Elham Korda]",[Hamid Nematollah],439050
45428,"[Angel Aquino, Perry Dizon, Hazel Orencio, Joe...",[Lav Diaz],111109
45429,"[Erika Eleniak, Adam Baldwin, Julie du Page, J...",[Mark L. Lester],67758
45430,"[Iwan Mosschuchin, Nathalie Lissenko, Pavel Pa...",[Yakov Protazanov],227506


---

# STEP 14: Merging the Movies and Credits DataFrames

In [222]:
# Inner-join the movies ('df_movies_combined') with their credits ('df_credits') on 'id';
# only ids present in both DataFrames are kept
df_combined = pd.merge(df_movies_combined, df_credits, how='inner', on='id')

In [223]:
# Display 'df_combined' to verify that the 'cast' and 'crew' columns were added by the merge
df_combined

Unnamed: 0,budget,id,original_language,overview,popularity,release_date,revenue,runtime,status,tagline,title,vote_average,vote_count,belongs_to_collection_name,release_year,return,genres,production_companies,production_countries,spoken_languages,cast,crew
0,30000000,862,en,"Led by Woody, Andy's toys live happily in his ...",21.946943,1995-10-30,373554033.0,81.0,Released,,Toy Story,7.7,5415.0,Toy Story Collection,1995,12.451801,"[Animation, Comedy, Family]",[Pixar Animation Studios],[United States of America],[English],"[Tom Hanks, Tim Allen, Don Rickles, Jim Varney...",[John Lasseter]
1,65000000,8844,en,When siblings Judy and Peter discover an encha...,17.015539,1995-12-15,262797249.0,104.0,Released,Roll the dice and unleash the excitement!,Jumanji,6.9,2413.0,,1995,4.043035,"[Adventure, Fantasy, Family]","[TriStar Pictures, Teitler Film, Interscope Co...",[United States of America],"[English, Français]","[Robin Williams, Jonathan Hyde, Kirsten Dunst,...",[Joe Johnston]
2,0,15602,en,A family wedding reignites the ancient feud be...,11.7129,1995-12-22,0.0,101.0,Released,Still Yelling. Still Fighting. Still Ready for...,Grumpier Old Men,6.5,92.0,Grumpy Old Men Collection,1995,0.000000,"[Romance, Comedy]","[Warner Bros., Lancaster Gate]",[United States of America],[English],"[Walter Matthau, Jack Lemmon, Ann-Margret, Sop...",[Howard Deutch]
3,16000000,31357,en,"Cheated on, mistreated and stepped on, the wom...",3.859495,1995-12-22,81452156.0,127.0,Released,Friends are the people who let you be yourself...,Waiting to Exhale,6.1,34.0,,1995,5.090760,"[Comedy, Drama, Romance]",[Twentieth Century Fox Film Corporation],[United States of America],[English],"[Whitney Houston, Angela Bassett, Loretta Devi...",[Forest Whitaker]
4,0,11862,en,Just when George Banks has recovered from his ...,8.387519,1995-02-10,76578911.0,106.0,Released,Just When His World Is Back To Normal... He's ...,Father of the Bride Part II,5.7,173.0,Father of the Bride Collection,1995,0.000000,[Comedy],"[Sandollar Productions, Touchstone Pictures]",[United States of America],[English],"[Steve Martin, Diane Keaton, Martin Short, Kim...",[Charles Shyer]
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
45340,0,30840,en,"Yet another version of the classic epic, with ...",5.683753,1991-05-13,0.0,104.0,Released,,Robin Hood,5.7,26.0,,1991,0.000000,"[Drama, Action, Romance]","[Westdeutscher Rundfunk (WDR), Working Title F...","[Canada, Germany, United Kingdom, United State...",[English],"[Patrick Bergin, Uma Thurman, David Morrissey,...",[John Irvin]
45341,0,111109,tl,An artist struggles to finish his work while a...,0.178241,2011-11-17,0.0,360.0,Released,,Century of Birthing,9.0,3.0,,2011,0.000000,[Drama],[Sine Olivia],[Philippines],[],"[Angel Aquino, Perry Dizon, Hazel Orencio, Joe...",[Lav Diaz]
45342,0,67758,en,"When one of her hits goes wrong, a professiona...",0.903007,2003-08-01,0.0,90.0,Released,A deadly game of wits.,Betrayal,3.8,6.0,,2003,0.000000,"[Action, Drama, Thriller]",[American World Pictures],[United States of America],[English],"[Erika Eleniak, Adam Baldwin, Julie du Page, J...",[Mark L. Lester]
45343,0,227506,en,"In a small town live two brothers, one a minis...",0.003503,1917-10-21,0.0,87.0,Released,,Satan Triumphant,0.0,0.0,,1917,0.000000,[],[Yermoliev],[Russia],[],"[Iwan Mosschuchin, Nathalie Lissenko, Pavel Pa...",[Yakov Protazanov]


----

# STEP 15: Exporting the Final DataFrame to CSV

In [224]:
# Write the merged DataFrame to disk as CSV; index=False leaves the row index out of the file
output_csv_path = '../data/movies_dataset_final2.csv'
df_combined.to_csv(output_csv_path, index=False)

In [225]:
# Display the final DataFrame 'df_combined' that was just exported
df_combined

Unnamed: 0,budget,id,original_language,overview,popularity,release_date,revenue,runtime,status,tagline,title,vote_average,vote_count,belongs_to_collection_name,release_year,return,genres,production_companies,production_countries,spoken_languages,cast,crew
0,30000000,862,en,"Led by Woody, Andy's toys live happily in his ...",21.946943,1995-10-30,373554033.0,81.0,Released,,Toy Story,7.7,5415.0,Toy Story Collection,1995,12.451801,"[Animation, Comedy, Family]",[Pixar Animation Studios],[United States of America],[English],"[Tom Hanks, Tim Allen, Don Rickles, Jim Varney...",[John Lasseter]
1,65000000,8844,en,When siblings Judy and Peter discover an encha...,17.015539,1995-12-15,262797249.0,104.0,Released,Roll the dice and unleash the excitement!,Jumanji,6.9,2413.0,,1995,4.043035,"[Adventure, Fantasy, Family]","[TriStar Pictures, Teitler Film, Interscope Co...",[United States of America],"[English, Français]","[Robin Williams, Jonathan Hyde, Kirsten Dunst,...",[Joe Johnston]
2,0,15602,en,A family wedding reignites the ancient feud be...,11.7129,1995-12-22,0.0,101.0,Released,Still Yelling. Still Fighting. Still Ready for...,Grumpier Old Men,6.5,92.0,Grumpy Old Men Collection,1995,0.000000,"[Romance, Comedy]","[Warner Bros., Lancaster Gate]",[United States of America],[English],"[Walter Matthau, Jack Lemmon, Ann-Margret, Sop...",[Howard Deutch]
3,16000000,31357,en,"Cheated on, mistreated and stepped on, the wom...",3.859495,1995-12-22,81452156.0,127.0,Released,Friends are the people who let you be yourself...,Waiting to Exhale,6.1,34.0,,1995,5.090760,"[Comedy, Drama, Romance]",[Twentieth Century Fox Film Corporation],[United States of America],[English],"[Whitney Houston, Angela Bassett, Loretta Devi...",[Forest Whitaker]
4,0,11862,en,Just when George Banks has recovered from his ...,8.387519,1995-02-10,76578911.0,106.0,Released,Just When His World Is Back To Normal... He's ...,Father of the Bride Part II,5.7,173.0,Father of the Bride Collection,1995,0.000000,[Comedy],"[Sandollar Productions, Touchstone Pictures]",[United States of America],[English],"[Steve Martin, Diane Keaton, Martin Short, Kim...",[Charles Shyer]
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
45340,0,30840,en,"Yet another version of the classic epic, with ...",5.683753,1991-05-13,0.0,104.0,Released,,Robin Hood,5.7,26.0,,1991,0.000000,"[Drama, Action, Romance]","[Westdeutscher Rundfunk (WDR), Working Title F...","[Canada, Germany, United Kingdom, United State...",[English],"[Patrick Bergin, Uma Thurman, David Morrissey,...",[John Irvin]
45341,0,111109,tl,An artist struggles to finish his work while a...,0.178241,2011-11-17,0.0,360.0,Released,,Century of Birthing,9.0,3.0,,2011,0.000000,[Drama],[Sine Olivia],[Philippines],[],"[Angel Aquino, Perry Dizon, Hazel Orencio, Joe...",[Lav Diaz]
45342,0,67758,en,"When one of her hits goes wrong, a professiona...",0.903007,2003-08-01,0.0,90.0,Released,A deadly game of wits.,Betrayal,3.8,6.0,,2003,0.000000,"[Action, Drama, Thriller]",[American World Pictures],[United States of America],[English],"[Erika Eleniak, Adam Baldwin, Julie du Page, J...",[Mark L. Lester]
45343,0,227506,en,"In a small town live two brothers, one a minis...",0.003503,1917-10-21,0.0,87.0,Released,,Satan Triumphant,0.0,0.0,,1917,0.000000,[],[Yermoliev],[Russia],[],"[Iwan Mosschuchin, Nathalie Lissenko, Pavel Pa...",[Yakov Protazanov]
