Exploration Fichiers CSV


In [94]:
import pandas as pd
import gdown

In [117]:
# Liens vers les fichiers de données IMDb
# Ces fichiers sont disponibles en téléchargement sur le site IMDb

link_title_basics = "https://datasets.imdbws.com/title.basics.tsv.gz"
link_title_akas = "https://datasets.imdbws.com/title.akas.tsv.gz"
link_title_crew = "https://datasets.imdbws.com/title.crew.tsv.gz"
link_title_episode = "https://datasets.imdbws.com/title.episode.tsv.gz"
link_title_principals = "https://datasets.imdbws.com/title.principals.tsv.gz"
link_title_ratings = "https://datasets.imdbws.com/title.ratings.tsv.gz"
link_name_basics = "https://datasets.imdbws.com/name.basics.tsv.gz"
link_tmdb = "https://drive.google.com/file/d/1VB5_gl1fnyBDzcIOXZ5vUSbCY68VZN1v/view"

In [92]:
# Fichier TMDB
file_id = "1VB5_gl1fnyBDzcIOXZ5vUSbCY68VZN1v"

output = "tmdb_movies.csv"

gdown.download(id=file_id,
               output=output,
               quiet=False,      # affiche la barre de progression
               fuzzy=True)  

Downloading...
From (original): https://drive.google.com/uc?id=1VB5_gl1fnyBDzcIOXZ5vUSbCY68VZN1v
From (redirected): https://drive.google.com/uc?id=1VB5_gl1fnyBDzcIOXZ5vUSbCY68VZN1v&confirm=t&uuid=19617231-47bb-4624-b0c6-5e0959c37b7e
To: c:\Users\User\Documents\Projet2\WildCodeSchool-Projet2\tmdb_movies.csv
100%|██████████| 157M/157M [00:04<00:00, 36.7MB/s] 


'tmdb_movies.csv'

In [96]:
df_title_basics = pd.read_csv(link_title_basics, sep="\t", low_memory=False)

In [None]:
# Début du traitement des fichiers IMDb
# Début nettoyage title.basics.tsv.gz
# Tous les genres de films existants dans le fichier title.basics.tsv.gz

genre_exploded = (
    df_title_basics["genres"]
    .str.split(",")
    .explode()
    .str.strip()
)

genre_exploded.unique()

array(['Documentary', 'Short', 'Animation', 'Comedy', 'Romance', 'Sport',
       'News', 'Drama', 'Fantasy', 'Horror', 'Biography', 'Music', 'War',
       'Crime', 'Western', 'Family', 'Adventure', 'Action', 'History',
       'Mystery', '\\N', 'Sci-Fi', 'Musical', 'Thriller', 'Film-Noir',
       'Talk-Show', 'Game-Show', 'Reality-TV', 'Adult', nan], dtype=object)

In [97]:
# Copy
df_title_basics_copy = df_title_basics.copy()
df_title_basics_copy.tail(3)

Unnamed: 0,tconst,titleType,primaryTitle,originalTitle,isAdult,startYear,endYear,runtimeMinutes,genres
11650013,tt9916852,tvEpisode,Episode #3.20,Episode #3.20,0,2010,\N,\N,Drama
11650014,tt9916856,short,The Wind,The Wind,0,2015,\N,27,Short
11650015,tt9916880,tvEpisode,Horrid Henry Knows It All,Horrid Henry Knows It All,0,2014,\N,10,"Adventure,Animation,Comedy"


In [98]:
# Remplacer NaN et \\N par "Unknown"

df_title_basics_copy["genres"] = df_title_basics_copy["genres"].str.replace("\\N", "Unknown", regex=False)
df_title_basics_copy["genres"] = df_title_basics_copy["genres"].fillna("Unknown")


# Genre non gardé : Documentary / Short / News / Talk-show / Game-Show / Reality-TV / Adult 
# On les enlève de la liste des genres
list_genre_to_drop = ["Documentary", "Short", "News", "Talk-show", "Game-Show", "Reality-TV", "Adult"]

regex_pattern = '|'.join(map(lambda x: f'({x})', list_genre_to_drop))

df_title_basics_copy = df_title_basics_copy[~df_title_basics_copy['genres'].str.contains(regex_pattern, regex=True)]


display(df_title_basics_copy.head(3))

  df_title_basics_copy = df_title_basics_copy[~df_title_basics_copy['genres'].str.contains(regex_pattern, regex=True)]


Unnamed: 0,tconst,titleType,primaryTitle,originalTitle,isAdult,startYear,endYear,runtimeMinutes,genres
2,tt0000003,short,Poor Pierrot,Pauvre Pierrot,0,1892,\N,5,"Animation,Comedy,Romance"
8,tt0000009,movie,Miss Jerry,Miss Jerry,0,1894,\N,45,Romance
208,tt0000211,short,"The Astronomer's Dream; or, The Man in the Moon",La lune à un mètre,0,1898,\N,3,"Comedy,Fantasy,Horror"


In [99]:
# On enlève les films sortis avant 1970
df_title_basics_copy['startYear'] = pd.to_numeric(df_title_basics_copy['startYear'], errors='coerce',downcast ='signed')
df_title_basics_copy = df_title_basics_copy[df_title_basics_copy['startYear'] >= 1970]

display(df_title_basics_copy)

Unnamed: 0,tconst,titleType,primaryTitle,originalTitle,isAdult,startYear,endYear,runtimeMinutes,genres
11631,tt0011801,movie,Tötet nicht mehr,Tötet nicht mehr,0,2019.0,\N,\N,"Action,Crime"
15479,tt0015724,movie,Dama de noche,Dama de noche,0,1993.0,\N,102,"Drama,Mystery,Romance"
30892,tt0031458,tvEpisode,El huésped del sevillano,El huésped del sevillano,0,1970.0,\N,86,Musical
34794,tt0035423,movie,Kate & Leopold,Kate & Leopold,0,2001.0,\N,118,"Comedy,Fantasy,Romance"
35956,tt0036606,movie,"Another Time, Another Place","Another Time, Another Place",0,1983.0,\N,118,"Drama,War"
...,...,...,...,...,...,...,...,...,...
11650010,tt9916846,tvEpisode,Episode #3.18,Episode #3.18,0,2009.0,\N,\N,Drama
11650011,tt9916848,tvEpisode,Episode #3.17,Episode #3.17,0,2009.0,\N,\N,Drama
11650012,tt9916850,tvEpisode,Episode #3.19,Episode #3.19,0,2010.0,\N,\N,Drama
11650013,tt9916852,tvEpisode,Episode #3.20,Episode #3.20,0,2010.0,\N,\N,Drama


In [100]:
# Verifier les différents types de films
df_title_basics_copy['titleType'].unique()

# On récupere uniquement les films
df_title_basics_copy = df_title_basics_copy[df_title_basics_copy['titleType'] == 'movie']

# On enlève les films pour adultes

df_title_basics_copy = df_title_basics_copy[df_title_basics_copy['isAdult'] == '0']

# On enlève les films sans titre
df_title_basics_copy['primaryTitle'] = df_title_basics_copy['primaryTitle'].fillna('None.')

display(df_title_basics_copy[df_title_basics_copy['primaryTitle'].isna() == False])




Unnamed: 0,tconst,titleType,primaryTitle,originalTitle,isAdult,startYear,endYear,runtimeMinutes,genres
11631,tt0011801,movie,Tötet nicht mehr,Tötet nicht mehr,0,2019.0,\N,\N,"Action,Crime"
15479,tt0015724,movie,Dama de noche,Dama de noche,0,1993.0,\N,102,"Drama,Mystery,Romance"
34794,tt0035423,movie,Kate & Leopold,Kate & Leopold,0,2001.0,\N,118,"Comedy,Fantasy,Romance"
35956,tt0036606,movie,"Another Time, Another Place","Another Time, Another Place",0,1983.0,\N,118,"Drama,War"
37408,tt0038086,movie,Shiva und die Galgenblume,Shiva und die Galgenblume,0,1993.0,\N,\N,Thriller
...,...,...,...,...,...,...,...,...,...
11649782,tt9916362,movie,Coven,Akelarre,0,2020.0,\N,92,"Drama,History"
11649814,tt9916428,movie,The Secret of China,Hong xing zhao yao Zhong guo,0,2019.0,\N,\N,"Adventure,History,War"
11649866,tt9916538,movie,Kuambil Lagi Hatiku,Kuambil Lagi Hatiku,0,2019.0,\N,123,Drama
11649946,tt9916706,movie,Dankyavar Danka,Dankyavar Danka,0,2013.0,\N,\N,Comedy


In [143]:
# Jointure entre df_title_basics_copy et df_tmdb, clé communune : tconst et imdb_id
df_merged = pd.merge(df_title_basics_copy, df_tmdb, left_on='tconst', right_on='imdb_id', how='left')

# On enlève les colonnes inutiles
df_merged = df_merged.drop(columns=['isAdult', 'originalTitle', 'endYear', 'adult', 'budget', 'homepage', 'imdb_id', 'original_title', 'titleType', 'runtime'])

# On transforme les colonnes 'runtimeMinutes', 'id', 'popularity', 'runtime', 'revenue', 'vote_average', 'vote_count' en numérique
df_merged['runtimeMinutes'] = pd.to_numeric(df_merged['runtimeMinutes'], errors='coerce', downcast='signed').astype('Int64')  
df_merged['id'] = pd.to_numeric(df_merged['id'], errors='coerce', downcast='integer').astype('Int64')  
df_merged['popularity'] = pd.to_numeric(df_merged['popularity'], errors='coerce', downcast='signed')
df_merged['revenue'] = pd.to_numeric(df_merged['revenue'], errors='coerce', downcast='signed')
df_merged['vote_average'] = pd.to_numeric(df_merged['vote_average'], errors='coerce', downcast='signed')
df_merged['vote_count'] = pd.to_numeric(df_merged['vote_count'], errors='coerce', downcast='signed').astype('Int64')

# On transforme la colonne 'startYear' en datetime
df_merged['startYear'] = df_merged['startYear'].astype('Int64')
df_merged['startYear'] = pd.to_datetime(df_merged['startYear'], format='%Y', errors='coerce').dt.to_period('Y')

# On transforme la colonne 'release_date' en datetime
df_merged['release_date'] = pd.to_datetime(df_merged['release_date'], format='%Y-%m-%d', errors='coerce').dt.date

# On filtre la colonne 'production_country' pour ne garder que les pays de production souhaités : USA,FR,GB
df_filtered = df_merged[df_merged['production_countries'].str.contains('USA|GB|FR', na=False)]

# On réorganise les colonnes : 'tconst', 'primaryTitle', 'title', 'startYear', 'release_date', 'genre_x', 'genre_y', 'production_countries', 'runtimeMinutes', 'vote_average', 'vote_count', 'popularity', 'revenue', 'tagline', 'overview', 'id'
df_filtered = df_filtered[['tconst', 'primaryTitle', 'title', 'startYear', 'release_date', 'genres_x', 'genres_y', 'production_countries', 'runtimeMinutes', 'vote_average', 'vote_count', 'popularity', 'revenue', 'tagline', 'overview', 'id']]

# Renomme la colonne 'genres_x' en 'genres_df_title_basics' et genres_y en 'genres_df_tmdb'
df_filtered = df_filtered.rename(columns={'genres_x': 'genres_df_title_basics', 'genres_y': 'genres_df_tmdb'})

# On enlève les doublons de la colonne 'primaryTitle'
df_filtered = df_filtered.drop_duplicates(subset=['primaryTitle'])

df_filtered

Unnamed: 0,tconst,primaryTitle,title,startYear,release_date,genres_df_title_basics,genres_df_tmdb,production_countries,runtimeMinutes,vote_average,vote_count,popularity,revenue,tagline,overview,id
3,tt0036606,"Another Time, Another Place","Another Time, Another Place",1983,1983-05-13,"Drama,War",['Drama'],['GB'],118,4.700,6,1.400,0.0,,Set in 1943 in Scotland during World War II. J...,73069
34,tt0062285,"Oh, Sun","Oh, Sun",1970,1973-01-04,Drama,['Drama'],"['FR', 'MR']",98,6.700,15,1.081,0.0,,"The film Soleil Ô, shot over four years with a...",195841
70,tt0064004,All the Right Noises,All the Right Noises,1970,1971-01-01,"Comedy,Drama","['Comedy', 'Drama']",['GB'],92,5.600,12,1.130,0.0,The story every man will understand and every ...,A married man with two small children begins a...,44198
75,tt0064078,Les libertines,Versatile Lovers,1970,1970-05-06,Drama,['Drama'],"['ES', 'FR', 'IT']",90,3.500,2,1.428,0.0,,Isabelle (Marisa Mell) runs a nursing home for...,3696
78,tt0064106,The Butcher,The Butcher,1970,1970-02-27,"Drama,Mystery,Thriller","['Thriller', 'Drama', 'Mystery']","['FR', 'IT']",93,7.066,197,6.793,0.0,,"An unlikely friendship between a dour, working...",2912
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
349679,tt9904802,Enemy Lines,Enemy Lines,2020,2020-04-24,"Action,Drama,War","['War', 'Drama']",['GB'],92,5.400,28,5.523,0.0,Courage has no border.,"In the frozen, war torn landscape of occupied ...",679796
349695,tt9908390,Le lion,The Lion,2020,2020-01-29,Comedy,['Comedy'],['FR'],95,5.328,172,7.731,0.0,,A psychiatric hospital patient pretends to be ...,589970
349699,tt9908636,Das Spiel der Hoffnung,Das Spiel der Hoffnung,2017,2017-05-25,Fantasy,['Fantasy'],['GB'],100,0.000,0,0.897,0.0,,Follows the central character from infant inno...,688906
349780,tt9916190,Safeguard,Safeguard,2020,2020-09-07,"Action,Adventure,Thriller","['Action', 'Thriller', 'Crime']",['GB'],95,6.500,2,6.124,0.0,Protecting your family comes at a cost.,When a Japanese restaurant is extorted by the ...,633490


In [None]:
df_title_akas = pd.read_csv(link_title_akas, sep="\t", low_memory=False)
df_title_akas.head()


In [138]:
# Création d'un df pour les titres de films en francais et suppression des colonnes inutiles
df_title_akas_clean = df_title_akas.copy()
df_title_akas_clean = df_title_akas_clean.drop(columns=['ordering', 'language', 'types', 'attributes', 'isOriginalTitle'])

# Affiche les titres de la région FR
df_title_akas_clean = df_title_akas_clean[df_title_akas_clean['region'] == 'FR']

# Changement de la colonne 'title' en 'frenchTitle'
df_title_akas_clean = df_title_akas_clean.rename(columns={'title': 'frenchTitle'})

df_title_akas_clean.head(3)

Unnamed: 0,titleId,frenchTitle,region
12,tt0000002,Le clown et ses chiens,FR
21,tt0000003,Pauvre Pierrot,FR
30,tt0000004,Un bon bock,FR


In [169]:
# Jointure entre df_filtered et df_title_akas_clean, clé communune : tconst et titleId
df_filtered_french_title = pd.merge(df_filtered, df_title_akas_clean, left_on='tconst', right_on='titleId', how='left')

#  Suppression des colonnes titleId, region
df_filtered_french_title = df_filtered_french_title.drop(columns=['titleId', 'region'], axis=1)

# Si frenchTitle est NaN, on remplace par primaryTitle
df_filtered_french_title['frenchTitle'] = df_filtered_french_title['frenchTitle'].fillna(df_filtered_french_title['primaryTitle'])


# Enleve les '[]' et les ' dans genres_df_tmdb
df_filtered_french_title['genres_df_tmdb'] = df_filtered_french_title['genres_df_tmdb'].apply(lambda x: str(x).replace('[','').replace(']','').replace('\'', '').replace(' ',''))

# Enleve les '[]' et les ' dans production_countries
df_filtered_french_title['production_countries'] = df_filtered_french_title['production_countries'].apply(lambda x: str(x).replace('[','').replace(']','').replace('\'', '').replace(' ',''))

# Concatene les colonnes 'genres_df_title_basics' et 'genres_df_tmdb' en une seule colonne 'genres' et supprime les termes en doublon de la colonne 'genres'
df_filtered_french_title['genres'] = df_filtered_french_title.apply(lambda x: ', '.join(set(str(x['genres_df_title_basics']).split(',') + str(x['genres_df_tmdb']).split(','))), axis=1)    

# Supprime les colonnes 'genres_df_title_basics' et 'genres_df_tmdb'
df_filtered_french_title = df_filtered_french_title.drop(columns=['genres_df_title_basics', 'genres_df_tmdb'], axis=1)

# Range les colonnes
df_filtered_french_title = df_filtered_french_title[['tconst', 'primaryTitle',  'title', 'frenchTitle','startYear', 'genres', 'production_countries', 'runtimeMinutes', 'vote_average', 'vote_count', 'popularity', 'revenue', 'tagline', 'overview', 'id']]

df_filtered_french_title

Unnamed: 0,tconst,primaryTitle,title,frenchTitle,startYear,genres,production_countries,runtimeMinutes,vote_average,vote_count,popularity,revenue,tagline,overview,id
0,tt0036606,"Another Time, Another Place","Another Time, Another Place",Les Coeurs captifs,1983,"War, Drama",GB,118,4.700,6,1.400,0.0,,Set in 1943 in Scotland during World War II. J...,73069
1,tt0062285,"Oh, Sun","Oh, Sun",Soleil Ô,1970,Drama,"FR,MR",98,6.700,15,1.081,0.0,,"The film Soleil Ô, shot over four years with a...",195841
2,tt0064004,All the Right Noises,All the Right Noises,All the Right Noises,1970,"Comedy, Drama",GB,92,5.600,12,1.130,0.0,The story every man will understand and every ...,A married man with two small children begins a...,44198
3,tt0064078,Les libertines,Versatile Lovers,Les belles au bois dormantes,1970,Drama,"ES,FR,IT",90,3.500,2,1.428,0.0,,Isabelle (Marisa Mell) runs a nursing home for...,3696
4,tt0064078,Les libertines,Versatile Lovers,Les libertines,1970,Drama,"ES,FR,IT",90,3.500,2,1.428,0.0,,Isabelle (Marisa Mell) runs a nursing home for...,3696
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
16488,tt9904802,Enemy Lines,Enemy Lines,Enemy Lines,2020,"Action, War, Drama",GB,92,5.400,28,5.523,0.0,Courage has no border.,"In the frozen, war torn landscape of occupied ...",679796
16489,tt9908390,Le lion,The Lion,Le lion,2020,Comedy,FR,95,5.328,172,7.731,0.0,,A psychiatric hospital patient pretends to be ...,589970
16490,tt9908636,Das Spiel der Hoffnung,Das Spiel der Hoffnung,Das Spiel der Hoffnung,2017,Fantasy,GB,100,0.000,0,0.897,0.0,,Follows the central character from infant inno...,688906
16491,tt9916190,Safeguard,Safeguard,Safeguard,2020,"Action, Thriller, Crime, Adventure",GB,95,6.500,2,6.124,0.0,Protecting your family comes at a cost.,When a Japanese restaurant is extorted by the ...,633490


In [160]:
# Votes differents entre les deux df 
df_title_ratings = pd.read_csv(link_title_ratings, sep="\t", low_memory=False)
df_title_ratings[df_title_ratings['tconst'] == 'tt0062285']

Unnamed: 0,tconst,averageRating,numVotes
42480,tt0062285,7.3,939


In [166]:
df_filtered_ratings = pd.merge(df_filtered_french_title, df_title_ratings, left_on='tconst', right_on='tconst', how='left')
df_filtered_ratings.head(3)

Unnamed: 0,tconst,primaryTitle,title,frenchTitle,startYear,release_date,genres_df_title_basics,genres_df_tmdb,production_countries,runtimeMinutes,vote_average,vote_count,popularity,revenue,tagline,overview,id,genres,averageRating,numVotes
0,tt0036606,"Another Time, Another Place","Another Time, Another Place",Les Coeurs captifs,1983,1983-05-13,"Drama,War",Drama,GB,118,4.7,6,1.4,0.0,,Set in 1943 in Scotland during World War II. J...,73069,"War, Drama",6.4,373.0
1,tt0062285,"Oh, Sun","Oh, Sun",Soleil Ô,1970,1973-01-04,Drama,Drama,"FR,MR",98,6.7,15,1.081,0.0,,"The film Soleil Ô, shot over four years with a...",195841,Drama,7.3,939.0
2,tt0064004,All the Right Noises,All the Right Noises,All the Right Noises,1970,1971-01-01,"Comedy,Drama","Comedy,Drama",GB,92,5.6,12,1.13,0.0,The story every man will understand and every ...,A married man with two small children begins a...,44198,"Comedy, Drama",6.0,509.0


In [None]:
df_title_crew = pd.read_csv(link_title_crew, sep="\t", low_memory=False)
df_title_crew

Unnamed: 0,tconst,directors,writers
0,tt0000001,nm0005690,\N
1,tt0000002,nm0721526,\N
2,tt0000003,nm0721526,nm0721526
3,tt0000004,nm0721526,\N
4,tt0000005,nm0005690,\N
...,...,...,...
11650011,tt9916848,nm1485677,"nm9187127,nm1485677,nm9826385,nm9299459,nm1628284"
11650012,tt9916850,nm1485677,"nm9187127,nm1485677,nm9826385,nm1628284"
11650013,tt9916852,nm1485677,"nm9187127,nm1485677,nm9826385,nm9299459,nm1628284"
11650014,tt9916856,nm10538645,nm6951431


In [None]:
df_title_principals = pd.read_csv(link_title_principals, sep="\t", chunksize=1000, low_memory=False)
df_title_principals.get_chunk(100)

Unnamed: 0,tconst,ordering,nconst,category,job,characters
0,tt0000001,1,nm1588970,self,\N,"[""Self""]"
1,tt0000001,2,nm0005690,director,\N,\N
2,tt0000001,3,nm0005690,producer,producer,\N
3,tt0000001,4,nm0374658,cinematographer,director of photography,\N
4,tt0000002,1,nm0721526,director,\N,\N
...,...,...,...,...,...,...
95,tt0000024,3,nm0010291,director,\N,\N
96,tt0000024,4,nm0010291,producer,producer,\N
97,tt0000024,5,nm0666972,producer,producer,\N
98,tt0000024,6,nm0010291,cinematographer,\N,\N


In [None]:
df_name_basics = pd.read_csv(link_name_basics, sep="\t", chunksize=1000, low_memory=False)
df_name_basics.get_chunk(100)

Unnamed: 0,nconst,primaryName,birthYear,deathYear,primaryProfession,knownForTitles
0,nm0000001,Fred Astaire,1899,1987,"actor,miscellaneous,producer","tt0072308,tt0050419,tt0027125,tt0031983"
1,nm0000002,Lauren Bacall,1924,2014,"actress,soundtrack,archive_footage","tt0037382,tt0075213,tt0117057,tt0038355"
2,nm0000003,Brigitte Bardot,1934,\N,"actress,music_department,producer","tt0057345,tt0049189,tt0056404,tt0054452"
3,nm0000004,John Belushi,1949,1982,"actor,writer,music_department","tt0072562,tt0077975,tt0080455,tt0078723"
4,nm0000005,Ingmar Bergman,1918,2007,"writer,director,actor","tt0050986,tt0069467,tt0050976,tt0083922"
...,...,...,...,...,...,...
95,nm0000096,Gillian Anderson,1968,\N,"actress,producer,director","tt0106179,tt2294189,tt0455590,tt0442632"
96,nm0000097,Pamela Anderson,1967,\N,"actress,producer,director","tt31193791,tt0096542,tt18376122,tt0101120"
97,nm0000098,Jennifer Aniston,1969,\N,"actress,producer,director","tt0108778,tt3442006,tt1723121,tt0279113"
98,nm0000099,Patricia Arquette,1968,\N,"actress,producer,director","tt0108399,tt1065073,tt0412175,tt0145531"


In [None]:
# Verifier s'il y a des episodes qui correspondent à des films
df_title_episode = pd.read_csv(link_title_episode, sep="\t", low_memory=False)

Unnamed: 0,tconst,parentTconst,seasonNumber,episodeNumber
0,tt0031458,tt32857063,\N,\N
1,tt0041951,tt0041038,1,9
2,tt0042816,tt0989125,1,17
3,tt0042889,tt0989125,\N,\N
4,tt0043426,tt0040051,3,42
...,...,...,...,...
95,tt0055130,tt0190181,2,5
96,tt0055131,tt0161126,4,27
97,tt0055132,tt0190181,2,2
98,tt0055203,tt0161126,4,47


In [None]:
df_tmdb = pd.read_csv(output, low_memory=False)

df_tmdb.head()

Unnamed: 0,adult,backdrop_path,budget,genres,homepage,id,imdb_id,original_language,original_title,overview,...,runtime,spoken_languages,status,tagline,title,video,vote_average,vote_count,production_companies_name,production_companies_country
0,False,/dvQj1GBZAZirz1skEEZyWH2ZqQP.jpg,0,['Comedy'],,3924,tt0029927,en,Blondie,Blondie and Dagwood are about to celebrate the...,...,70,['en'],Released,The favorite comic strip of millions at last o...,Blondie,False,7.214,7,['Columbia Pictures'],['US']
1,False,,0,['Adventure'],,6124,tt0011436,de,Der Mann ohne Namen,,...,420,[],Released,,"Peter Voss, Thief of Millions",False,0.0,0,[],[]
2,False,/uJlc4aNPF3Y8yAqahJTKBwgwPVW.jpg,0,"['Drama', 'Romance']",,8773,tt0055747,fr,L'Amour à vingt ans,Love at Twenty unites five directors from five...,...,110,"['it', 'ja', 'pl', 'fr', 'de']",Released,The Intimate Secrets of Young Lovers,Love at Twenty,False,6.7,41,"['Ulysse Productions', 'Unitec Films', 'Cinese...","['', 'NZ', 'IT', 'JP', 'DE', 'PL', '']"
3,False,/hQ4pYsIbP22TMXOUdSfC2mjWrO0.jpg,0,"['Drama', 'Comedy', 'Crime']",,2,tt0094675,fi,Ariel,Taisto Kasurinen is a Finnish coal miner whose...,...,73,['fi'],Released,,Ariel,False,7.046,248,['Villealfa Filmproductions'],['FI']
4,False,/l94l89eMmFKh7na2a1u5q67VgNx.jpg,0,"['Drama', 'Comedy', 'Romance']",,3,tt0092149,fi,Varjoja paratiisissa,"An episode in the life of Nikander, a garbage ...",...,76,['en'],Released,,Shadows in Paradise,False,7.182,269,['Villealfa Filmproductions'],['FI']
