# Creation of a .csv file with all the film data

In [2]:
import pandas as pd

## Loading CSV files

In [3]:
movies = pd.read_csv("data/movies.csv")
posters = pd.read_csv("data/posters.csv")
genres = pd.read_csv("data/genres.csv")
actors = pd.read_csv("data/actors.csv")
crew = pd.read_csv("data/crew.csv")
countries = pd.read_csv("data/countries.csv")
languages = pd.read_csv("data/languages.csv")
releases = pd.read_csv("data/releases.csv")
studios = pd.read_csv("data/studios.csv")
themes = pd.read_csv("data/themes.csv")

## Joining data

In [4]:
details = movies.copy()

## convert date and minute data to int

In [5]:
details['date'] = details['date'].fillna(0).astype(float).astype(int)
details['minute'] = details['minute'].fillna(0).astype(float).astype(int)

In [6]:
details

Unnamed: 0,id,name,date,tagline,description,minute,rating
0,1000001,Barbie,2023,She's everything. He's just Ken.,Barbie and Ken are having the time of their li...,114,3.86
1,1000002,Parasite,2019,Act like you own the place.,"All unemployed, Ki-taek's family takes peculia...",133,4.56
2,1000003,Everything Everywhere All at Once,2022,The universe is so much bigger than you realize.,An aging Chinese immigrant is swept up in an i...,140,4.30
3,1000004,Fight Club,1999,Mischief. Mayhem. Soap.,A ticking-time-bomb insomniac and a slippery s...,139,4.27
4,1000005,La La Land,2016,Here's to the fools who dream.,"Mia, an aspiring actress, serves lattes to mov...",129,4.09
...,...,...,...,...,...,...,...
941590,1941593,神笛,0,,,0,
941591,1941594,蟲極道蜜団子抗争編 壱ノ巻,0,,Shinjuku forest at night. In the sap taverns o...,30,
941592,1941595,蟲極道蜜団子抗争編 弐ノ巻,0,,"The city that never sleeps, where insects gath...",30,
941593,1941596,重生,0,,"In a world where order has broken down, darkne...",0,


# Join the information

### Poster

In [7]:
details = details.merge(posters, on="id", how="left")  # Join the posters

### Genre

In [8]:
# Group genres by ID and create a unique string for each ID
genres_grouped = genres.groupby("id")["genre"].apply(lambda x: ", ".join(x))

# Map grouped values to the 'details' ID
details["genres"] = details["id"].map(genres_grouped).fillna("")

### actors

In [9]:
# Convert the ‘name’ and ‘role’ columns into character strings to avoid errors
actors["name"] = actors["name"].astype(str)
actors["role"] = actors["role"].astype(str)

# Group and concatenate the names of actors with their roles
actors_grouped = actors.groupby("id", group_keys=False).apply(
    lambda x: ", ".join(x["name"] + " (" + x["role"] + ")")
)
details["actors"] = details["id"].map(actors_grouped).fillna("")



  actors_grouped = actors.groupby("id", group_keys=False).apply(


### crew

In [10]:
crew["name"] = crew["name"].astype(str)
crew["role"] = crew["role"].astype(str)

crew_grouped = crew.groupby("id", group_keys=False).apply(lambda x: ", ".join(x["name"] + " (" + x["role"] + ")"))
details["crew"] = details["id"].map(crew_grouped).fillna("")

  crew_grouped = crew.groupby("id", group_keys=False).apply(lambda x: ", ".join(x["name"] + " (" + x["role"] + ")"))


### country

In [11]:
countries_grouped = countries.groupby("id")["country"].apply(lambda x: ", ".join(x))
details["countries"] = details["id"].map(countries_grouped).fillna("")

### languages

In [12]:
languages_grouped = languages.groupby("id", group_keys=False).apply(
    lambda x: ", ".join(x["language"] + " (" + x["type"] + ")")
)
details["languages"] = details["id"].map(languages_grouped).fillna("")

  languages_grouped = languages.groupby("id", group_keys=False).apply(


### releases

In [13]:
releases_grouped = releases.groupby("id", group_keys=False).apply(
    lambda x: ", ".join(x["date"] + " in " + x["country"] + " (" + x["type"] + ")")
)
details["releases"] = details["id"].map(releases_grouped).fillna("")


  releases_grouped = releases.groupby("id", group_keys=False).apply(


### Studios

In [14]:
studios["studio"] = studios["studio"].astype(str)
studios_grouped = studios.groupby("id").apply(
    lambda x: ", ".join(x["studio"])
)
details["studios"] = details["id"].map(studios_grouped).fillna("")

  studios_grouped = studios.groupby("id").apply(


### Themes

In [15]:
themes_grouped = themes.groupby("id", group_keys=False).apply(
    lambda x: ", ".join(x["theme"])
)
details["themes"] = details["id"].map(themes_grouped).fillna("")

  themes_grouped = themes.groupby("id", group_keys=False).apply(


In [16]:
details

Unnamed: 0,id,name,date,tagline,description,minute,rating,link,genres,actors,crew,countries,languages,releases,studios,themes
0,1000001,Barbie,2023,She's everything. He's just Ken.,Barbie and Ken are having the time of their li...,114,3.86,https://a.ltrbxd.com/resized/film-poster/2/7/7...,"Comedy, Adventure","Margot Robbie (Barbie), Ryan Gosling (Ken), Am...","Greta Gerwig (Director), Tom Ackerley (Produce...","UK, USA",English (Language),"2023-07-21 in Andorra (Theatrical), 2023-07-20...","LuckyChap Entertainment, Heyday Films, NB/GG P...","Humanity and the world around us, Crude humor ..."
1,1000002,Parasite,2019,Act like you own the place.,"All unemployed, Ki-taek's family takes peculia...",133,4.56,https://a.ltrbxd.com/resized/film-poster/4/2/6...,"Comedy, Thriller, Drama","Song Kang-ho (Kim Ki-taek), Lee Sun-kyun (Park...","Bong Joon-ho (Director), Jang Young-hwan (Prod...",South Korea,"Korean (Primary language), English (Spoken lan...","2019-11-10 in Argentina (Premiere), 2020-01-23...",Barunson E&A,"Humanity and the world around us, Intense viol..."
2,1000003,Everything Everywhere All at Once,2022,The universe is so much bigger than you realize.,An aging Chinese immigrant is swept up in an i...,140,4.30,https://a.ltrbxd.com/resized/film-poster/4/7/4...,"Science Fiction, Adventure, Comedy, Action","Michelle Yeoh (Evelyn Wang), Ke Huy Quan (Waym...","Daniel Scheinert (Director), Daniel Kwan (Dire...",USA,"English (Primary language), Cantonese (Spoken ...","2022-06-09 in Argentina (Theatrical), 2022-03-...","IAC Films, AGBO, Ley Line Entertainment, Year ...","Humanity and the world around us, Moving relat..."
3,1000004,Fight Club,1999,Mischief. Mayhem. Soap.,A ticking-time-bomb insomniac and a slippery s...,139,4.27,https://a.ltrbxd.com/resized/film-poster/5/1/5...,Drama,"Edward Norton (Narrator), Brad Pitt (Tyler Dur...","David Fincher (Director), Ross Grayson Bell (P...","Germany, USA",English (Language),"1999-11-04 in Argentina (Theatrical), 1999-11-...","Fox 2000 Pictures, Regency Enterprises, The Li...","Intense violence and sexual transgression, Hum..."
4,1000005,La La Land,2016,Here's to the fools who dream.,"Mia, an aspiring actress, serves lattes to mov...",129,4.09,https://a.ltrbxd.com/resized/film-poster/2/4/0...,"Drama, Comedy, Music, Romance","Ryan Gosling (Sebastian), Emma Stone (Mia), Jo...","Damien Chazelle (Director), Jordan Horowitz (P...","Hong Kong, USA",English (Language),"2016-12-22 in Argentina (Theatrical), 2016-12-...","Summit Entertainment, Black Label Media, Gilbe...","Song and dance, Humanity and the world around ..."
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
941590,1941593,神笛,0,,,0,,,,,,China,Chinese (Language),,,
941591,1941594,蟲極道蜜団子抗争編 壱ノ巻,0,,Shinjuku forest at night. In the sap taverns o...,30,,,,,,USA,English (Language),,,
941592,1941595,蟲極道蜜団子抗争編 弐ノ巻,0,,"The city that never sleeps, where insects gath...",30,,https://a.ltrbxd.com/resized/film-poster/1/1/8...,,,,USA,English (Language),,,
941593,1941596,重生,0,,"In a world where order has broken down, darkne...",0,,https://a.ltrbxd.com/resized/film-poster/1/1/8...,"Action, Crime","Nick Cheung (Zhang Yao/张耀), Ethan Juan (An Du/...","Marc Ma (Director), 朱子亮 (Producer), Marc Ma (W...",China,Chinese (Language),,"上海猫眼影业有限公司, 坏小子（北京）传媒有限公司, 亚太国影（重庆）文化传媒有限公司, 凤...",


In [17]:
details.dtypes

id               int64
name            object
date             int32
tagline         object
description     object
minute           int32
rating         float64
link            object
genres          object
actors          object
crew            object
countries       object
languages       object
releases        object
studios         object
themes          object
dtype: object

## Save the file with all the film information

In [18]:
details.to_csv("details_movie.csv", index=False, sep=';')

print("Consolidated file ‘details_movie.csv’ created successfully.")

Consolidated file ‘details_movie.csv’ created successfully.
