In [1]:
import pandas as pd
from pathlib import Path

# Read the three cleaned CSV inputs from the shared processed-data folder.
# `p` is reused by later cells as the output directory as well.
p = Path("../data/processed")

movies, credits, ratings = (
    pd.read_csv(p / csv_name)
    for csv_name in ("movies_clean.csv", "credits_clean.csv", "ratings_summary.csv")
)


In [2]:
# Give the join key the same dtype in every table.
# Values that cannot be parsed as numbers become <NA>; the nullable Int64 dtype
# lets the column stay integer-typed even when such missing values are present.
for df in (movies, credits, ratings):
    numeric_ids = pd.to_numeric(df["tmdbId"], errors="coerce")
    df["tmdbId"] = numeric_ids.astype("Int64")


In [3]:
# Merge movies + credits + ratings into a single movie-level table.
#
# `movies` is the driving (left) table; `credits` and `ratings` are lookups
# joined on tmdbId. Each lookup is sanitised before the join:
#   * rows with a missing tmdbId are dropped. IDs that failed numeric coercion
#     are <NA>, and pandas matches null keys against each other in a merge, so
#     keeping them would attach unrelated credits/ratings to movies whose ID is
#     also missing.
#   * duplicate tmdbId rows are collapsed to their first occurrence, so the left
#     join cannot multiply movie rows.
final_data = movies
for lookup in (credits, ratings):
    lookup_unique = (
        lookup
        .dropna(subset=["tmdbId"])
        .drop_duplicates(subset="tmdbId")
    )
    final_data = final_data.merge(lookup_unique, on="tmdbId", how="left")

# A left join against unique keys must keep exactly one row per input movie row.
assert len(final_data) == len(movies), "merge changed the number of movie rows"


In [5]:
# Fill rating gaps left by the left join.
# A missing count becomes 0; a missing mean is replaced by the average of the
# rating_mean values that are present in the merged table.
rating_defaults = {
    "rating_count": 0,
    "rating_mean": final_data["rating_mean"].mean(),
}
for rating_col, default_value in rating_defaults.items():
    final_data[rating_col] = final_data[rating_col].fillna(default_value)

# With no gaps left, the count can be stored as a plain integer column.
final_data["rating_count"] = final_data["rating_count"].astype(int)

In [6]:
# Show the merged table after the rating gaps were filled
# (the rendered output is truncated to the leading and trailing rows).
final_data

Unnamed: 0,budget,genres,homepage,tmdbId,keywords,original_language,original_title,overview,popularity,production_companies,...,primary_country,primary_keyword,release_year,combined_features,top_cast,directors,writers,producers,rating_count,rating_mean
0,237000000,"[{""id"": 28, ""name"": ""Action""}, {""id"": 12, ""nam...",http://www.avatarmovie.com/,19995,"[{'id': 1463, 'name': 'culture clash'}, {'id':...",en,avatar,"in the 22nd century, a paraplegic marine is di...",150.437577,"[{""name"": ""Ingenious Film Partners"", ""id"": 289...",...,United States of America,culture clash,2009.0,"avatar in the 22nd century, a paraplegic marin...","Sam Worthington, Zoe Saldana, Sigourney Weaver...",James Cameron,James Cameron,"James Cameron, Jon Landau",36901,3.608330
1,300000000,"[{""id"": 12, ""name"": ""Adventure""}, {""id"": 14, ""...",http://disney.go.com/disneypictures/pirates/,285,"[{'id': 270, 'name': 'ocean'}, {'id': 726, 'na...",en,pirates of the caribbean: at world's end,"captain barbossa, long believed to be dead, ha...",139.082615,"[{""name"": ""Walt Disney Pictures"", ""id"": 2}, {""...",...,United States of America,ocean,2007.0,pirates of the caribbean: at world's end capta...,"Johnny Depp, Orlando Bloom, Keira Knightley, S...",Gore Verbinski,"Ted Elliott, Terry Rossio","Jerry Bruckheimer, Eric McLeod, Chad Oman, Pet...",15945,3.395484
2,245000000,"[{""id"": 28, ""name"": ""Action""}, {""id"": 12, ""nam...",http://www.sonypictures.com/movies/spectre/,206647,"[{'id': 470, 'name': 'spy'}, {'id': 818, 'name...",en,spectre,a cryptic message from bond’s past sends him o...,107.376788,"[{""name"": ""Columbia Pictures"", ""id"": 5}, {""nam...",...,United Kingdom,spy,2015.0,spectre a cryptic message from bond’s past sen...,"Daniel Craig, Christoph Waltz, Léa Seydoux, Ra...",Sam Mendes,"John Logan, Robert Wade, Neal Purvis, Jez Butt...","Barbara Broccoli, Michael G. Wilson",5630,3.392451
3,250000000,"[{""id"": 28, ""name"": ""Action""}, {""id"": 80, ""nam...",http://www.thedarkknightrises.com/,49026,"[{'id': 849, 'name': 'dc comics'}, {'id': 853,...",en,the dark knight rises,following the death of district attorney harve...,112.312950,"[{""name"": ""Legendary Pictures"", ""id"": 923}, {""...",...,United States of America,dc comics,2012.0,the dark knight rises following the death of d...,"Christian Bale, Michael Caine, Gary Oldman, An...",Christopher Nolan,"Christopher Nolan, Jonathan Nolan, David S. Goyer","Charles Roven, Christopher Nolan, Emma Thomas",31704,3.973000
4,260000000,"[{""id"": 28, ""name"": ""Action""}, {""id"": 12, ""nam...",http://movies.disney.com/john-carter,49529,"[{'id': 818, 'name': 'based on novel'}, {'id':...",en,john carter,"john carter is a war-weary, former military ca...",43.926995,"[{""name"": ""Walt Disney Pictures"", ""id"": 2}]",...,United States of America,based on novel,2012.0,"john carter john carter is a war-weary, former...","Taylor Kitsch, Lynn Collins, Samantha Morton, ...",Andrew Stanton,"Andrew Stanton, Michael Chabon, Mark Andrews","Colin Wilson, Jim Morris, Lindsey Collins",3674,3.159363
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
5793,0,"[{'id': 35, 'name': 'Comedy'}, {'id': 18, 'nam...",https://www.searchlightpictures.com/flamin-hot,626332,"[{'id': 5565, 'name': 'biography'}, {'id': 967...",en,flamin' hot,"the inspiring true story of richard montañez, ...",53.988000,"[{'id': 61377, 'logo_path': '/6IQmmSszsXcxbew2...",...,United States of America,biography,2023.0,flamin' hot the inspiring true story of richar...,"Jesse Garcia, Annie Gonzalez, Emilio Rivera, V...",Eva Longoria,"Lewis Colick, Linda Yvette Chavez",DeVon Franklin,9,3.888889
5794,0,"[{'id': 28, 'name': 'Action'}, {'id': 53, 'nam...",,1094319,[],en,the best man,mercenaries seize control of a remote resort h...,47.740000,"[{'id': 76992, 'logo_path': '/mw3j3P8SUQJKLSJQ...",...,Unknown,,2023.0,the best man mercenaries seize control of a re...,"Brendan Fehr, Dolph Lundgren, Luke Wilson, Nic...",Shane Dax Taylor,"Shane Dax Taylor, Daniel Zirilli, C. Alec Rossel",,4,1.875000
5795,0,"[{'id': 28, 'name': 'Action'}, {'id': 12, 'nam...",,998623,[],fr,drone games,,27.587000,"[{'id': 178619, 'logo_path': '/wlyh239wcxFSE7K...",...,France,,2023.0,drone games Action France,"Orlando Vauthier, Axel Granberger, Camille Léo...",Olivier Abbou,"Olivier Abbou, Thibault Lang-Willar, Mathilde ...","Bruno Merle, Olivier Abbou, Noor ""Rize"" Sadar,...",0,3.164733
5796,0,"[{'id': 99, 'name': 'Documentary'}]",https://tv.apple.com/movie//umc.cmc.633pbtki99...,1155770,"[{'id': 14618, 'name': 'british spy'}, {'id': ...",en,the pigeon tunnel,academy award® winner errol morris pulls back ...,47.684000,"[{'id': 28275, 'logo_path': '/b6VatRipyJYN52xL...",...,United Kingdom,british spy,2023.0,the pigeon tunnel academy award® winner errol ...,"John le Carré, Jake Dove, Charlotte Hamblin, G...",Errol Morris,Errol Morris,"Errol Morris, Stephen Cornwell, Dominic Crossl...",0,3.164733


In [8]:
# Enhance combined features
#
# Append each movie's top cast and directors to its `combined_features` text.
# Missing values are treated as empty strings.
#
# The cast/director part is built and trimmed first, so a missing `top_cast`
# no longer leaves a double space in front of the director names.
people = (
    final_data["top_cast"].fillna("") + " " +
    final_data["directors"].fillna("")
).str.strip()
base_text = final_data["combined_features"].fillna("")

# This cell overwrites the column it reads from. To keep a re-run from
# appending the same names a second time, rows whose text already ends with
# the cast/director string are left unchanged.
already_enriched = pd.Series(
    [text.endswith(names) for text, names in zip(base_text, people)],
    index=base_text.index,
    dtype=bool,
)

final_data["combined_features"] = base_text.where(
    already_enriched, base_text + " " + people
).str.strip()


In [9]:
# Show the table again to inspect the updated `combined_features` column
# (the rendered output is truncated to the leading and trailing rows).
final_data

Unnamed: 0,budget,genres,homepage,tmdbId,keywords,original_language,original_title,overview,popularity,production_companies,...,primary_country,primary_keyword,release_year,combined_features,top_cast,directors,writers,producers,rating_count,rating_mean
0,237000000,"[{""id"": 28, ""name"": ""Action""}, {""id"": 12, ""nam...",http://www.avatarmovie.com/,19995,"[{'id': 1463, 'name': 'culture clash'}, {'id':...",en,avatar,"in the 22nd century, a paraplegic marine is di...",150.437577,"[{""name"": ""Ingenious Film Partners"", ""id"": 289...",...,United States of America,culture clash,2009.0,"avatar in the 22nd century, a paraplegic marin...","Sam Worthington, Zoe Saldana, Sigourney Weaver...",James Cameron,James Cameron,"James Cameron, Jon Landau",36901,3.608330
1,300000000,"[{""id"": 12, ""name"": ""Adventure""}, {""id"": 14, ""...",http://disney.go.com/disneypictures/pirates/,285,"[{'id': 270, 'name': 'ocean'}, {'id': 726, 'na...",en,pirates of the caribbean: at world's end,"captain barbossa, long believed to be dead, ha...",139.082615,"[{""name"": ""Walt Disney Pictures"", ""id"": 2}, {""...",...,United States of America,ocean,2007.0,pirates of the caribbean: at world's end capta...,"Johnny Depp, Orlando Bloom, Keira Knightley, S...",Gore Verbinski,"Ted Elliott, Terry Rossio","Jerry Bruckheimer, Eric McLeod, Chad Oman, Pet...",15945,3.395484
2,245000000,"[{""id"": 28, ""name"": ""Action""}, {""id"": 12, ""nam...",http://www.sonypictures.com/movies/spectre/,206647,"[{'id': 470, 'name': 'spy'}, {'id': 818, 'name...",en,spectre,a cryptic message from bond’s past sends him o...,107.376788,"[{""name"": ""Columbia Pictures"", ""id"": 5}, {""nam...",...,United Kingdom,spy,2015.0,spectre a cryptic message from bond’s past sen...,"Daniel Craig, Christoph Waltz, Léa Seydoux, Ra...",Sam Mendes,"John Logan, Robert Wade, Neal Purvis, Jez Butt...","Barbara Broccoli, Michael G. Wilson",5630,3.392451
3,250000000,"[{""id"": 28, ""name"": ""Action""}, {""id"": 80, ""nam...",http://www.thedarkknightrises.com/,49026,"[{'id': 849, 'name': 'dc comics'}, {'id': 853,...",en,the dark knight rises,following the death of district attorney harve...,112.312950,"[{""name"": ""Legendary Pictures"", ""id"": 923}, {""...",...,United States of America,dc comics,2012.0,the dark knight rises following the death of d...,"Christian Bale, Michael Caine, Gary Oldman, An...",Christopher Nolan,"Christopher Nolan, Jonathan Nolan, David S. Goyer","Charles Roven, Christopher Nolan, Emma Thomas",31704,3.973000
4,260000000,"[{""id"": 28, ""name"": ""Action""}, {""id"": 12, ""nam...",http://movies.disney.com/john-carter,49529,"[{'id': 818, 'name': 'based on novel'}, {'id':...",en,john carter,"john carter is a war-weary, former military ca...",43.926995,"[{""name"": ""Walt Disney Pictures"", ""id"": 2}]",...,United States of America,based on novel,2012.0,"john carter john carter is a war-weary, former...","Taylor Kitsch, Lynn Collins, Samantha Morton, ...",Andrew Stanton,"Andrew Stanton, Michael Chabon, Mark Andrews","Colin Wilson, Jim Morris, Lindsey Collins",3674,3.159363
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
5793,0,"[{'id': 35, 'name': 'Comedy'}, {'id': 18, 'nam...",https://www.searchlightpictures.com/flamin-hot,626332,"[{'id': 5565, 'name': 'biography'}, {'id': 967...",en,flamin' hot,"the inspiring true story of richard montañez, ...",53.988000,"[{'id': 61377, 'logo_path': '/6IQmmSszsXcxbew2...",...,United States of America,biography,2023.0,flamin' hot the inspiring true story of richar...,"Jesse Garcia, Annie Gonzalez, Emilio Rivera, V...",Eva Longoria,"Lewis Colick, Linda Yvette Chavez",DeVon Franklin,9,3.888889
5794,0,"[{'id': 28, 'name': 'Action'}, {'id': 53, 'nam...",,1094319,[],en,the best man,mercenaries seize control of a remote resort h...,47.740000,"[{'id': 76992, 'logo_path': '/mw3j3P8SUQJKLSJQ...",...,Unknown,,2023.0,the best man mercenaries seize control of a re...,"Brendan Fehr, Dolph Lundgren, Luke Wilson, Nic...",Shane Dax Taylor,"Shane Dax Taylor, Daniel Zirilli, C. Alec Rossel",,4,1.875000
5795,0,"[{'id': 28, 'name': 'Action'}, {'id': 12, 'nam...",,998623,[],fr,drone games,,27.587000,"[{'id': 178619, 'logo_path': '/wlyh239wcxFSE7K...",...,France,,2023.0,"drone games Action France Orlando Vauthier, A...","Orlando Vauthier, Axel Granberger, Camille Léo...",Olivier Abbou,"Olivier Abbou, Thibault Lang-Willar, Mathilde ...","Bruno Merle, Olivier Abbou, Noor ""Rize"" Sadar,...",0,3.164733
5796,0,"[{'id': 99, 'name': 'Documentary'}]",https://tv.apple.com/movie//umc.cmc.633pbtki99...,1155770,"[{'id': 14618, 'name': 'british spy'}, {'id': ...",en,the pigeon tunnel,academy award® winner errol morris pulls back ...,47.684000,"[{'id': 28275, 'logo_path': '/b6VatRipyJYN52xL...",...,United Kingdom,british spy,2023.0,the pigeon tunnel academy award® winner errol ...,"John le Carré, Jake Dove, Charlotte Hamblin, G...",Errol Morris,Errol Morris,"Errol Morris, Stephen Cornwell, Dominic Crossl...",0,3.164733


In [10]:
# Write the merged table next to the input files, without the row index,
# then report the destination and preview the first rows.
output_path = p / "movies_final.csv"
final_data.to_csv(output_path, index=False)

print("Saved:", output_path)
final_data.head()


Saved: ..\data\processed\movies_final.csv


Unnamed: 0,budget,genres,homepage,tmdbId,keywords,original_language,original_title,overview,popularity,production_companies,...,primary_country,primary_keyword,release_year,combined_features,top_cast,directors,writers,producers,rating_count,rating_mean
0,237000000,"[{""id"": 28, ""name"": ""Action""}, {""id"": 12, ""nam...",http://www.avatarmovie.com/,19995,"[{'id': 1463, 'name': 'culture clash'}, {'id':...",en,avatar,"in the 22nd century, a paraplegic marine is di...",150.437577,"[{""name"": ""Ingenious Film Partners"", ""id"": 289...",...,United States of America,culture clash,2009.0,"avatar in the 22nd century, a paraplegic marin...","Sam Worthington, Zoe Saldana, Sigourney Weaver...",James Cameron,James Cameron,"James Cameron, Jon Landau",36901,3.60833
1,300000000,"[{""id"": 12, ""name"": ""Adventure""}, {""id"": 14, ""...",http://disney.go.com/disneypictures/pirates/,285,"[{'id': 270, 'name': 'ocean'}, {'id': 726, 'na...",en,pirates of the caribbean: at world's end,"captain barbossa, long believed to be dead, ha...",139.082615,"[{""name"": ""Walt Disney Pictures"", ""id"": 2}, {""...",...,United States of America,ocean,2007.0,pirates of the caribbean: at world's end capta...,"Johnny Depp, Orlando Bloom, Keira Knightley, S...",Gore Verbinski,"Ted Elliott, Terry Rossio","Jerry Bruckheimer, Eric McLeod, Chad Oman, Pet...",15945,3.395484
2,245000000,"[{""id"": 28, ""name"": ""Action""}, {""id"": 12, ""nam...",http://www.sonypictures.com/movies/spectre/,206647,"[{'id': 470, 'name': 'spy'}, {'id': 818, 'name...",en,spectre,a cryptic message from bond’s past sends him o...,107.376788,"[{""name"": ""Columbia Pictures"", ""id"": 5}, {""nam...",...,United Kingdom,spy,2015.0,spectre a cryptic message from bond’s past sen...,"Daniel Craig, Christoph Waltz, Léa Seydoux, Ra...",Sam Mendes,"John Logan, Robert Wade, Neal Purvis, Jez Butt...","Barbara Broccoli, Michael G. Wilson",5630,3.392451
3,250000000,"[{""id"": 28, ""name"": ""Action""}, {""id"": 80, ""nam...",http://www.thedarkknightrises.com/,49026,"[{'id': 849, 'name': 'dc comics'}, {'id': 853,...",en,the dark knight rises,following the death of district attorney harve...,112.31295,"[{""name"": ""Legendary Pictures"", ""id"": 923}, {""...",...,United States of America,dc comics,2012.0,the dark knight rises following the death of d...,"Christian Bale, Michael Caine, Gary Oldman, An...",Christopher Nolan,"Christopher Nolan, Jonathan Nolan, David S. Goyer","Charles Roven, Christopher Nolan, Emma Thomas",31704,3.973
4,260000000,"[{""id"": 28, ""name"": ""Action""}, {""id"": 12, ""nam...",http://movies.disney.com/john-carter,49529,"[{'id': 818, 'name': 'based on novel'}, {'id':...",en,john carter,"john carter is a war-weary, former military ca...",43.926995,"[{""name"": ""Walt Disney Pictures"", ""id"": 2}]",...,United States of America,based on novel,2012.0,"john carter john carter is a war-weary, former...","Taylor Kitsch, Lynn Collins, Samantha Morton, ...",Andrew Stanton,"Andrew Stanton, Michael Chabon, Mark Andrews","Colin Wilson, Jim Morris, Lindsey Collins",3674,3.159363
