# TMDB-IMDb 드라마 데이터 파악 및 전처리

In [1]:
# 데이터 처리 및 분석
import pandas as pd
import numpy as np
from datetime import datetime, timedelta
from scipy import stats # 통계 분석/검정용
import pingouin as pg # SciPy 통계함수보다 더 많은 정보를 간단한 함수 호출로 반환
import warnings # 경고 메시지 관리용
import scikit_posthocs as sp # 사후 다중 비교 검정용
import re
from scipy.stats import skew, kurtosis

# 출력 설정
pd.set_option('display.max_columns', None) # 모든 컬럼 다 보기

# 시각화
import matplotlib.pyplot as plt
import seaborn as sns
import matplotlib.ticker as mtick # 축 눈금(tick) 관련 기능 설정
plt.rcParams['axes.unicode_minus'] = False # 그래프에 마이너스(‘−’) 기호가 깨지는 현상을 방지

# 한글 폰트 설정
import platform
if platform.system() == 'Windows': # Windows OS
    plt.rcParams['font.family'] = 'Malgun Gothic'
elif platform.system() == 'Darwin':  # mac OS
    plt.rcParams['font.family'] = 'AppleGothic'
else:  # Linux
    plt.rcParams['font.family'] = 'NanumGothic'


In [None]:
tv_series = pd.read_parquet('tv_series.parquet')
tv_series_dwc = pd.read_parquet('tv_series_directors_writers_cast.parquet') # 시리즈의 director, writer, top_cast 수집한 데이터

In [18]:
tv_series

Unnamed: 0,id,title,type,adult,backdrop_path,created_by,episode_run_time,first_air_date,genres,genre_ids,homepage,in_production,languages,last_air_date,last_episode_to_air_id,last_episode_to_air_name,last_episode_to_air_overview,last_episode_to_air_vote_average,last_episode_to_air_vote_count,last_episode_to_air_air_date,last_episode_to_air_episode_number,last_episode_to_air_production_code,last_episode_to_air_runtime,last_episode_to_air_season_number,last_episode_to_air_show_id,last_episode_to_air_still_path,next_episode_to_air,networks,number_of_episodes,number_of_seasons,origin_country,original_language,original_name,overview,popularity,poster_path,production_companies,production_countries,seasons,spoken_languages,status,tagline,type_detail,vote_average,vote_count,review,keyword,top_cast,directors,writers,providers_flatrate,providers_rent,providers_buy,imdb_id,imdb_rating,imdb_rating_count
0,3,The Message,tv_series,False,,,"25, 30",2006-05-20,Comedy,35,http://www.messagetelevision.com/,False,en,2006-06-24,1130491.0,Episode 6,,0.0,0.0,2006-06-24,6.0,,,1.0,3.0,,,BBC Three,6.0,1,GB,en,The Message,The Message was a surreal comedy series which ...,0.4575,/wK9h8FwbmOWlMyW6fT2C6yFPvSu.jpg,,,S1: Season 1 (6 eps),English,Ended,,Scripted,7.9,4,,,"Katherine Jakeways, Anthony Mark Barrow, Stefa...",,,{},{},{},tt1525220,,
1,4,The Amazing Mrs Pritchard,tv_series,False,/oJdDWkRNkdWrYhOTNyooJDJEFoX.jpg,Sally Wainwright,60,2006-10-03,Drama,18,https://www.bbc.co.uk/programmes/b006mg4s,False,en,2006-11-07,1130494.0,Episode 6,Will Ros keep her promise to never lie to the ...,0.0,0.0,2006-11-07,6.0,,60.0,1.0,4.0,/lGyA1sZZI2gwuagGV1fFgVwNE6J.jpg,,BBC One,6.0,1,GB,en,The Amazing Mrs Pritchard,Supermarket manager Ros Pritchard decides to s...,0.8613,/r2iyYZ98eN70C7kkUIYbW3QaJwn.jpg,,,S1: Season 1 (6 eps),English,Ended,From middle-class mum to Prime Minister.,Scripted,6.2,5,,,"Jane Horrocks, Steven Mackintosh, Jodhi May, J...","Declan Lowney, Simon Curtis",,"{""AU"": [""BritBox"", ""Britbox Apple TV Channel ""...",{},{},tt0807980,,
2,11,Strictly Sex with Dr. Drew,tv_series,False,/se2jymoLDghIrLewQismwfmWG.jpg,,60,2005-06-08,,,,False,en,2005-08-24,1130622.0,Sexual Chemistry,,0.0,0.0,2005-08-24,10.0,,60.0,1.0,11.0,,,Discovery Health Channel,10.0,1,US,en,Strictly Sex with Dr. Drew,Strictly Sex with Dr. Drew is a television sho...,0.2813,/3hFpUg6Ty25Vs5XgbnNz1Xcirb5.jpg,,,S1: Season 1 (10 eps),English,Ended,,Scripted,7.5,2,,sex education,,,,{},{},{},tt0452567,,
3,14,Shuriken School,tv_series,False,/as6DK6A84DHcsnCqzzC0JQ6P5Rr.jpg,,"25, 23, 30",2006-07-03,"Animation, Action & Adventure, Comedy, Kids, S...","16, 10759, 35, 10762, 10765",http://www.shurikenschool.com/,False,"en, fr",2006-11-11,1930038.0,Dirty Rice Balls,Something's gone terribly wrong with Eizan's f...,0.0,0.0,2006-11-11,26.0,,23.0,1.0,14.0,,,Nickelodeon,26.0,1,"FR, ES",en,Shuriken School,Eizan Kaburagi and his friends experience thei...,0.7980,/khXHvYKAvGqjtnKoLVYZr8r5yKD.jpg,"Zinkia Entertainment, Studio Xilam",,S1: Season 1 (26 eps),"English, Français",Ended,,Scripted,6.1,13,,ninja,"Jessica DiCicco, Charlie Adler, Kimberly Brook...",,,{},{},"{""CA"": [""Apple TV""], ""US"": [""Amazon Video"", ""A...",tt0835241,,
4,17,Mighty Truck of Stuff,tv_series,False,/o11dESROFbd3n8e88dtW8oMHwEE.jpg,,180,2005-10-15,,,http://www.mightytruckofstuff.com/,False,en,2006-07-08,1130763.0,"Series 2, Show 15",,0.0,0.0,2006-07-08,15.0,,180.0,2.0,17.0,,,CBBC,15.0,1,GB,en,Mighty Truck of Stuff,Mighty Truck of Stuff was a British children's...,0.4857,/rIIXBA2fbJ47Qj3SrsHWmfw0P5q.jpg,,,S2: Season 2 (15 eps),English,Ended,,Scripted,0.0,0,,,Reggie Yates,,,{},{},{},tt0490733,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
138368,306823,在恋爱开始之前,tv_series,False,/7OAAtBUL4TL0VLDdYBSB8CfQOsd.jpg,,0,2025-11-25,Reality,10764,,True,zh,2025-11-25,6722903.0,Episode 2,,0.0,0.0,2025-11-25,2.0,,,1.0,306823.0,/m830DsYJBkv5Su814fV3tPtd9io.jpg,,Mango TV,2.0,1,CN,zh,在恋爱开始之前,,0.0596,/kzLYvsyP40ZqGrSb9yPm8APV941.jpg,MangoTV,China,S1: Season 1 (2 eps),普通话,Returning Series,,Reality,0.0,0,,,"Eliza Liang, Luo Yizhou, Christy Chung Lai-Tai...",,,,,,,,
138369,306719,Hvem passer på grisene?,tv_series,False,/x5Ew4m04TIdKLKRorhe2CCGqaaU.jpg,,40,2024-12-12,Documentary,99,,True,da,2024-12-12,6719028.0,Episode 2,,0.0,0.0,2025-11-26,2.0,,34.0,1.0,306719.0,,,TV 2,2.0,1,DK,da,Hvem passer på grisene?,,0.0829,/eSQYVhpyPJZTVesCWFiB54u9h7V.jpg,TV 2,Denmark,S1: Season 1 (2 eps),Dansk,Returning Series,,Documentary,10.0,1,,,,,,,,,,,
138370,307157,Science Fiction Revolution,tv_series,False,,,0,,,,,True,,,6732143.0,Episode 1,,0.0,0.0,2025-05-04,1.0,,,1.0,,,,,,0,US,en,Science Fiction Revolution,,0.0000,,,,,,Returning Series,,Scripted,0.0,0,,,,,,,,,tt39072677,,
138371,307156,มิติสยอง เชื่อเป็น...ไม่เชื่อตาย,tv_series,False,,,0,,,,,True,,,6732121.0,Episode 1,,0.0,0.0,2017-05-06,1.0,,,1.0,,,,,,0,US,en,มิติสยอง เชื่อเป็น...ไม่เชื่อตาย,,0.0000,,,,,,Returning Series,,Scripted,0.0,0,,,,,,,,,,,


In [16]:
tv_series_dwc

Unnamed: 0,imdb_id,series_id,title,original_name,first_air_date,vote_average,vote_count,popularity,executive_producer_name,executive_producer_ids,executive_producer_gender,executive_producer_profile_path,writers_name,writer_roles,writer_ids,writer_gender,writer_profile_path,top_cast_order,top_cast,character,top_cast_ids,top_cast_gender,top_cast_profile_path
0,tt0469713,19505,Los Reyes,Los Reyes,2005-01-02,8.400,141,3.4606,,,,,,,,,,0; 1; 2; 4; 5,Cony Camelo; Enrique Carriazo; Jery Sandoval; ...,Hilda Edilberta Reyes; Edilberto Reyes; María ...,3107359; 1032964; 4091355; 2422857; 1251174,1; 2; 1; 0; 2,/ekVNd7vxaYysMfTbEUQ1GKtS2wP.jpg; /jQ19ywdYltt...
1,tt0437729,14981,The Late Late Show with Craig Ferguson,The Late Late Show with Craig Ferguson,2005-01-03,6.712,111,21.4894,,,,,,,,,,1,Craig Ferguson,Self - Host,24264,2,/mBxNrOxdQeM7bVBveE47Syj9ES6.jpg
2,tt0441059,11466,Kaamelott,Kaamelott,2005-01-03,8.500,177,11.3855,,,,,Christophe Fort,Writer,1243297,0,,0; 1; 2; 3; 4,Alexandre Astier; Anne Girouard; Lionnel Astie...,Roi Arthur; Guenièvre; Léodagan; Séli; Lancelot,47826; 204034; 145231; 1316265; 1243291,2; 1; 2; 1; 2,/vOR9GRWaXMsDnnFSOoloUlNvwgI.jpg; /ykySP1Ox1JO...
3,tt0412175,36,Medium,Medium,2005-01-03,7.462,553,22.7821,,,,,Jacob Cooney,Writers' Assistant,1318326,2,/4hEMgTX1BBXXNPGNFga8xY3e7MS.jpg,0; 1; 2; 3; 4,Patricia Arquette; Jake Weber; Miguel Sandoval...,Allison Dubois; Joe Dubois; Manuel Devalos; Le...,4687; 4177; 30488; 155282; 95039,1; 2; 2; 2; 1,/jeThSouMatiuRiLkjDvSBLHpmq0.jpg; /z1RTh4qvyMF...
4,tt0443370,13678,Dragons' Den,Dragons' Den,2005-01-04,6.258,31,3.8883,Samantha Davies,2574817,0,,,,,,,1; 2; 8; 9; 13,Sara Davies; Touker Suleyman; Peter Jones; Deb...,Dragon; Dragon; Dragon; Dragon; Presenter,2574820; 2574821; 1236478; 1225323; 225273,1; 2; 2; 1; 2,/c67QTFZsEcUly4f1zzrtJEKqJWd.jpg; /8gKM5WjQkzv...
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
6981,tt31314751,246386,All Her Fault,All Her Fault,2025-11-07,8.413,46,20.8991,Joanna Strevens; Nigel Marchant; Megan Gallagh...,3719625; 1252497; 2190822; 943340; 1324180; 48...,1; 2; 1; 2; 1; 1; 1,; ; ; /jx1VSzBzrOoLJjepcVlN02IJaxl.jpg; /kUWpo...,Andrea Mara,Novel,4536447,1,,0; 1; 2; 3; 4,Sarah Snook; Jake Lacy; Sophia Lillis; Michael...,Marissa Irvine; Peter Irvine; Carrie Finch; De...,235416; 496470; 1481238; 454; 501,1; 2; 1; 2; 1,/w9xv72oaTISLgeT381fU4Jor9GV.jpg; /qU6n9EaesBG...
6982,tt32237534,252193,Last Samurai Standing,イクサガミ,2025-11-13,7.949,118,32.4336,Shinichi Takahashi,2154930,2,,Shogo Imamura,Novel,4662692,2,,0; 1; 2; 3; 4,Junichi Okada; Yumia Fujisaki; Kaya Kiyohara; ...,Shujiro Saga; Futaba Katsuki; Iroha Kinugasa; ...,119241; 4165567; 1671407; 1185386; 1018944,2; 1; 1; 2; 2,/pk5rltXChl9A3HjS6iQLGt1DWJY.jpg; /2xTBm6VCTdF...
6983,tt31974367,250504,The Beast in Me,The Beast in Me,2025-11-13,7.287,94,25.9515,Conan O'Brien; David Kissinger; Jodie Foster; ...,81200; 1380810; 1038; 1961940; 1857699; 121442...,2; 2; 1; 2; 2; 2; 2; 1; 2; 1,/zlALLZ3c9BciE85XaKq9RrrEp1E.jpg; ; /8DAd9knKi...,,,,,,0; 1; 2; 3,Claire Danes; Matthew Rhys; Brittany Snow; Nat...,Agatha 'Aggie' Wiggs; Nile Jarvis; Nina Jarvis...,6194; 29528; 29221; 118752,1; 2; 1; 1,/6YvMc6RViFSornxmZjRgEKWQMzI.jpg; /8uktKexgPn0...
6984,tt34231642,274979,The Crystal Cuckoo,El cuco de cristal,2025-11-14,6.551,39,17.6936,,,,,Javier Castillo,Novel,3907168,0,,0; 1; 2; 3; 4,Catalina Sopelana; Itziar Ituño; Álex García; ...,Clara Merlo; Marta Peña; Miguel Ferrer; Rafael...,2164904; 947514; 1063319; 31422; 119972,1; 1; 2; 2; 2,/mzbWt3yS1pgZKNH6R78WhqWkgSH.jpg; /vnrfL03NwRJ...


## 드라마 필터링

### (1) vote_count>=30 필터링

In [3]:
pd.set_option('display.max_rows', 1000) 
tv_series_filtered = tv_series[
    (tv_series['imdb_id'].notna()) &
    (tv_series['vote_count']>=30)
    ].copy()
tv_series_filtered

Unnamed: 0,id,title,type,adult,backdrop_path,created_by,episode_run_time,first_air_date,genres,genre_ids,homepage,in_production,languages,last_air_date,last_episode_to_air_id,last_episode_to_air_name,last_episode_to_air_overview,last_episode_to_air_vote_average,last_episode_to_air_vote_count,last_episode_to_air_air_date,last_episode_to_air_episode_number,last_episode_to_air_production_code,last_episode_to_air_runtime,last_episode_to_air_season_number,last_episode_to_air_show_id,last_episode_to_air_still_path,next_episode_to_air,networks,number_of_episodes,number_of_seasons,origin_country,original_language,original_name,overview,popularity,poster_path,production_companies,production_countries,seasons,spoken_languages,status,tagline,type_detail,vote_average,vote_count,review,keyword,top_cast,directors,writers,providers_flatrate,providers_rent,providers_buy,imdb_id,imdb_rating,imdb_rating_count
6,36,Medium,tv_series,False,/dVpAGo3TT2IjDyRuKwhNfe9E1A5.jpg,Glenn Gordon Caron,42,2005-01-03,"Crime, Drama, Mystery, Sci-Fi & Fantasy","80, 18, 9648, 10765",http://www.cbs.com/primetime/medium/,False,"en, it",2011-01-21,855.0,Me Without You,When Allison becomes a lawyer and Joe starts a...,8.000,3.0,2011-01-21,13.0,,44.0,7.0,36.0,/viRBkXMCIyRJHloP6t4SW3Knspl.jpg,,"NBC, CBS",128.0,7,US,en,Medium,Allison Dubois works in the District Attorney’...,19.6194,/6tOxspW2MbbAQ0s3jtfs8QATcx.jpg,"Pointe Studios, Picturemaker Productions, Gram...",United States of America,S0: Specials (30 eps); S1: Season 1 (16 eps); ...,"English, Italiano",Ended,She Sees What Others Can't.,Scripted,7.462,553,,"medium, psychic power, paranormal, criminal co...","Patricia Arquette, Jake Weber, Miguel Sandoval...","Aaron Lipstadt, Andy Wolk, Arlene Sanford, Arl...","Analisa Brouet, Arika Lisanne Mittman, Bernade...","{""AR"": [""Paramount Plus"", ""Paramount Plus Appl...",{},"{""AT"": [""Amazon Video""], ""AU"": [""Apple TV""], ""...",tt0412175,7.2,32826.0
12,114,Nathan Barley,tv_series,False,/1BDNaMPm5NA3NaMkcmdqGz45RFm.jpg,"Chris Morris, Charlie Brooker",26,2005-02-11,Comedy,35,http://www.trashbat.co.ck,False,en,2005-03-18,5415.0,The Idiots are Winning,"In the final episode, Nathan finds his way int...",0.000,0.0,2005-03-18,6.0,,26.0,1.0,114.0,/c6J7HE6yWV769LDqRKX14rkCAc2.jpg,,Channel 4,6.0,1,GB,en,Nathan Barley,Nathan Barley is a Channel 4 sitcom written by...,1.3663,/jAd2zeYyWv89L6VRMAtPsssAZ7r.jpg,"Talkback, Channel 4 Television",United Kingdom,S0: Specials (2 eps); S1: Season 1 (6 eps),English,Ended,,Scripted,7.200,40,,sitcom,"Nicholas Burns, Julian Barratt, Claire Keelan,...",Chris Morris,"Charlie Brooker, Chris Morris",{},{},{},tt0426654,8.0,4539.0
14,119,Cory in the House,tv_series,False,/nwrnwIbYP3BUFr6WoJApp2vnAAO.jpg,"Marc Warren, Dennis Rinsler",23,2007-01-12,"Comedy, Kids","35, 10762",,False,en,2008-09-12,5649.0,Mad Songs Pay So Much,"After breaking up with Craig, Meena writes an ...",8.000,2.0,2008-09-12,13.0,213,23.0,2.0,119.0,/g8DVMW8wKXhi8EjM7gaDBWyeZMF.jpg,,Disney Channel,34.0,2,US,en,Cory in the House,It's a brand new life for Cory Baxter when his...,3.9700,/kf7aiSIFDP1tkxsqckgWxIyuumw.jpg,"It's a Laugh Productions, Warren & Rinsler Pro...",United States of America,S1: Season 1 (21 eps); S2: Season 2 (13 eps),English,Ended,,Scripted,6.800,92,,"washington dc, usa, the white house, president...","John D'Aquino, Kyle Massey, Rondell Sheridan, ...","David Kendall, Eric Dean Seaton, Mark Cendrows...","Dennis Rinsler, Edward C. Evans, Marc Warren, ...",{},{},"{""AT"": [""Amazon Video""], ""AU"": [""Apple TV""], ""...",tt0805815,5.0,7507.0
15,132,Fantastic Four: World's Greatest Heroes,tv_series,False,/q0bLjLCBZGhnMcRprx2gWRGdlrp.jpg,"Craig Kyle, Christopher L. Yost",22,2006-09-02,"Animation, Action & Adventure, Sci-Fi & Fantas...","16, 10759, 10765, 10762",,False,en,2007-10-20,7325.0,Scavenger Hunt,"Terminus, an alien scavenger, comes to steal a...",5.000,1.0,2007-10-20,26.0,,22.0,1.0,132.0,/6k1U2f4Lh50BB8htRVye7r5QH5t.jpg,,Cartoon Network,26.0,1,US,en,Fantastic Four: World's Greatest Heroes,"Mr. Fantastic, the Invisible Woman, the Human ...",2.8511,/zCtCW0JlbMpzfQDAX1bO1o1KLi.jpg,"MoonScoop, Marvel Entertainment","France, United States of America",S0: Specials (3 eps); S1: Season 1 (26 eps),English,Ended,,Scripted,7.500,93,,"superhero, based on comic, big city, family, a...","Hiro Kanagawa, Lara Gilchrist, Christopher Jac...",Franck Michel,"Bob Forward, Chris Hicks, Christopher L. Yost,...","{""CA"": [""Disney Plus""], ""US"": [""Disney Plus""]}",{},"{""CA"": [""Apple TV""], ""US"": [""Amazon Video"", ""A...",tt0830298,6.7,2940.0
23,186,Weeds,tv_series,False,/6k6vAUa0Fm73HB0QBOJbFGTgPq5.jpg,Jenji Kohan,27,2005-08-07,"Comedy, Crime, Drama","35, 80, 18",,False,en,2012-09-16,11652.0,It's Time (2),"At Stevie's bar mitzvah, friends and family ga...",7.800,11.0,2012-09-16,13.0,,30.0,8.0,186.0,/lXA5tUHMkjKc1d8WKHv0kX2ORNh.jpg,,Showtime,102.0,8,US,en,Weeds,"After the unexpected death of her husband, a s...",19.8985,/gKUsBTa5b0GY7U4iWBseeBalfjO.jpg,"Tilted Productions, Lionsgate Television",United States of America,S0: Specials (1 eps); S1: Season 1 (10 eps); S...,English,Ended,The neighborhood has gone green.,Scripted,7.484,935,Cat Ellington(1.0): What the hell had Mary-Lou...,"drug dealer, outlaw, dysfunctional family, dar...","Mary-Louise Parker, Justin Kirk, Hunter Parris...","Adam Bernstein, Arlene Sanford, Bethany Rooney...","Barry Safchik, Blair Singer, Brendan Kelly, Ca...","{""AD"": [""Netflix""], ""AE"": [""Netflix""], ""AG"": [...","{""FR"": [""Canal VOD""], ""JP"": [""Amazon Video""]}","{""AT"": [""Amazon Video"", ""Google Play Movies"", ...",tt0439100,7.9,121234.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
131410,294726,Amy Bradley Is Missing,tv_series,False,/bx26v6trU13B2VfzSMfpMJe7OGb.jpg,,49,2025-07-16,"Documentary, Crime, Mystery","99, 80, 9648",https://www.netflix.com/title/81741332,False,en,2025-07-16,6338437.0,Message in a Bottle,The 20th anniversary of Amy's disappearance re...,6.800,4.0,2025-07-16,3.0,,40.0,1.0,294726.0,/ePfidK7fvwiWIy6gBdMwNJEm3ar.jpg,,Netflix,3.0,1,US,en,Amy Bradley Is Missing,This true-crime series investigates the 1998 d...,1.2347,/bS7hWmAGEFXbgSYyKqX0uapxWO7.jpg,AMPLE Entertainment,United States of America,S1: Miniseries (3 eps),English,Ended,Have you seen Amy?,Miniseries,7.071,35,,"miniseries, true crime, missing person","Iva Bradley, Ron Bradley, Brad Bradley, Erin S...","Ari Mark, Phil Lott","Ari Mark, Phil Lott",AD: Netflix; AE: Netflix; AG: Netflix; AL: Net...,,,tt37439438,6.7,6496.0
133087,297464,The Guest,tv_series,False,/wJBYXpzKHZNWnkfMuXhr40daPan.jpg,,42,2025-09-24,Drama,18,https://www.netflix.com/title/81733765,True,es,2025-09-24,6471067.0,Episode 20,,0.000,0.0,2025-09-24,20.0,,35.0,1.0,297464.0,/xafldBHy90yBzpa8qTowUuEGIWh.jpg,,Netflix,20.0,1,CO,es,La huésped,A seemingly perfect household unravels when an...,10.5993,/fBMRxziwviMBVLOCe1i4qtwRXKj.jpg,CMO Producciones,Colombia,S1: Season 1 (20 eps),Español,Returning Series,,Scripted,7.900,31,,psychological thriller,"Laura Londoño, Carmen Villalobos, Jason Day, K...","Israel Sanchez, Klych Lopez","Dario Venegas, Lina Uribe",AD: Netflix; AE: Netflix; AG: Netflix; AL: Net...,,,tt37065878,5.4,645.0
133302,298234,aka Charlie Sheen,tv_series,False,/qSxabAdLMiJWIJUb7hbSVvwMm4s.jpg,Andrew Renzi,92,2025-09-10,Documentary,99,https://www.netflix.com/title/82024990,False,en,2025-09-10,6448588.0,Part Deux,"As ""Two and a Half Men"" takes off, Charlie ent...",7.357,7.0,2025-09-10,2.0,,89.0,1.0,298234.0,/fYI49EABlkrW7NYz979D6xSlrOf.jpg,,Netflix,2.0,1,US,en,aka Charlie Sheen,Actor Charlie Sheen tells his story in this ca...,1.9062,/pHAM8Fzek7xwTbqmaxPXAjCAjz1.jpg,"Atlas Independent, Boardwalk Pictures, North o...",United States of America,S1: Miniseries (2 eps),English,Ended,This isn't a comeback. It's a revelation.,Miniseries,7.500,34,,"rise and fall, miniseries, redemption, recover...","Charlie Sheen, Jon Cryer, Sean Penn, Ramon Est...",Andrew Renzi,,AD: Netflix; AE: Netflix; AG: Netflix; AL: Net...,,,tt37898320,7.6,10533.0
134012,300258,Los hilos del pasado,tv_series,False,/AgOJ5Jgtj1ZVbBfWD4dArXbUdaC.jpg,José Alberto Castro,42,2025-09-10,"Soap, Drama","10766, 18",https://www.univision.com/shows/los-hilos-del-...,True,es,2025-11-04,6716737.0,Episode 56,,0.000,0.0,2025-11-28,56.0,,,1.0,300258.0,,"{""id"": 6716738, ""name"": ""Episode 57"", ""overvie...",Univision,43.0,1,US,en,Los hilos del pasado,Carolina Guillén is a renowned fashion designe...,4.7790,/k3dGAfWNvr23FQl5e8yDGxv847T.jpg,TelevisaUnivision,United States of America,S1: Season 1 (60 eps),Español,Returning Series,,Scripted,7.871,31,,,"Bárbara López, Yadhira Carrillo, Emmanuel Palo...",,,,,,tt36595665,6.4,53.0


### (2) 불필요한 컬럼 삭제

In [4]:
tv_series_filtered = tv_series_filtered.copy()
cols = ["imdb_rating_count", "imdb_rating", 
    "keyword", "imdb_id", "production_companies", "created_by", "poster_path",
    "type_detail", "seasons",
    "providers_flatrate", "overview", "genre_ids", "genres",
   "origin_country", "last_air_date", "first_air_date",
    "number_of_episodes", "title", "status", 
    "vote_average", "vote_count", "original_language", "number_of_seasons",
    "episode_run_time", "id", "in_production", "last_episode_to_air_vote_average"
]

tv_series_filtered = tv_series_filtered[cols]
tv_series_filtered

Unnamed: 0,imdb_rating_count,imdb_rating,keyword,imdb_id,production_companies,created_by,poster_path,type_detail,seasons,providers_flatrate,overview,genre_ids,genres,origin_country,last_air_date,first_air_date,number_of_episodes,title,status,vote_average,vote_count,original_language,number_of_seasons,episode_run_time,id,in_production,last_episode_to_air_vote_average
6,32826.0,7.2,"medium, psychic power, paranormal, criminal co...",tt0412175,"Pointe Studios, Picturemaker Productions, Gram...",Glenn Gordon Caron,/6tOxspW2MbbAQ0s3jtfs8QATcx.jpg,Scripted,S0: Specials (30 eps); S1: Season 1 (16 eps); ...,"{""AR"": [""Paramount Plus"", ""Paramount Plus Appl...",Allison Dubois works in the District Attorney’...,"80, 18, 9648, 10765","Crime, Drama, Mystery, Sci-Fi & Fantasy",US,2011-01-21,2005-01-03,128.0,Medium,Ended,7.462,553,en,7,42,36,False,8.000
12,4539.0,8.0,sitcom,tt0426654,"Talkback, Channel 4 Television","Chris Morris, Charlie Brooker",/jAd2zeYyWv89L6VRMAtPsssAZ7r.jpg,Scripted,S0: Specials (2 eps); S1: Season 1 (6 eps),{},Nathan Barley is a Channel 4 sitcom written by...,35,Comedy,GB,2005-03-18,2005-02-11,6.0,Nathan Barley,Ended,7.200,40,en,1,26,114,False,0.000
14,7507.0,5.0,"washington dc, usa, the white house, president...",tt0805815,"It's a Laugh Productions, Warren & Rinsler Pro...","Marc Warren, Dennis Rinsler",/kf7aiSIFDP1tkxsqckgWxIyuumw.jpg,Scripted,S1: Season 1 (21 eps); S2: Season 2 (13 eps),{},It's a brand new life for Cory Baxter when his...,"35, 10762","Comedy, Kids",US,2008-09-12,2007-01-12,34.0,Cory in the House,Ended,6.800,92,en,2,23,119,False,8.000
15,2940.0,6.7,"superhero, based on comic, big city, family, a...",tt0830298,"MoonScoop, Marvel Entertainment","Craig Kyle, Christopher L. Yost",/zCtCW0JlbMpzfQDAX1bO1o1KLi.jpg,Scripted,S0: Specials (3 eps); S1: Season 1 (26 eps),"{""CA"": [""Disney Plus""], ""US"": [""Disney Plus""]}","Mr. Fantastic, the Invisible Woman, the Human ...","16, 10759, 10765, 10762","Animation, Action & Adventure, Sci-Fi & Fantas...",US,2007-10-20,2006-09-02,26.0,Fantastic Four: World's Greatest Heroes,Ended,7.500,93,en,1,22,132,False,5.000
23,121234.0,7.9,"drug dealer, outlaw, dysfunctional family, dar...",tt0439100,"Tilted Productions, Lionsgate Television",Jenji Kohan,/gKUsBTa5b0GY7U4iWBseeBalfjO.jpg,Scripted,S0: Specials (1 eps); S1: Season 1 (10 eps); S...,"{""AD"": [""Netflix""], ""AE"": [""Netflix""], ""AG"": [...","After the unexpected death of her husband, a s...","35, 80, 18","Comedy, Crime, Drama",US,2012-09-16,2005-08-07,102.0,Weeds,Ended,7.484,935,en,8,27,186,False,7.800
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
131410,6496.0,6.7,"miniseries, true crime, missing person",tt37439438,AMPLE Entertainment,,/bS7hWmAGEFXbgSYyKqX0uapxWO7.jpg,Miniseries,S1: Miniseries (3 eps),AD: Netflix; AE: Netflix; AG: Netflix; AL: Net...,This true-crime series investigates the 1998 d...,"99, 80, 9648","Documentary, Crime, Mystery",US,2025-07-16,2025-07-16,3.0,Amy Bradley Is Missing,Ended,7.071,35,en,1,49,294726,False,6.800
133087,645.0,5.4,psychological thriller,tt37065878,CMO Producciones,,/fBMRxziwviMBVLOCe1i4qtwRXKj.jpg,Scripted,S1: Season 1 (20 eps),AD: Netflix; AE: Netflix; AG: Netflix; AL: Net...,A seemingly perfect household unravels when an...,18,Drama,CO,2025-09-24,2025-09-24,20.0,The Guest,Returning Series,7.900,31,es,1,42,297464,True,0.000
133302,10533.0,7.6,"rise and fall, miniseries, redemption, recover...",tt37898320,"Atlas Independent, Boardwalk Pictures, North o...",Andrew Renzi,/pHAM8Fzek7xwTbqmaxPXAjCAjz1.jpg,Miniseries,S1: Miniseries (2 eps),AD: Netflix; AE: Netflix; AG: Netflix; AL: Net...,Actor Charlie Sheen tells his story in this ca...,99,Documentary,US,2025-09-10,2025-09-10,2.0,aka Charlie Sheen,Ended,7.500,34,en,1,92,298234,False,7.357
134012,53.0,6.4,,tt36595665,TelevisaUnivision,José Alberto Castro,/k3dGAfWNvr23FQl5e8yDGxv847T.jpg,Scripted,S1: Season 1 (60 eps),,Carolina Guillén is a renowned fashion designe...,"10766, 18","Soap, Drama",US,2025-11-04,2025-09-10,43.0,Los hilos del pasado,Returning Series,7.871,31,en,1,42,300258,True,0.000


### (3) TV 시리즈 중 비드라마 작품의 장르/타이틀 필터링 -> 모두 드라마 맞는 것으로 확인 -> 그대로 진행할지 논의 필요

In [5]:
tv_series_filtered['genres']

6                   Crime, Drama, Mystery, Sci-Fi & Fantasy
12                                                   Comedy
14                                             Comedy, Kids
15        Animation, Action & Adventure, Sci-Fi & Fantas...
23                                     Comedy, Crime, Drama
                                ...                        
131410                          Documentary, Crime, Mystery
133087                                                Drama
133302                                          Documentary
134012                                          Soap, Drama
135914                   Drama, Action & Adventure, Mystery
Name: genres, Length: 6730, dtype: object

In [9]:
pd.set_option('display.max_rows', 1000) 
non_drama_genre_pattern = (
    'Reality|Documentary|Talk|News|Kids|Animation|Game Show'
)

non_drama_title_keywords = [
    'wwe', 'wrestling', 'talent', 'survivor',
    'idol', 'competition', 'variety', 'battle'
]

series_to_dramas = tv_series_filtered[
    # 1) 장르에 Drama 반드시 포함
    tv_series_filtered['genres'].str.contains('Drama', case=False, na=False)
    
    # 2) 예능/다큐/키즈/애니 제거
    &
    ~tv_series_filtered['genres'].str.contains(
        non_drama_genre_pattern, case=False, na=False
    )

    # 3) 제목 기반 오디션/레슬링/쇼 제거
    &
    ~tv_series_filtered['title'].str.contains(
        '|'.join(non_drama_title_keywords),
        case=False, na=False
    )
].copy()

series_to_dramas[['title','genres']]

Unnamed: 0,title,genres
6,Medium,"Crime, Drama, Mystery, Sci-Fi & Fantasy"
23,Weeds,"Comedy, Crime, Drama"
27,The Game,"Drama, Comedy"
30,Kyle XY,"Sci-Fi & Fantasy, Drama"
43,Dirt,Drama
...,...,...
129263,Sara - Woman in the Shadows,"Mystery, Drama, Crime"
129957,Revenged Love,"Drama, Action & Adventure"
133087,The Guest,Drama
134012,Los hilos del pasado,"Soap, Drama"


### (4) tv_series_dwc와 결합

In [16]:
# 두 테이블 결합
dramas_merged = series_to_dramas.merge(
    tv_series_dwc,
    on="imdb_id",
    how="left",
    suffixes=("", "_dwc")
)
dramas_merged

Unnamed: 0,imdb_rating_count,imdb_rating,keyword,imdb_id,production_companies,created_by,poster_path,type_detail,seasons,providers_flatrate,overview,genre_ids,genres,origin_country,last_air_date,first_air_date,number_of_episodes,title,status,vote_average,vote_count,original_language,number_of_seasons,episode_run_time,id,in_production,last_episode_to_air_vote_average,series_id,title_dwc,original_name,first_air_date_dwc,vote_average_dwc,vote_count_dwc,popularity,executive_producer_name,executive_producer_ids,executive_producer_gender,executive_producer_profile_path,writers_name,writer_roles,writer_ids,writer_gender,writer_profile_path,top_cast_order,top_cast,character,top_cast_ids,top_cast_gender,top_cast_profile_path
0,32826.0,7.2,"medium, psychic power, paranormal, criminal co...",tt0412175,"Pointe Studios, Picturemaker Productions, Gram...",Glenn Gordon Caron,/6tOxspW2MbbAQ0s3jtfs8QATcx.jpg,Scripted,S0: Specials (30 eps); S1: Season 1 (16 eps); ...,"{""AR"": [""Paramount Plus"", ""Paramount Plus Appl...",Allison Dubois works in the District Attorney’...,"80, 18, 9648, 10765","Crime, Drama, Mystery, Sci-Fi & Fantasy",US,2011-01-21,2005-01-03,128.0,Medium,Ended,7.462,553,en,7,42,36,False,8.000,36.0,Medium,Medium,2005-01-03,7.462,553.0,22.7821,,,,,Jacob Cooney,Writers' Assistant,1318326,2,/4hEMgTX1BBXXNPGNFga8xY3e7MS.jpg,0; 1; 2; 3; 4,Patricia Arquette; Jake Weber; Miguel Sandoval...,Allison Dubois; Joe Dubois; Manuel Devalos; Le...,4687; 4177; 30488; 155282; 95039,1; 2; 2; 2; 1,/jeThSouMatiuRiLkjDvSBLHpmq0.jpg; /z1RTh4qvyMF...
1,121234.0,7.9,"drug dealer, outlaw, dysfunctional family, dar...",tt0439100,"Tilted Productions, Lionsgate Television",Jenji Kohan,/gKUsBTa5b0GY7U4iWBseeBalfjO.jpg,Scripted,S0: Specials (1 eps); S1: Season 1 (10 eps); S...,"{""AD"": [""Netflix""], ""AE"": [""Netflix""], ""AG"": [...","After the unexpected death of her husband, a s...","35, 80, 18","Comedy, Crime, Drama",US,2012-09-16,2005-08-07,102.0,Weeds,Ended,7.484,935,en,8,27,186,False,7.800,186.0,Weeds,Weeds,2005-08-07,7.483,940.0,19.2040,Mark A. Burley; Matthew Salsberg; Roberto Bena...,2609135; 1228259; 1214449; 1214409,2; 0; 2; 1,; ; ; /f3ypLKqGYrbJbh9b0mdIqyGeOqx.jpg,,,,,,0; 5; 6; 7; 8,Mary-Louise Parker; Justin Kirk; Hunter Parris...,Nancy Botwin; Andy Botwin; Silas Botwin; Shane...,18248; 20301; 35236; 12; 58478,1; 2; 2; 2; 2,/1ohhrIZ4OMlLx9DvHjPhQJAIP0F.jpg; /68QWfaIoOYy...
2,5070.0,6.9,american football,tt0772137,,Mara Brock Akil,/zmvRaCRcmi6lsriYxZmAR6uWHPS.jpg,Scripted,S0: Specials (0 eps); S1: Season 1 (22 eps); S...,"{""NZ"": [""TVNZ+""], ""RU"": [""Amediateka""], ""US"": ...",When her boyfriend Derwin Davis is chosen as t...,"18, 35","Drama, Comedy",US,2015-08-05,2006-10-01,147.0,The Game,Ended,7.500,40,en,9,30,211,False,0.000,211.0,The Game,The Game,2006-10-01,7.500,40.0,6.0711,,,,,,,,,,2; 3; 4; 5; 6,Coby Bell; Brittany Daniel; Wendy Raquel Robin...,Jason Pitts; Kelly Pitts; Tasha Mack; Malik Wr...,102775; 35476; 52116; 206378; 98772,2; 1; 1; 2; 1,/2fOJuToMdCTk1KI0Kod8wRT8gFq.jpg; /vVnKokLIiv6...
3,31609.0,7.4,"experiment, seattle, washington, secret organi...",tt0756509,,"Eric Bress, J. Mackye Gruber",/4omwjf3HgGx4nZXcA0Avmd9bQny.jpg,Scripted,S0: Specials (4 eps); S1: Season 1 (10 eps); S...,"{""AR"": [""Disney Plus""], ""AU"": [""Disney Plus""],...",A suburban family that takes in a mysterious t...,"10765, 18","Sci-Fi & Fantasy, Drama",US,2009-03-16,2006-06-26,43.0,Kyle XY,Canceled,7.044,365,en,3,44,219,False,7.500,219.0,Kyle XY,Kyle XY,2006-06-26,7.044,365.0,5.7439,,,,,,,,,,0; 1; 2; 3; 4,Matt Dallas; Marguerite MacIntyre; Bruce Thoma...,Kyle Trager; Nicole Trager; Stephen Trager; Lo...,79494; 166566; 79497; 180320; 205204,2; 1; 2; 1; 2,/kFGLCt1OEwWwCl8tdKhkcO8dnm.jpg; /ignfCe6NuP6a...
4,7251.0,6.9,"career woman, magazine, hollywood, tabloid",tt0496275,"Coquette Productions, Matthew Carnahan Circus ...",Matthew Carnahan,/JZs8u8XfZJDjv72LUidoXdb9qz.jpg,Scripted,S1: Season 1 (13 eps); S2: Season 2 (7 eps),"{""AU"": [""Disney Plus""], ""NZ"": [""Disney Plus""]}",Dirt is an American television serial broadcas...,18,Drama,US,2008-04-13,2007-01-02,20.0,Dirt,Canceled,6.675,40,en,2,55,284,False,0.000,284.0,Dirt,Dirt,2007-01-02,6.700,40.0,2.7816,,,,,,,,,,0; 1; 2; 3; 6,Courteney Cox; Ian Hart; Josh Stewart; Alexand...,Lucy Spiller; Don Konkey; Holt McLaren; Willa ...,14405; 10985; 40039; 58369; 1181353,1; 2; 2; 1; 2,/yA8dicwtcVuxG3gh94QsaRb5gNb.jpg; /xMC8aPDwhEZ...
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3576,2004.0,6.3,,tt28243363,Palomar,Carmine Elia,/9QVSjcrWGjV46DlSLfh9JI4oIaa.jpg,Scripted,S1: Season 1 (6 eps),AD: Netflix; AE: Netflix; AG: Netflix; AL: Net...,The suspicious death of her son pushes a forme...,"9648, 18, 80","Mystery, Drama, Crime",IT,2025-06-03,2025-06-03,6.0,Sara - Woman in the Shadows,Returning Series,6.400,32,it,1,54,291256,True,4.333,291256.0,Sara - Woman in the Shadows,Sara - La donna nell'ombra,2025-06-03,6.400,32.0,2.5598,,,,,,,,,,0; 1; 2; 3; 4,Teresa Saponangelo; Claudia Gerini; Flavio Fur...,Sara; Teresa; Pardo; Viola; Corrado Lembo,116532; 8776; 1794066; 3185219; 131632,1; 1; 2; 1; 2,/bX7qS1ighwuT7GuUxig4c9vcuFy.jpg; /QFBen9LPrKu...
3577,4212.0,8.8,"love triangle, based on novel or book, ex-boyf...",tt37370861,,"Chai Jidan, Gong Yu Shi",/j5KWzpPAi4JC42GGrVWDFL6G2gY.jpg,Scripted,S1: Season 1 (24 eps),BB: Rakuten Viki; BM: Rakuten Viki; BS: Rakute...,"After Wu Suo-wei's girlfriend dumps him, he de...","18, 10759","Drama, Action & Adventure",CN,2025-08-12,2025-06-16,24.0,Revenged Love,Ended,8.260,52,zh,1,45,292035,False,0.000,292035.0,Revenged Love,逆爱,2025-06-16,8.255,53.0,26.3069,,,,,Chai Jidan,Writer,1833173,1,/lkTvQJCkTPviRnGQTH1D2nrz9xR.jpg,0; 1; 2; 3; 4,Zi Yu; Tian Xuning; Liu Xuancheng; Zhan Xuan; ...,Wu Suowei; Chi Cheng; Jiang Xiaoshuai; Guo Che...,4770618; 3176248; 5270820; 4149728; 5507659,2; 2; 2; 2; 2,/rh3HC3dQn2EJ3H0uz4iwX3VNRPv.jpg; /cja4cqfeaTt...
3578,645.0,5.4,psychological thriller,tt37065878,CMO Producciones,,/fBMRxziwviMBVLOCe1i4qtwRXKj.jpg,Scripted,S1: Season 1 (20 eps),AD: Netflix; AE: Netflix; AG: Netflix; AL: Net...,A seemingly perfect household unravels when an...,18,Drama,CO,2025-09-24,2025-09-24,20.0,The Guest,Returning Series,7.900,31,es,1,42,297464,True,0.000,297464.0,The Guest,La huésped,2025-09-24,7.900,31.0,9.8350,,,,,Dario Venegas; Lina Uribe,Writer; Writer,5638231; 1563037,0; 1,;,0; 1; 2; 3; 4,Laura Londoño; Carmen Villalobos; Jason Day; K...,Silvia; Sonia; Lorenzo; Isabella; Miguel,1512786; 1418219; 933264; 3095963; 1411832,1; 1; 2; 1; 0,/qlIvmFOOIbvxfOZqUFyfqaBkRBJ.jpg; ; /xDrIZ5GLe...
3579,53.0,6.4,,tt36595665,TelevisaUnivision,José Alberto Castro,/k3dGAfWNvr23FQl5e8yDGxv847T.jpg,Scripted,S1: Season 1 (60 eps),,Carolina Guillén is a renowned fashion designe...,"10766, 18","Soap, Drama",US,2025-11-04,2025-09-10,43.0,Los hilos del pasado,Returning Series,7.871,31,en,1,42,300258,True,0.000,300258.0,Los hilos del pasado,Los hilos del pasado,2025-09-10,7.813,32.0,4.0917,,,,,,,,,,0; 1; 2; 3; 4,Bárbara López; Yadhira Carrillo; Emmanuel Palo...,Cristina Miranda; Carolina Guillén; Carlos Nav...,2328501; 1246105; 1892023; 1244282; 1222995,1; 1; 2; 2; 1,; /jICToUbKGlJoCcDFOyiEa1m3Zab.jpg; ; /2l8Spih...


### (5) 결합 후 컬럼 정리

In [17]:
# 기존 컬럼 확인
series_to_dramas.columns

Index(['imdb_rating_count', 'imdb_rating', 'keyword', 'imdb_id',
       'production_companies', 'created_by', 'poster_path', 'type_detail',
       'seasons', 'providers_flatrate', 'overview', 'genre_ids', 'genres',
       'origin_country', 'last_air_date', 'first_air_date',
       'number_of_episodes', 'title', 'status', 'vote_average', 'vote_count',
       'original_language', 'number_of_seasons', 'episode_run_time', 'id',
       'in_production', 'last_episode_to_air_vote_average'],
      dtype='object')

In [18]:
# 결합 후 컬럼 확인
dramas_merged.columns

Index(['imdb_rating_count', 'imdb_rating', 'keyword', 'imdb_id',
       'production_companies', 'created_by', 'poster_path', 'type_detail',
       'seasons', 'providers_flatrate', 'overview', 'genre_ids', 'genres',
       'origin_country', 'last_air_date', 'first_air_date',
       'number_of_episodes', 'title', 'status', 'vote_average', 'vote_count',
       'original_language', 'number_of_seasons', 'episode_run_time', 'id',
       'in_production', 'last_episode_to_air_vote_average', 'series_id',
       'title_dwc', 'original_name', 'first_air_date_dwc', 'vote_average_dwc',
       'vote_count_dwc', 'popularity', 'executive_producer_name',
       'executive_producer_ids', 'executive_producer_gender',
       'executive_producer_profile_path', 'writers_name', 'writer_roles',
       'writer_ids', 'writer_gender', 'writer_profile_path', 'top_cast_order',
       'top_cast', 'character', 'top_cast_ids', 'top_cast_gender',
       'top_cast_profile_path'],
      dtype='object')

In [None]:
# series_to_dramas의 원본 컬럼을 유지하고, dwc에서 충돌나는 컬럼 4개 제거
drop_cols = [
    "title_dwc",
    "first_air_date_dwc",
    "vote_average_dwc",
    "vote_count_dwc"
]

dramas_merged_final = dramas_merged.drop(columns=drop_cols).copy()

In [21]:
dramas_merged_final.columns

Index(['imdb_rating_count', 'imdb_rating', 'keyword', 'imdb_id',
       'production_companies', 'created_by', 'poster_path', 'type_detail',
       'seasons', 'providers_flatrate', 'overview', 'genre_ids', 'genres',
       'origin_country', 'last_air_date', 'first_air_date',
       'number_of_episodes', 'title', 'status', 'vote_average', 'vote_count',
       'original_language', 'number_of_seasons', 'episode_run_time', 'id',
       'in_production', 'last_episode_to_air_vote_average', 'series_id',
       'original_name', 'popularity', 'executive_producer_name',
       'executive_producer_ids', 'executive_producer_gender',
       'executive_producer_profile_path', 'writers_name', 'writer_roles',
       'writer_ids', 'writer_gender', 'writer_profile_path', 'top_cast_order',
       'top_cast', 'character', 'top_cast_ids', 'top_cast_gender',
       'top_cast_profile_path'],
      dtype='object')

In [None]:
# 컬럼 정리 전후 개수 비교 (4개 정상 삭제)
print(dramas_merged.shape)
print(dramas_merged_final.shape)

(3581, 49)
(3581, 45)


In [None]:
# 필터링 후 시리즈 개수 변화 (드라마 필터링 전 6,730개 -> 필터 후 3,581개 남음) -3,149개 걸러짐
tv_series_filtered.shape, dramas_merged_final.shape

((6730, 27), (3581, 45))

### (6) 새 파일로 저장 (dramas_re.parquet)

In [None]:
# 필터링된 드라마 데이터 (3581 rows × 27 columns)
dramas_merged_final.to_parquet('dramas_merged_final.parquet', index=False)

## 드라마 데이터 전처리

In [None]:
# 드라마만 필터링된 파일 불러오기 (드라마 vote_count>=30 필터 + 비드라마 제외 + director, writer, top_cast 반영)
dramas = pd.read_parquet('dramas_merged_final.parquet')
dramas

Unnamed: 0,imdb_rating_count,imdb_rating,keyword,imdb_id,production_companies,created_by,poster_path,type_detail,seasons,providers_flatrate,overview,genre_ids,genres,origin_country,last_air_date,first_air_date,number_of_episodes,title,status,vote_average,vote_count,original_language,number_of_seasons,episode_run_time,id,in_production,last_episode_to_air_vote_average,series_id,original_name,popularity,executive_producer_name,executive_producer_ids,executive_producer_gender,executive_producer_profile_path,writers_name,writer_roles,writer_ids,writer_gender,writer_profile_path,top_cast_order,top_cast,character,top_cast_ids,top_cast_gender,top_cast_profile_path
0,32826.0,7.2,"medium, psychic power, paranormal, criminal co...",tt0412175,"Pointe Studios, Picturemaker Productions, Gram...",Glenn Gordon Caron,/6tOxspW2MbbAQ0s3jtfs8QATcx.jpg,Scripted,S0: Specials (30 eps); S1: Season 1 (16 eps); ...,"{""AR"": [""Paramount Plus"", ""Paramount Plus Appl...",Allison Dubois works in the District Attorney’...,"80, 18, 9648, 10765","Crime, Drama, Mystery, Sci-Fi & Fantasy",US,2011-01-21,2005-01-03,128.0,Medium,Ended,7.462,553,en,7,42,36,False,8.000,36.0,Medium,22.7821,,,,,Jacob Cooney,Writers' Assistant,1318326,2,/4hEMgTX1BBXXNPGNFga8xY3e7MS.jpg,0; 1; 2; 3; 4,Patricia Arquette; Jake Weber; Miguel Sandoval...,Allison Dubois; Joe Dubois; Manuel Devalos; Le...,4687; 4177; 30488; 155282; 95039,1; 2; 2; 2; 1,/jeThSouMatiuRiLkjDvSBLHpmq0.jpg; /z1RTh4qvyMF...
1,121234.0,7.9,"drug dealer, outlaw, dysfunctional family, dar...",tt0439100,"Tilted Productions, Lionsgate Television",Jenji Kohan,/gKUsBTa5b0GY7U4iWBseeBalfjO.jpg,Scripted,S0: Specials (1 eps); S1: Season 1 (10 eps); S...,"{""AD"": [""Netflix""], ""AE"": [""Netflix""], ""AG"": [...","After the unexpected death of her husband, a s...","35, 80, 18","Comedy, Crime, Drama",US,2012-09-16,2005-08-07,102.0,Weeds,Ended,7.484,935,en,8,27,186,False,7.800,186.0,Weeds,19.2040,Mark A. Burley; Matthew Salsberg; Roberto Bena...,2609135; 1228259; 1214449; 1214409,2; 0; 2; 1,; ; ; /f3ypLKqGYrbJbh9b0mdIqyGeOqx.jpg,,,,,,0; 5; 6; 7; 8,Mary-Louise Parker; Justin Kirk; Hunter Parris...,Nancy Botwin; Andy Botwin; Silas Botwin; Shane...,18248; 20301; 35236; 12; 58478,1; 2; 2; 2; 2,/1ohhrIZ4OMlLx9DvHjPhQJAIP0F.jpg; /68QWfaIoOYy...
2,5070.0,6.9,american football,tt0772137,,Mara Brock Akil,/zmvRaCRcmi6lsriYxZmAR6uWHPS.jpg,Scripted,S0: Specials (0 eps); S1: Season 1 (22 eps); S...,"{""NZ"": [""TVNZ+""], ""RU"": [""Amediateka""], ""US"": ...",When her boyfriend Derwin Davis is chosen as t...,"18, 35","Drama, Comedy",US,2015-08-05,2006-10-01,147.0,The Game,Ended,7.500,40,en,9,30,211,False,0.000,211.0,The Game,6.0711,,,,,,,,,,2; 3; 4; 5; 6,Coby Bell; Brittany Daniel; Wendy Raquel Robin...,Jason Pitts; Kelly Pitts; Tasha Mack; Malik Wr...,102775; 35476; 52116; 206378; 98772,2; 1; 1; 2; 1,/2fOJuToMdCTk1KI0Kod8wRT8gFq.jpg; /vVnKokLIiv6...
3,31609.0,7.4,"experiment, seattle, washington, secret organi...",tt0756509,,"Eric Bress, J. Mackye Gruber",/4omwjf3HgGx4nZXcA0Avmd9bQny.jpg,Scripted,S0: Specials (4 eps); S1: Season 1 (10 eps); S...,"{""AR"": [""Disney Plus""], ""AU"": [""Disney Plus""],...",A suburban family that takes in a mysterious t...,"10765, 18","Sci-Fi & Fantasy, Drama",US,2009-03-16,2006-06-26,43.0,Kyle XY,Canceled,7.044,365,en,3,44,219,False,7.500,219.0,Kyle XY,5.7439,,,,,,,,,,0; 1; 2; 3; 4,Matt Dallas; Marguerite MacIntyre; Bruce Thoma...,Kyle Trager; Nicole Trager; Stephen Trager; Lo...,79494; 166566; 79497; 180320; 205204,2; 1; 2; 1; 2,/kFGLCt1OEwWwCl8tdKhkcO8dnm.jpg; /ignfCe6NuP6a...
4,7251.0,6.9,"career woman, magazine, hollywood, tabloid",tt0496275,"Coquette Productions, Matthew Carnahan Circus ...",Matthew Carnahan,/JZs8u8XfZJDjv72LUidoXdb9qz.jpg,Scripted,S1: Season 1 (13 eps); S2: Season 2 (7 eps),"{""AU"": [""Disney Plus""], ""NZ"": [""Disney Plus""]}",Dirt is an American television serial broadcas...,18,Drama,US,2008-04-13,2007-01-02,20.0,Dirt,Canceled,6.675,40,en,2,55,284,False,0.000,284.0,Dirt,2.7816,,,,,,,,,,0; 1; 2; 3; 6,Courteney Cox; Ian Hart; Josh Stewart; Alexand...,Lucy Spiller; Don Konkey; Holt McLaren; Willa ...,14405; 10985; 40039; 58369; 1181353,1; 2; 2; 1; 2,/yA8dicwtcVuxG3gh94QsaRb5gNb.jpg; /xMC8aPDwhEZ...
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3576,2004.0,6.3,,tt28243363,Palomar,Carmine Elia,/9QVSjcrWGjV46DlSLfh9JI4oIaa.jpg,Scripted,S1: Season 1 (6 eps),AD: Netflix; AE: Netflix; AG: Netflix; AL: Net...,The suspicious death of her son pushes a forme...,"9648, 18, 80","Mystery, Drama, Crime",IT,2025-06-03,2025-06-03,6.0,Sara - Woman in the Shadows,Returning Series,6.400,32,it,1,54,291256,True,4.333,291256.0,Sara - La donna nell'ombra,2.5598,,,,,,,,,,0; 1; 2; 3; 4,Teresa Saponangelo; Claudia Gerini; Flavio Fur...,Sara; Teresa; Pardo; Viola; Corrado Lembo,116532; 8776; 1794066; 3185219; 131632,1; 1; 2; 1; 2,/bX7qS1ighwuT7GuUxig4c9vcuFy.jpg; /QFBen9LPrKu...
3577,4212.0,8.8,"love triangle, based on novel or book, ex-boyf...",tt37370861,,"Chai Jidan, Gong Yu Shi",/j5KWzpPAi4JC42GGrVWDFL6G2gY.jpg,Scripted,S1: Season 1 (24 eps),BB: Rakuten Viki; BM: Rakuten Viki; BS: Rakute...,"After Wu Suo-wei's girlfriend dumps him, he de...","18, 10759","Drama, Action & Adventure",CN,2025-08-12,2025-06-16,24.0,Revenged Love,Ended,8.260,52,zh,1,45,292035,False,0.000,292035.0,逆爱,26.3069,,,,,Chai Jidan,Writer,1833173,1,/lkTvQJCkTPviRnGQTH1D2nrz9xR.jpg,0; 1; 2; 3; 4,Zi Yu; Tian Xuning; Liu Xuancheng; Zhan Xuan; ...,Wu Suowei; Chi Cheng; Jiang Xiaoshuai; Guo Che...,4770618; 3176248; 5270820; 4149728; 5507659,2; 2; 2; 2; 2,/rh3HC3dQn2EJ3H0uz4iwX3VNRPv.jpg; /cja4cqfeaTt...
3578,645.0,5.4,psychological thriller,tt37065878,CMO Producciones,,/fBMRxziwviMBVLOCe1i4qtwRXKj.jpg,Scripted,S1: Season 1 (20 eps),AD: Netflix; AE: Netflix; AG: Netflix; AL: Net...,A seemingly perfect household unravels when an...,18,Drama,CO,2025-09-24,2025-09-24,20.0,The Guest,Returning Series,7.900,31,es,1,42,297464,True,0.000,297464.0,La huésped,9.8350,,,,,Dario Venegas; Lina Uribe,Writer; Writer,5638231; 1563037,0; 1,;,0; 1; 2; 3; 4,Laura Londoño; Carmen Villalobos; Jason Day; K...,Silvia; Sonia; Lorenzo; Isabella; Miguel,1512786; 1418219; 933264; 3095963; 1411832,1; 1; 2; 1; 0,/qlIvmFOOIbvxfOZqUFyfqaBkRBJ.jpg; ; /xDrIZ5GLe...
3579,53.0,6.4,,tt36595665,TelevisaUnivision,José Alberto Castro,/k3dGAfWNvr23FQl5e8yDGxv847T.jpg,Scripted,S1: Season 1 (60 eps),,Carolina Guillén is a renowned fashion designe...,"10766, 18","Soap, Drama",US,2025-11-04,2025-09-10,43.0,Los hilos del pasado,Returning Series,7.871,31,en,1,42,300258,True,0.000,300258.0,Los hilos del pasado,4.0917,,,,,,,,,,0; 1; 2; 3; 4,Bárbara López; Yadhira Carrillo; Emmanuel Palo...,Cristina Miranda; Carolina Guillén; Carlos Nav...,2328501; 1246105; 1892023; 1244282; 1222995,1; 1; 2; 2; 1,; /jICToUbKGlJoCcDFOyiEa1m3Zab.jpg; ; /2l8Spih...


In [25]:
# 드라마 타입 정보 확인
dramas.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 3581 entries, 0 to 3580
Data columns (total 45 columns):
 #   Column                            Non-Null Count  Dtype  
---  ------                            --------------  -----  
 0   imdb_rating_count                 3413 non-null   float64
 1   imdb_rating                       3413 non-null   float64
 2   keyword                           3212 non-null   object 
 3   imdb_id                           3581 non-null   object 
 4   production_companies              3334 non-null   object 
 5   created_by                        3094 non-null   object 
 6   poster_path                       3579 non-null   object 
 7   type_detail                       3581 non-null   object 
 8   seasons                           3581 non-null   object 
 9   providers_flatrate                3476 non-null   object 
 10  overview                          3518 non-null   object 
 11  genre_ids                         3581 non-null   object 
 12  genres

In [26]:
# 드라마 결측률 확인
dramas.isna().mean().sort_values(ascending=False)

created_by                          0.135996
keyword                             0.103044
production_companies                0.068975
imdb_rating_count                   0.046914
imdb_rating                         0.046914
providers_flatrate                  0.029321
overview                            0.017593
origin_country                      0.000838
poster_path                         0.000559
character                           0.000279
writers_name                        0.000279
popularity                          0.000279
executive_producer_name             0.000279
top_cast_gender                     0.000279
executive_producer_ids              0.000279
executive_producer_gender           0.000279
executive_producer_profile_path     0.000279
writer_roles                        0.000279
top_cast_ids                        0.000279
writer_ids                          0.000279
writer_gender                       0.000279
writer_profile_path                 0.000279
series_id 

### (1) 결측치 처리

#### 결측 컬럼 삭제

In [None]:
# 중요도 낮은 created_by 컬럼 제거
dramas_cleaned = dramas.drop(columns=['created_by', 'popularity', ]).copy()

#### 결측치 대체

In [None]:
# 데이터 타입 확인
dramas_cleaned.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 3581 entries, 0 to 3580
Data columns (total 44 columns):
 #   Column                            Non-Null Count  Dtype  
---  ------                            --------------  -----  
 0   imdb_rating_count                 3413 non-null   float64
 1   imdb_rating                       3413 non-null   float64
 2   keyword                           3581 non-null   object 
 3   imdb_id                           3581 non-null   object 
 4   production_companies              3334 non-null   object 
 5   poster_path                       3579 non-null   object 
 6   type_detail                       3581 non-null   object 
 7   seasons                           3581 non-null   object 
 8   providers_flatrate                3476 non-null   object 
 9   overview                          3518 non-null   object 
 10  genre_ids                         3581 non-null   object 
 11  genres                            3581 non-null   object 
 12  origin

In [None]:
# 중요도 낮은 텍스트 / 카테고리형 → "none" 처리
dramas_cleaned[["keyword", 'providers_flatrate', 'origin_country', 'poster_path', 
                'character', 'writers_name', 'executive_producer_name', 'top_cast_gender',
                'executive_producer_ids', 'executive_producer_gender', ]] = dramas_cleaned[[]].fillna("none")

# production_companies도 중요도 낮으므로 "none" 처리
dramas_cleaned["production_companies"] = dramas_cleaned["production_companies"].fillna("none")


In [None]:
# 결측치 있는 행 제거
dramas_cleaned = dramas.dropna(subset=['imdb_rating_count', 'imdb_rating', 'overview', 
                                       'executive_producer_profile_path', 'writer_roles', 'top_cast_ids', 'writer_ids', 'writer_gender',
                                       'writer_profile_path', 'series_id', 'top_cast_order', 'top_cast', 'original_name', 'top_cast_profile_path'
                                       ]).copy()
dramas_cleaned.isna().mean().sort_values(ascending=False)

imdb_rating_count                   0.0
last_air_date                       0.0
in_production                       0.0
id                                  0.0
episode_run_time                    0.0
number_of_seasons                   0.0
original_language                   0.0
vote_count                          0.0
vote_average                        0.0
status                              0.0
title                               0.0
number_of_episodes                  0.0
first_air_date                      0.0
origin_country                      0.0
imdb_rating                         0.0
genres                              0.0
genre_ids                           0.0
overview                            0.0
providers_flatrate                  0.0
seasons                             0.0
type_detail                         0.0
poster_path                         0.0
created_by                          0.0
production_companies                0.0
imdb_id                             0.0


In [None]:
# 결측치 제거 전후 비교
# print(dramas.shape)
# print(dramas_cleaned.shape)

(3581, 45)
(3581, 44)
