# Import Module

In [562]:
from numpy.random import Generator as gen
from numpy.random import PCG64 as pcg
from numpy.random import shuffle
from bs4 import BeautifulSoup
from urllib.parse import urljoin
import matplotlib.pyplot as plt
import ydata_profiling as ypf
import json, requests, sys
import seaborn as sns
import pandas as pd
import numpy as np
import math
import time
import os
import re

In [563]:
np.set_printoptions(precision = 2, linewidth = 100, suppress = True)

In [564]:
pd.set_option('display.max_rows', 7, 'display.max_columns', None, 'display.float_format', '{:.2f}'.format)

# Prepare Dataset

### Import dataset

In [565]:
df_nfx = pd.read_csv('netflix_titles(pre).csv', encoding = 'unicode_escape')

df_nfx

Unnamed: 0,show_id,type,title,director,cast,country,date_added,release_year,rating,duration,listed_in,description,Unnamed: 12,Unnamed: 13,Unnamed: 14,Unnamed: 15,Unnamed: 16,Unnamed: 17,Unnamed: 18,Unnamed: 19,Unnamed: 20,Unnamed: 21,Unnamed: 22,Unnamed: 23,Unnamed: 24,Unnamed: 25,Unnamed: 26
0,81145628,Movie,Norm of the North: King Sized Adventure,"Richard Finn, Tim Maltby","Alan Marriott, Andrew Toth, Brian Dobson, Cole...","United States, India, South Korea, China",9-Sep-19,2019,TV-PG,90 min,"Children & Family Movies, Comedies",Before planning an awesome wedding for his gra...,,,,,,,,,,,,,,,
1,80117401,Movie,Jandino: Whatever it Takes,,Jandino Asporaat,United Kingdom,9-Sep-16,2016,TV-MA,94 min,Stand-Up Comedy,Jandino Asporaat riffs on the challenges of ra...,,,,,,,,,,,,,,,
2,70234439,TV Show,Transformers Prime,,"Peter Cullen, Sumalee Montano, Frank Welker, J...",United States,8-Sep-18,2013,TV-Y7-FV,1 Season,Kids' TV,"With the help of three human allies, the Autob...",,,,,,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
6228,80116008,Movie,Little Baby Bum: Nursery Rhyme Friends,,,,,2016,,60 min,Movies,Nursery rhymes and original music for children...,,,,,,,,,,,,,,,
6229,70281022,TV Show,A Young Doctor's Notebook and Other Stories,,"Daniel Radcliffe, Jon Hamm, Adam Godley, Chris...",United Kingdom,,2013,TV-MA,2 Seasons,"British TV Shows, TV Comedies, TV Dramas","Set during the Russian Revolution, this comic ...",,,,,,,,,,,,,,,
6230,70153404,TV Show,Friends,,"Jennifer Aniston, Courteney Cox, Lisa Kudrow, ...",United States,,2003,TV-14,10 Seasons,"Classic & Cult TV, TV Comedies",This hit sitcom follows the merry misadventure...,,,,,,,,,,,,,,,


## Drop duplicated rows

In [566]:
df_nfx = df_nfx.drop_duplicates(ignore_index = True)

df_nfx

Unnamed: 0,show_id,type,title,director,cast,country,date_added,release_year,rating,duration,listed_in,description,Unnamed: 12,Unnamed: 13,Unnamed: 14,Unnamed: 15,Unnamed: 16,Unnamed: 17,Unnamed: 18,Unnamed: 19,Unnamed: 20,Unnamed: 21,Unnamed: 22,Unnamed: 23,Unnamed: 24,Unnamed: 25,Unnamed: 26
0,81145628,Movie,Norm of the North: King Sized Adventure,"Richard Finn, Tim Maltby","Alan Marriott, Andrew Toth, Brian Dobson, Cole...","United States, India, South Korea, China",9-Sep-19,2019,TV-PG,90 min,"Children & Family Movies, Comedies",Before planning an awesome wedding for his gra...,,,,,,,,,,,,,,,
1,80117401,Movie,Jandino: Whatever it Takes,,Jandino Asporaat,United Kingdom,9-Sep-16,2016,TV-MA,94 min,Stand-Up Comedy,Jandino Asporaat riffs on the challenges of ra...,,,,,,,,,,,,,,,
2,70234439,TV Show,Transformers Prime,,"Peter Cullen, Sumalee Montano, Frank Welker, J...",United States,8-Sep-18,2013,TV-Y7-FV,1 Season,Kids' TV,"With the help of three human allies, the Autob...",,,,,,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
6228,80116008,Movie,Little Baby Bum: Nursery Rhyme Friends,,,,,2016,,60 min,Movies,Nursery rhymes and original music for children...,,,,,,,,,,,,,,,
6229,70281022,TV Show,A Young Doctor's Notebook and Other Stories,,"Daniel Radcliffe, Jon Hamm, Adam Godley, Chris...",United Kingdom,,2013,TV-MA,2 Seasons,"British TV Shows, TV Comedies, TV Dramas","Set during the Russian Revolution, this comic ...",,,,,,,,,,,,,,,
6230,70153404,TV Show,Friends,,"Jennifer Aniston, Courteney Cox, Lisa Kudrow, ...",United States,,2003,TV-14,10 Seasons,"Classic & Cult TV, TV Comedies",This hit sitcom follows the merry misadventure...,,,,,,,,,,,,,,,


## Check for NAN values

In [567]:
pd.set_option("display.max_rows", None, "display.max_columns", None)

In [568]:
df_nfx.isnull().sum()

show_id            0
type               0
title              0
director        1968
cast             573
country          476
date_added        11
release_year       0
rating            10
duration           0
listed_in          0
description        0
Unnamed: 12     6228
Unnamed: 13     6228
Unnamed: 14     6228
Unnamed: 15     6228
Unnamed: 16     6228
Unnamed: 17     6228
Unnamed: 18     6228
Unnamed: 19     6228
Unnamed: 20     6228
Unnamed: 21     6229
Unnamed: 22     6229
Unnamed: 23     6229
Unnamed: 24     6229
Unnamed: 25     6229
Unnamed: 26     6230
dtype: int64

In [569]:
pd.set_option("display.max_rows", 7, "display.max_columns", None)

## Drop rows of all NAN

In [570]:
df_nfx = df_nfx.dropna(how = 'all')

df_nfx

Unnamed: 0,show_id,type,title,director,cast,country,date_added,release_year,rating,duration,listed_in,description,Unnamed: 12,Unnamed: 13,Unnamed: 14,Unnamed: 15,Unnamed: 16,Unnamed: 17,Unnamed: 18,Unnamed: 19,Unnamed: 20,Unnamed: 21,Unnamed: 22,Unnamed: 23,Unnamed: 24,Unnamed: 25,Unnamed: 26
0,81145628,Movie,Norm of the North: King Sized Adventure,"Richard Finn, Tim Maltby","Alan Marriott, Andrew Toth, Brian Dobson, Cole...","United States, India, South Korea, China",9-Sep-19,2019,TV-PG,90 min,"Children & Family Movies, Comedies",Before planning an awesome wedding for his gra...,,,,,,,,,,,,,,,
1,80117401,Movie,Jandino: Whatever it Takes,,Jandino Asporaat,United Kingdom,9-Sep-16,2016,TV-MA,94 min,Stand-Up Comedy,Jandino Asporaat riffs on the challenges of ra...,,,,,,,,,,,,,,,
2,70234439,TV Show,Transformers Prime,,"Peter Cullen, Sumalee Montano, Frank Welker, J...",United States,8-Sep-18,2013,TV-Y7-FV,1 Season,Kids' TV,"With the help of three human allies, the Autob...",,,,,,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
6228,80116008,Movie,Little Baby Bum: Nursery Rhyme Friends,,,,,2016,,60 min,Movies,Nursery rhymes and original music for children...,,,,,,,,,,,,,,,
6229,70281022,TV Show,A Young Doctor's Notebook and Other Stories,,"Daniel Radcliffe, Jon Hamm, Adam Godley, Chris...",United Kingdom,,2013,TV-MA,2 Seasons,"British TV Shows, TV Comedies, TV Dramas","Set during the Russian Revolution, this comic ...",,,,,,,,,,,,,,,
6230,70153404,TV Show,Friends,,"Jennifer Aniston, Courteney Cox, Lisa Kudrow, ...",United States,,2003,TV-14,10 Seasons,"Classic & Cult TV, TV Comedies",This hit sitcom follows the merry misadventure...,,,,,,,,,,,,,,,


## Drop columns of NAN

In [571]:
df_nfx = df_nfx.drop(df_nfx.iloc[:, 12:], axis = 'columns')

df_nfx

Unnamed: 0,show_id,type,title,director,cast,country,date_added,release_year,rating,duration,listed_in,description
0,81145628,Movie,Norm of the North: King Sized Adventure,"Richard Finn, Tim Maltby","Alan Marriott, Andrew Toth, Brian Dobson, Cole...","United States, India, South Korea, China",9-Sep-19,2019,TV-PG,90 min,"Children & Family Movies, Comedies",Before planning an awesome wedding for his gra...
1,80117401,Movie,Jandino: Whatever it Takes,,Jandino Asporaat,United Kingdom,9-Sep-16,2016,TV-MA,94 min,Stand-Up Comedy,Jandino Asporaat riffs on the challenges of ra...
2,70234439,TV Show,Transformers Prime,,"Peter Cullen, Sumalee Montano, Frank Welker, J...",United States,8-Sep-18,2013,TV-Y7-FV,1 Season,Kids' TV,"With the help of three human allies, the Autob..."
...,...,...,...,...,...,...,...,...,...,...,...,...
6228,80116008,Movie,Little Baby Bum: Nursery Rhyme Friends,,,,,2016,,60 min,Movies,Nursery rhymes and original music for children...
6229,70281022,TV Show,A Young Doctor's Notebook and Other Stories,,"Daniel Radcliffe, Jon Hamm, Adam Godley, Chris...",United Kingdom,,2013,TV-MA,2 Seasons,"British TV Shows, TV Comedies, TV Dramas","Set during the Russian Revolution, this comic ..."
6230,70153404,TV Show,Friends,,"Jennifer Aniston, Courteney Cox, Lisa Kudrow, ...",United States,,2003,TV-14,10 Seasons,"Classic & Cult TV, TV Comedies",This hit sitcom follows the merry misadventure...


In [572]:
df_nfx['description'][0]

'Before planning an awesome wedding for his grandfather, a polar bear king must take back a stolen artifact from an evil archaeologist first.'

## Preview dataset

In [573]:
pd.set_option("display.max_rows", None, "display.max_columns", None)

In [574]:
df_nfx.info(memory_usage = 'deep')

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 6231 entries, 0 to 6230
Data columns (total 12 columns):
 #   Column        Non-Null Count  Dtype 
---  ------        --------------  ----- 
 0   show_id       6231 non-null   int64 
 1   type          6231 non-null   object
 2   title         6231 non-null   object
 3   director      4263 non-null   object
 4   cast          5658 non-null   object
 5   country       5755 non-null   object
 6   date_added    6220 non-null   object
 7   release_year  6231 non-null   int64 
 8   rating        6221 non-null   object
 9   duration      6231 non-null   object
 10  listed_in     6231 non-null   object
 11  description   6231 non-null   object
dtypes: int64(2), object(10)
memory usage: 5.6 MB


In [575]:
df_nfx.memory_usage(deep = True)

Index               132
show_id           49848
type             390258
title            472090
director         377605
cast            1022786
country          414716
date_added       412630
release_year      49848
rating           382615
duration         399111
listed_in        563681
description     1310168
dtype: int64

In [576]:
df_nfx.describe(include = 'all')

Unnamed: 0,show_id,type,title,director,cast,country,date_added,release_year,rating,duration,listed_in,description
count,6231.0,6231,6231,4263,5658,5755,6220,6231.0,6221,6231,6231,6231
unique,,2,6169,3299,5463,553,1527,,14,201,461,6220
top,,Movie,The Silence,"RaÃºl Campos, Jan Suter",David Attenborough,United States,1-Jan-20,,TV-MA,1 Season,Documentaries,A surly septuagenarian gets another chance at ...
freq,,4263,3,18,18,2032,122,,2028,1318,299,3
mean,76703465.44,,,,,,,2013.36,,,,
std,10945066.86,,,,,,,8.81,,,,
min,247747.0,,,,,,,1925.0,,,,
25%,80035914.5,,,,,,,2013.0,,,,
50%,80163368.0,,,,,,,2016.0,,,,
75%,80244877.5,,,,,,,2018.0,,,,


In [577]:
pd.set_option("display.max_rows", 7, "display.max_columns", None)

# Dataset Cleaning

In [578]:
pd.set_option("display.max_rows", None, "display.max_columns", None)

In [579]:
df_nfx.isnull().sum()

show_id            0
type               0
title              0
director        1968
cast             573
country          476
date_added        11
release_year       0
rating            10
duration           0
listed_in          0
description        0
dtype: int64

In [580]:
pd.set_option("display.max_rows", 7, "display.max_columns", None)

In [581]:
df_nfx = df_nfx.replace(to_replace = np.nan, value = '')

df_nfx

Unnamed: 0,show_id,type,title,director,cast,country,date_added,release_year,rating,duration,listed_in,description
0,81145628,Movie,Norm of the North: King Sized Adventure,"Richard Finn, Tim Maltby","Alan Marriott, Andrew Toth, Brian Dobson, Cole...","United States, India, South Korea, China",9-Sep-19,2019,TV-PG,90 min,"Children & Family Movies, Comedies",Before planning an awesome wedding for his gra...
1,80117401,Movie,Jandino: Whatever it Takes,,Jandino Asporaat,United Kingdom,9-Sep-16,2016,TV-MA,94 min,Stand-Up Comedy,Jandino Asporaat riffs on the challenges of ra...
2,70234439,TV Show,Transformers Prime,,"Peter Cullen, Sumalee Montano, Frank Welker, J...",United States,8-Sep-18,2013,TV-Y7-FV,1 Season,Kids' TV,"With the help of three human allies, the Autob..."
...,...,...,...,...,...,...,...,...,...,...,...,...
6228,80116008,Movie,Little Baby Bum: Nursery Rhyme Friends,,,,,2016,,60 min,Movies,Nursery rhymes and original music for children...
6229,70281022,TV Show,A Young Doctor's Notebook and Other Stories,,"Daniel Radcliffe, Jon Hamm, Adam Godley, Chris...",United Kingdom,,2013,TV-MA,2 Seasons,"British TV Shows, TV Comedies, TV Dramas","Set during the Russian Revolution, this comic ..."
6230,70153404,TV Show,Friends,,"Jennifer Aniston, Courteney Cox, Lisa Kudrow, ...",United States,,2003,TV-14,10 Seasons,"Classic & Cult TV, TV Comedies",This hit sitcom follows the merry misadventure...


In [582]:
pd.set_option("display.max_rows", None, "display.max_columns", None)

In [583]:
df_nfx.isnull().sum()

show_id         0
type            0
title           0
director        0
cast            0
country         0
date_added      0
release_year    0
rating          0
duration        0
listed_in       0
description     0
dtype: int64

In [584]:
pd.set_option("display.max_rows", 7, "display.max_columns", None)

## Col - show_id

In [585]:
pd.unique(df_nfx['show_id'])

array([81145628, 80117401, 70234439, ..., 80116008, 70281022, 70153404], dtype=int64)

### convert datatype to num

In [586]:
df_nfx['show_id'] = pd.to_numeric(arg = df_nfx['show_id'], downcast = 'integer')

df_nfx['show_id']

0       81145628
1       80117401
2       70234439
          ...   
6228    80116008
6229    70281022
6230    70153404
Name: show_id, Length: 6231, dtype: int32

## Col - release_year

### Convert datatype to num

In [587]:
df_nfx['release_year'] = pd.to_numeric(arg = df_nfx['release_year'], downcast = 'integer')

df_nfx['release_year']

0       2019
1       2016
2       2013
        ... 
6228    2016
6229    2013
6230    2003
Name: release_year, Length: 6231, dtype: int16

## Col - country

### seperate each country

In [588]:
list_country = [j for i in df_nfx['country'] for j in i.split(', ')]

list_country

['United States',
 'India',
 'South Korea',
 'China',
 'United Kingdom',
 'United States',
 'United States',
 'United States',
 'Spain',
 'Bulgaria',
 'United States',
 'Spain',
 'Canada',
 'Chile',
 'United States',
 'United States',
 'United Kingdom',
 'Denmark',
 'Sweden',
 '',
 'Netherlands',
 'Belgium',
 'United Kingdom',
 'United States',
 '',
 '',
 '',
 '',
 '',
 '',
 '',
 'France',
 'Belgium',
 'United States',
 'France',
 'Belgium',
 'United States',
 'Uruguay',
 'United States',
 'United States',
 'United States',
 '',
 'United States,',
 '',
 'Thailand',
 'China',
 'Canada',
 'United States',
 'United States',
 'Belgium',
 'United Kingdom',
 'United States',
 'Belgium',
 'France',
 '',
 'India',
 '',
 'India',
 '',
 'France',
 'United States',
 'India',
 'United Kingdom',
 'United Kingdom',
 '',
 'United States',
 'Canada',
 'Thailand',
 'Thailand',
 'Thailand',
 'Thailand',
 'Thailand',
 'Thailand',
 'United States',
 'United States',
 'United States',
 'Pakistan',
 'Canada

### transform into dataframe

In [589]:
df_country = pd.DataFrame(data = list_country)

df_country

Unnamed: 0,0
0,United States
1,India
2,South Korea
...,...
7647,
7648,United Kingdom
7649,United States


### add to another col

In [590]:
df_nfx = df_nfx.drop(columns = 'country')

df_nfx

Unnamed: 0,show_id,type,title,director,cast,date_added,release_year,rating,duration,listed_in,description
0,81145628,Movie,Norm of the North: King Sized Adventure,"Richard Finn, Tim Maltby","Alan Marriott, Andrew Toth, Brian Dobson, Cole...",9-Sep-19,2019,TV-PG,90 min,"Children & Family Movies, Comedies",Before planning an awesome wedding for his gra...
1,80117401,Movie,Jandino: Whatever it Takes,,Jandino Asporaat,9-Sep-16,2016,TV-MA,94 min,Stand-Up Comedy,Jandino Asporaat riffs on the challenges of ra...
2,70234439,TV Show,Transformers Prime,,"Peter Cullen, Sumalee Montano, Frank Welker, J...",8-Sep-18,2013,TV-Y7-FV,1 Season,Kids' TV,"With the help of three human allies, the Autob..."
...,...,...,...,...,...,...,...,...,...,...,...
6228,80116008,Movie,Little Baby Bum: Nursery Rhyme Friends,,,,2016,,60 min,Movies,Nursery rhymes and original music for children...
6229,70281022,TV Show,A Young Doctor's Notebook and Other Stories,,"Daniel Radcliffe, Jon Hamm, Adam Godley, Chris...",,2013,TV-MA,2 Seasons,"British TV Shows, TV Comedies, TV Dramas","Set during the Russian Revolution, this comic ..."
6230,70153404,TV Show,Friends,,"Jennifer Aniston, Courteney Cox, Lisa Kudrow, ...",,2003,TV-14,10 Seasons,"Classic & Cult TV, TV Comedies",This hit sitcom follows the merry misadventure...


In [591]:
df_nfx = pd.concat(objs = [df_nfx, df_country], axis = 'columns')

df_nfx

Unnamed: 0,show_id,type,title,director,cast,date_added,release_year,rating,duration,listed_in,description,0
0,81145628.00,Movie,Norm of the North: King Sized Adventure,"Richard Finn, Tim Maltby","Alan Marriott, Andrew Toth, Brian Dobson, Cole...",9-Sep-19,2019.00,TV-PG,90 min,"Children & Family Movies, Comedies",Before planning an awesome wedding for his gra...,United States
1,80117401.00,Movie,Jandino: Whatever it Takes,,Jandino Asporaat,9-Sep-16,2016.00,TV-MA,94 min,Stand-Up Comedy,Jandino Asporaat riffs on the challenges of ra...,India
2,70234439.00,TV Show,Transformers Prime,,"Peter Cullen, Sumalee Montano, Frank Welker, J...",8-Sep-18,2013.00,TV-Y7-FV,1 Season,Kids' TV,"With the help of three human allies, the Autob...",South Korea
...,...,...,...,...,...,...,...,...,...,...,...,...
7647,,,,,,,,,,,,
7648,,,,,,,,,,,,United Kingdom
7649,,,,,,,,,,,,United States


### rename the col

In [592]:
df_nfx = df_nfx.rename(columns = {0: 'country'})

df_nfx

Unnamed: 0,show_id,type,title,director,cast,date_added,release_year,rating,duration,listed_in,description,country
0,81145628.00,Movie,Norm of the North: King Sized Adventure,"Richard Finn, Tim Maltby","Alan Marriott, Andrew Toth, Brian Dobson, Cole...",9-Sep-19,2019.00,TV-PG,90 min,"Children & Family Movies, Comedies",Before planning an awesome wedding for his gra...,United States
1,80117401.00,Movie,Jandino: Whatever it Takes,,Jandino Asporaat,9-Sep-16,2016.00,TV-MA,94 min,Stand-Up Comedy,Jandino Asporaat riffs on the challenges of ra...,India
2,70234439.00,TV Show,Transformers Prime,,"Peter Cullen, Sumalee Montano, Frank Welker, J...",8-Sep-18,2013.00,TV-Y7-FV,1 Season,Kids' TV,"With the help of three human allies, the Autob...",South Korea
...,...,...,...,...,...,...,...,...,...,...,...,...
7647,,,,,,,,,,,,
7648,,,,,,,,,,,,United Kingdom
7649,,,,,,,,,,,,United States


## Col - listed_in

### replace NAN with ''

In [593]:
df_nfx['listed_in'] = df_nfx['listed_in'].replace(to_replace = np.nan, value = '')

df_nfx['listed_in']

0       Children & Family Movies, Comedies
1                          Stand-Up Comedy
2                                 Kids' TV
                       ...                
7647                                      
7648                                      
7649                                      
Name: listed_in, Length: 7650, dtype: object

In [594]:
df_nfx['listed_in'] = df_nfx['listed_in'].dropna(how = 'any')

df_nfx['listed_in']

0       Children & Family Movies, Comedies
1                          Stand-Up Comedy
2                                 Kids' TV
                       ...                
7647                                      
7648                                      
7649                                      
Name: listed_in, Length: 7650, dtype: object

### seperate each genre

In [595]:
list_genre = [j for i in df_nfx['listed_in'] for j in i.split(', ')]

list_genre

['Children & Family Movies',
 'Comedies',
 'Stand-Up Comedy',
 "Kids' TV",
 "Kids' TV",
 'Comedies',
 'Crime TV Shows',
 'International TV Shows',
 'Spanish-Language TV Shows',
 'International Movies',
 'Sci-Fi & Fantasy',
 'Thrillers',
 'Stand-Up Comedy',
 'Docuseries',
 'Science & Nature TV',
 'Action & Adventure',
 'Thrillers',
 'Stand-Up Comedy',
 'Action & Adventure',
 'Dramas',
 'International Movies',
 'Children & Family Movies',
 'Children & Family Movies',
 'Children & Family Movies',
 'Children & Family Movies',
 'Children & Family Movies',
 'Children & Family Movies',
 'Children & Family Movies',
 'Cult Movies',
 'Dramas',
 'Independent Movies',
 'Comedies',
 'Independent Movies',
 'Romantic Movies',
 'Action & Adventure',
 'Comedies',
 'International Movies',
 'Documentaries',
 'Horror Movies',
 'Thrillers',
 'Dramas',
 'Independent Movies',
 'Dramas',
 'Independent Movies',
 'Romantic Movies',
 'International TV Shows',
 'Romantic TV Shows',
 'TV Comedies',
 'Documentaries

### replace "TV", "Movies" with ""

In [596]:
list_genre = [i.replace(' TV', '').replace('TV ', '').replace(' Movies', '').replace(' Shows', '').replace('\'', '') for i in list_genre]

list_genre

['Children & Family',
 'Comedies',
 'Stand-Up Comedy',
 'Kids',
 'Kids',
 'Comedies',
 'Crime',
 'International',
 'Spanish-Language',
 'International',
 'Sci-Fi & Fantasy',
 'Thrillers',
 'Stand-Up Comedy',
 'Docuseries',
 'Science & Nature',
 'Action & Adventure',
 'Thrillers',
 'Stand-Up Comedy',
 'Action & Adventure',
 'Dramas',
 'International',
 'Children & Family',
 'Children & Family',
 'Children & Family',
 'Children & Family',
 'Children & Family',
 'Children & Family',
 'Children & Family',
 'Cult',
 'Dramas',
 'Independent',
 'Comedies',
 'Independent',
 'Romantic',
 'Action & Adventure',
 'Comedies',
 'International',
 'Documentaries',
 'Horror',
 'Thrillers',
 'Dramas',
 'Independent',
 'Dramas',
 'Independent',
 'Romantic',
 'International',
 'Romantic',
 'Comedies',
 'Documentaries',
 'Docuseries',
 'Horror',
 'International',
 'Children & Family',
 'Comedies',
 'Sci-Fi & Fantasy',
 'Comedies',
 'Romantic',
 'Dramas',
 'International',
 'Thrillers',
 'Dramas',
 'Indepen

### transfrom into dataframe

In [597]:
df_genre = pd.DataFrame(data = list_genre)

df_genre

Unnamed: 0,0
0,Children & Family
1,Comedies
2,Stand-Up Comedy
...,...
15079,
15080,
15081,


### rename the col

In [598]:
df_genre = df_genre.rename(columns = {0: 'genre'})

df_genre

Unnamed: 0,genre
0,Children & Family
1,Comedies
2,Stand-Up Comedy
...,...
15079,
15080,
15081,


### remove empty rows

In [599]:
filt_empty = (df_genre['genre'] == '')

filt_empty

0        False
1        False
2        False
         ...  
15079     True
15080     True
15081     True
Name: genre, Length: 15082, dtype: bool

In [600]:
df_genre[filt_empty]

Unnamed: 0,genre
13663,
13664,
13665,
...,...
15079,
15080,
15081,


In [601]:
df_genre = df_genre.iloc[:13663, :]

df_genre

Unnamed: 0,genre
0,Children & Family
1,Comedies
2,Stand-Up Comedy
...,...
13660,Dramas
13661,Classic & Cult
13662,Comedies


### add to another col

In [602]:
df_nfx = pd.concat(objs = [df_nfx, df_genre], axis = 'columns')

df_nfx

Unnamed: 0,show_id,type,title,director,cast,date_added,release_year,rating,duration,listed_in,description,country,genre
0,81145628.00,Movie,Norm of the North: King Sized Adventure,"Richard Finn, Tim Maltby","Alan Marriott, Andrew Toth, Brian Dobson, Cole...",9-Sep-19,2019.00,TV-PG,90 min,"Children & Family Movies, Comedies",Before planning an awesome wedding for his gra...,United States,Children & Family
1,80117401.00,Movie,Jandino: Whatever it Takes,,Jandino Asporaat,9-Sep-16,2016.00,TV-MA,94 min,Stand-Up Comedy,Jandino Asporaat riffs on the challenges of ra...,India,Comedies
2,70234439.00,TV Show,Transformers Prime,,"Peter Cullen, Sumalee Montano, Frank Welker, J...",8-Sep-18,2013.00,TV-Y7-FV,1 Season,Kids' TV,"With the help of three human allies, the Autob...",South Korea,Stand-Up Comedy
...,...,...,...,...,...,...,...,...,...,...,...,...,...
13660,,,,,,,,,,,,,Dramas
13661,,,,,,,,,,,,,Classic & Cult
13662,,,,,,,,,,,,,Comedies


# Export To File

## Export dataset to a CSV file

In [603]:
df_nfx.to_csv('netflix_titles(done).csv', index = False)

## Confirm the file

In [604]:
df_nfx_done = pd.read_csv('netflix_titles(done).csv', na_values = '')

df_nfx_done

Unnamed: 0,show_id,type,title,director,cast,date_added,release_year,rating,duration,listed_in,description,country,genre
0,81145628.00,Movie,Norm of the North: King Sized Adventure,"Richard Finn, Tim Maltby","Alan Marriott, Andrew Toth, Brian Dobson, Cole...",9-Sep-19,2019.00,TV-PG,90 min,"Children & Family Movies, Comedies",Before planning an awesome wedding for his gra...,United States,Children & Family
1,80117401.00,Movie,Jandino: Whatever it Takes,,Jandino Asporaat,9-Sep-16,2016.00,TV-MA,94 min,Stand-Up Comedy,Jandino Asporaat riffs on the challenges of ra...,India,Comedies
2,70234439.00,TV Show,Transformers Prime,,"Peter Cullen, Sumalee Montano, Frank Welker, J...",8-Sep-18,2013.00,TV-Y7-FV,1 Season,Kids' TV,"With the help of three human allies, the Autob...",South Korea,Stand-Up Comedy
...,...,...,...,...,...,...,...,...,...,...,...,...,...
13660,,,,,,,,,,,,,Dramas
13661,,,,,,,,,,,,,Classic & Cult
13662,,,,,,,,,,,,,Comedies
