In [1]:
import pickle
import pandas as pd
import numpy as np
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.metrics import pairwise_distances, pairwise_distances_chunked
from scipy.sparse import vstack
from nltk.corpus.reader.wordnet import NOUN
from nltk.corpus import wordnet
from nltk.tokenize import word_tokenize
from nltk.stem import WordNetLemmatizer

In [2]:
imdb_movies = pd.read_csv("data/IMDb movies.csv", low_memory=False)

In [124]:
netflix = pd.read_csv("data/netflix_titles.csv")

In [3]:
imdb_recomend = imdb_movies[["original_title", "year", "genre","director", "actors", "description"]]

In [4]:
def remove_spaces(lst):
    """Return a new list in which every string from ``lst`` has its spaces removed.

    Only the plain space character is stripped; other whitespace is kept.
    """
    return [name.replace(" ", "") for name in lst]

In [5]:
def list_for_remove(string):
    """Collapse a comma-separated field into space-separated, space-free tokens.

    The value is coerced with ``str()``, split on ``", "``, each piece is passed
    through ``remove_spaces`` and the pieces are re-joined with single spaces,
    e.g. ``"Asta Nielsen, Urban Gad"`` -> ``"AstaNielsen UrbanGad"``.
    """
    pieces = str(string).split(", ")
    squashed = remove_spaces(pieces)
    return " ".join(squashed)

In [9]:
def clean_up(imdb_recomend):
    """Return a cleaned copy of the recommendation frame.

    For the ``director``, ``actors`` and ``genre`` columns, missing values are
    replaced by a placeholder and each comma-separated entry is collapsed into
    a single space-free token via ``list_for_remove``. ``original_title`` and
    the remaining columns are left untouched. The input frame is not modified.
    """
    imdb_recomend = imdb_recomend.copy()

    # Placeholder used for missing values in each column that gets collapsed.
    placeholders = {
        "director": "Unlisted",
        "actors": "Unavailable",
        "genre": "Unknown",
    }
    for column, placeholder in placeholders.items():
        # Assign the result back instead of calling fillna(inplace=True) on the
        # column accessor: that chained in-place form warns on recent pandas
        # and does not update the frame under Copy-on-Write.
        imdb_recomend[column] = (
            imdb_recomend[column].fillna(placeholder).apply(list_for_remove)
        )

    return imdb_recomend

In [10]:
imdb_recomend = clean_up(imdb_recomend)

In [13]:
def get_keywords(imdb_recomend):
    """Return a copy whose ``description`` and ``original_title`` are keyword strings.

    Missing descriptions are first replaced by ``"Unknown"``; both columns are
    then passed through ``make_keywords``. The input frame is not modified.
    """
    imdb_recomend = imdb_recomend.copy()
    # Assign back rather than fillna(inplace=True) on the column accessor: the
    # chained in-place form warns on recent pandas and does not update the
    # frame under Copy-on-Write.
    imdb_recomend["description"] = (
        imdb_recomend["description"].fillna("Unknown").apply(make_keywords)
    )
    imdb_recomend["original_title"] = imdb_recomend["original_title"].apply(make_keywords)
    return imdb_recomend

In [14]:
# Languages whose NLTK stop-word lists are removed from the keywords.
_STOPWORD_LANGUAGES = ['arabic', 'azerbaijani', 'danish', 'dutch', 'english', 'finnish', 'french', 'german', 'greek','hungarian', 'indonesian', 'italian', 'kazakh', 'nepali', 'norwegian', 'portuguese', 'romanian', 'russian', 'slovene', 'spanish', 'swedish', 'tajik', 'turkish']

# Holds one (punctuation table, stop-word set, lemmatizer) tuple once built.
_KEYWORD_TOOLS = []


def _keyword_tools():
    """Return (punctuation table, stop-word set, lemmatizer), building them on first use."""
    if not _KEYWORD_TOOLS:
        from string import punctuation
        from nltk.corpus import stopwords
        # Append a single tuple so a failure while loading leaves the cache empty.
        _KEYWORD_TOOLS.append((
            str.maketrans('', '', punctuation),
            frozenset(stopwords.words(_STOPWORD_LANGUAGES)),
            WordNetLemmatizer(),
        ))
    return _KEYWORD_TOOLS[0]


def make_keywords(string):
    """Reduce free text to a space-separated string of lemmatized keywords.

    Steps: tokenize with ``word_tokenize``, lower-case, strip punctuation from
    each token, keep purely alphabetic tokens, drop stop words from the
    configured languages, lemmatize, and join with single spaces.

    The stop-word set, punctuation table and lemmatizer are built once and
    reused, because this function is applied to every row of two columns.
    """
    table, stop_words, lem = _keyword_tools()
    # convert to lower case
    tokens = [w.lower() for w in word_tokenize(string)]
    # remove punctuation from each word
    stripped = (w.translate(table) for w in tokens)
    # keep alphabetic tokens that are not stop words
    words = [w for w in stripped if w.isalpha() and w not in stop_words]
    return " ".join(lem.lemmatize(w) for w in words)

In [15]:
imdb_recomend = get_keywords(imdb_recomend)

In [16]:
imdb_recomend

Unnamed: 0,original_title,year,genre,director,actors,description
0,miss jerry,1894,Romance,AlexanderBlack,BlancheBayliss WilliamCourtenay ChaunceyDepew,adventure female reporter
1,story kelly gang,1906,Biography Crime Drama,CharlesTait,ElizabethTait JohnTait NormanCampbell BellaCol...,true story notorious australian outlaw kelly
2,sorte drøm,1911,Drama,UrbanGad,AstaNielsen ValdemarPsilander GunnarHelsengree...,two high rank wooing beautiful famous equestri...
3,cleopatra,1912,Drama History,CharlesL.Gaskill,HelenGardner PearlSindelar MissFielding MissRo...,fabled queen egypt affair roman general marc a...
4,linferno,1911,Adventure Drama Fantasy,FrancescoBertolini AdolfoPadovan,SalvatorePapa ArturoPirovano GiuseppedeLiguoro...,loosely adapted dante divine comedy inspired i...
...,...,...,...,...,...,...
85850,lion,2020,Comedy,LudovicColbeau-Justin,DanyBoon PhilippeKaterine AnneSerra SamuelJouy...,psychiatric hospital patient pretend crazy cha...
85851,beentjes sinthildegard,2020,Comedy Drama,JohanNijenhuis,HermanFinkers JohannaterSteege LeonieterBraak ...,middleaged veterinary surgeon belief wife pamp...
85852,padmavyuhathile abhimanyu,2019,Drama,VineeshAaradya,AnoopChandran Indrans SonaNair SimonBrittoRodr...,unknown
85853,sokagin çocuklari,2019,Drama Family,AhmetFaikAkinci,AhmetFaikAkinci BelmaMamati MetinKeçeci Burhan...,unknown


In [19]:
first_five = imdb_recomend[:5].copy()

In [22]:
# Stack the first row under the five-row sample so the frame contains an exact
# duplicate (labels become 0,1,2,3,4,0). DataFrame.append was removed in
# pandas 2.0; pd.concat is the supported equivalent and returns a new frame.
first_six = pd.concat([first_five, imdb_recomend[:1]])

In [76]:
first_six

Unnamed: 0,original_title,year,genre,director,actors,description
0,miss jerry,1894,Romance,AlexanderBlack,BlancheBayliss WilliamCourtenay ChaunceyDepew,adventure female reporter
1,story kelly gang,1906,Biography Crime Drama,CharlesTait,ElizabethTait JohnTait NormanCampbell BellaCol...,true story notorious australian outlaw kelly
2,sorte drøm,1911,Drama,UrbanGad,AstaNielsen ValdemarPsilander GunnarHelsengree...,two high rank wooing beautiful famous equestri...
3,cleopatra,1912,Drama History,CharlesL.Gaskill,HelenGardner PearlSindelar MissFielding MissRo...,fabled queen egypt affair roman general marc a...
4,linferno,1911,Adventure Drama Fantasy,FrancescoBertolini AdolfoPadovan,SalvatorePapa ArturoPirovano GiuseppedeLiguoro...,loosely adapted dante divine comedy inspired i...
0,miss jerry,1894,Romance,AlexanderBlack,BlancheBayliss WilliamCourtenay ChaunceyDepew,adventure female reporter


In [77]:
def mashup(imdb):
    """Build one weighted text key per row and return the keys as a list.

    Each key is the space-joined sequence: description, year, actors x5,
    director x3, genre, original_title x2. Every key is also printed, followed
    by a blank line.

    NOTE(review): a row at position ``pos`` is skipped when ``pos`` is not also
    an index *label*, although the values are read by position. On a frame
    whose index is not 0..n-1 this drops rows; confirm whether that is intended.
    """
    # (column, repetitions) in the order the pieces appear in the key
    layout = (
        ("description", 1),
        ("year", 1),
        ("actors", 5),
        ("director", 3),
        ("genre", 1),
        ("original_title", 2),
    )
    str_list = []
    for pos in range(len(imdb)):
        if pos not in imdb.index:
            continue
        pieces = []
        for column, repeats in layout:
            pieces.extend([getattr(imdb, column).iloc[pos]] * repeats)
        key = " ".join(pieces)
        str_list.append(key)
        print(key)
        print()
    return str_list

In [78]:
imdb_lst = mashup(first_six)

adventure female reporter 1894 BlancheBayliss WilliamCourtenay ChaunceyDepew BlancheBayliss WilliamCourtenay ChaunceyDepew BlancheBayliss WilliamCourtenay ChaunceyDepew BlancheBayliss WilliamCourtenay ChaunceyDepew BlancheBayliss WilliamCourtenay ChaunceyDepew AlexanderBlack AlexanderBlack AlexanderBlack Romance miss jerry miss jerry

true story notorious australian outlaw kelly 1906 ElizabethTait JohnTait NormanCampbell BellaCola WillCoyne SamCrewes JackEnnis JohnForde VeraLinden Mr.Marshall Mr.McKenzie FrankMills OllieWilson ElizabethTait JohnTait NormanCampbell BellaCola WillCoyne SamCrewes JackEnnis JohnForde VeraLinden Mr.Marshall Mr.McKenzie FrankMills OllieWilson ElizabethTait JohnTait NormanCampbell BellaCola WillCoyne SamCrewes JackEnnis JohnForde VeraLinden Mr.Marshall Mr.McKenzie FrankMills OllieWilson ElizabethTait JohnTait NormanCampbell BellaCola WillCoyne SamCrewes JackEnnis JohnForde VeraLinden Mr.Marshall Mr.McKenzie FrankMills OllieWilson ElizabethTait JohnTait Norman

In [89]:
# Echo every combined key string, leaving one blank line after each.
for key_text in imdb_lst:
    print(key_text, end="\n\n")
    

adventure female reporter 1894 BlancheBayliss WilliamCourtenay ChaunceyDepew BlancheBayliss WilliamCourtenay ChaunceyDepew BlancheBayliss WilliamCourtenay ChaunceyDepew BlancheBayliss WilliamCourtenay ChaunceyDepew BlancheBayliss WilliamCourtenay ChaunceyDepew AlexanderBlack AlexanderBlack AlexanderBlack Romance miss jerry miss jerry

true story notorious australian outlaw kelly 1906 ElizabethTait JohnTait NormanCampbell BellaCola WillCoyne SamCrewes JackEnnis JohnForde VeraLinden Mr.Marshall Mr.McKenzie FrankMills OllieWilson ElizabethTait JohnTait NormanCampbell BellaCola WillCoyne SamCrewes JackEnnis JohnForde VeraLinden Mr.Marshall Mr.McKenzie FrankMills OllieWilson ElizabethTait JohnTait NormanCampbell BellaCola WillCoyne SamCrewes JackEnnis JohnForde VeraLinden Mr.Marshall Mr.McKenzie FrankMills OllieWilson ElizabethTait JohnTait NormanCampbell BellaCola WillCoyne SamCrewes JackEnnis JohnForde VeraLinden Mr.Marshall Mr.McKenzie FrankMills OllieWilson ElizabethTait JohnTait Norman

In [87]:
# Keep a handle on the key string built for the first row ("miss jerry").
miss = imdb_lst[0]

In [88]:
# mashup() skips any position that is not also an index label; first_six is
# labelled 0,1,2,3,4,0, so its sixth row (position 5) produced no key.
# Re-adding the first key stands in for that duplicated row.
# NOTE(review): re-running this cell appends yet another copy.
imdb_lst.append(miss)

In [90]:
# Turn the key strings into a sparse term-count matrix, one row per string.
# Fields repeated inside a key string get proportionally larger counts.
ivectorizor = CountVectorizer()
ikeys = ivectorizor.fit_transform(imdb_lst)

In [91]:
# Cosine distance between every pair of rows in ikeys. The result is a
# generator that yields the distance matrix in row chunks, so it can only be
# consumed once.
idist = pairwise_distances_chunked(ikeys, metric='cosine')

In [92]:
# Persist every chunk of the distance matrix, one pickle file per chunk.
# The file name carries y, the cumulative number of rows written so far.
y = 0
for pull in idist:  # stops when the generator is exhausted, never over-reads it
    y += pull.shape[0]
    # 'wb' instead of 'ab': re-running must replace the file, otherwise a
    # reader doing a single pickle.load() would get the stale first object.
    # The with-block closes the file even if pickling fails.
    with open(f'chunks/tiny_imdb_test{y}.pkl', 'wb') as dbfile:
        pickle.dump(pull, dbfile)

In [93]:
pull

array([[0.        , 1.        , 1.        , 1.        , 0.99312321,
        0.        ],
       [1.        , 0.        , 0.99663206, 0.63334801, 0.99685578,
        1.        ],
       [1.        , 0.99663206, 0.        , 0.99771723, 0.99017291,
        1.        ],
       [1.        , 0.63334801, 0.99771723, 0.        , 0.99786886,
        1.        ],
       [0.99312321, 0.99685578, 0.99017291, 0.99786886, 0.        ,
        0.99312321],
       [0.        , 1.        , 1.        , 1.        , 0.99312321,
        0.        ]])

In [120]:
print(pull.shape[0])
# Positions of the titles ordered from nearest to farthest from row 0.
studs = np.argsort(pull[0])
# argsort yields positions, so select with .iloc; plain [] would look the
# numbers up as index labels, which breaks while first_six carries the
# duplicated labels 0,1,2,3,4,0.
first_six['original_title'].iloc[studs]

6


0          miss jerry
5          miss jerry
4            linferno
1    story kelly gang
2          sorte drøm
3           cleopatra
Name: original_title, dtype: object

In [50]:
pull[0]

array([0.        , 1.        , 1.        , 1.        , 0.99312321])

In [52]:
pull[4]

array([0.99312321, 0.99685578, 0.99017291, 0.99786886, 0.        ])

In [105]:
# Convert cosine distance to cosine similarity (1 means identical term vectors).
sim = 1-pull

In [122]:
sim[0]

array([1.        , 0.        , 0.        , 0.        , 0.00687679,
       1.        ])

In [123]:
sim[5]

array([1.        , 0.        , 0.        , 0.        , 0.00687679,
       1.        ])

In [121]:

# Positions of the titles ordered from most to least similar to row 0.
studs = np.argsort(sim[0])[::-1]
# Positional selection: the argsort result is positions, not index labels.
first_six.original_title.iloc[studs]

5          miss jerry
0          miss jerry
4            linferno
3           cleopatra
2          sorte drøm
1    story kelly gang
Name: original_title, dtype: object

In [99]:
# Replace the duplicated labels (0,1,2,3,4,0) with a fresh 0..5 index; the old
# labels are kept in a new "index" column.
first_six = first_six.reset_index()

In [100]:
first_six

Unnamed: 0,index,original_title,year,genre,director,actors,description
0,0,miss jerry,1894,Romance,AlexanderBlack,BlancheBayliss WilliamCourtenay ChaunceyDepew,adventure female reporter
1,1,story kelly gang,1906,Biography Crime Drama,CharlesTait,ElizabethTait JohnTait NormanCampbell BellaCol...,true story notorious australian outlaw kelly
2,2,sorte drøm,1911,Drama,UrbanGad,AstaNielsen ValdemarPsilander GunnarHelsengree...,two high rank wooing beautiful famous equestri...
3,3,cleopatra,1912,Drama History,CharlesL.Gaskill,HelenGardner PearlSindelar MissFielding MissRo...,fabled queen egypt affair roman general marc a...
4,4,linferno,1911,Adventure Drama Fantasy,FrancescoBertolini AdolfoPadovan,SalvatorePapa ArturoPirovano GiuseppedeLiguoro...,loosely adapted dante divine comedy inspired i...
5,0,miss jerry,1894,Romance,AlexanderBlack,BlancheBayliss WilliamCourtenay ChaunceyDepew,adventure female reporter


In [135]:
# Rename the Netflix columns in place to the names used for the IMDb data
# (title_id, original_title, year, genre, actors).
netflix.rename(columns = {'show_id': 'title_id', 'title': 'original_title','release_year': 'year', 'listed_in': 'genre','cast': 'actors' },inplace=True)

In [136]:
netflix

Unnamed: 0,title_id,type,original_title,director,actors,country,date_added,year,rating,duration,genre,description
0,s1,TV Show,3%,,"João Miguel, Bianca Comparato, Michel Gomes, R...",Brazil,"August 14, 2020",2020,TV-MA,4 Seasons,"International TV Shows, TV Dramas, TV Sci-Fi &...",In a future where the elite inhabit an island ...
1,s2,Movie,7:19,Jorge Michel Grau,"Demián Bichir, Héctor Bonilla, Oscar Serrano, ...",Mexico,"December 23, 2016",2016,TV-MA,93 min,"Dramas, International Movies",After a devastating earthquake hits Mexico Cit...
2,s3,Movie,23:59,Gilbert Chan,"Tedd Chan, Stella Chung, Henley Hii, Lawrence ...",Singapore,"December 20, 2018",2011,R,78 min,"Horror Movies, International Movies","When an army recruit is found dead, his fellow..."
3,s4,Movie,9,Shane Acker,"Elijah Wood, John C. Reilly, Jennifer Connelly...",United States,"November 16, 2017",2009,PG-13,80 min,"Action & Adventure, Independent Movies, Sci-Fi...","In a postapocalyptic world, rag-doll robots hi..."
4,s5,Movie,21,Robert Luketic,"Jim Sturgess, Kevin Spacey, Kate Bosworth, Aar...",United States,"January 1, 2020",2008,PG-13,123 min,Dramas,A brilliant group of students become card-coun...
...,...,...,...,...,...,...,...,...,...,...,...,...
7782,s7783,Movie,Zozo,Josef Fares,"Imad Creidi, Antoinette Turk, Elias Gergi, Car...","Sweden, Czech Republic, United Kingdom, Denmar...","October 19, 2020",2005,TV-MA,99 min,"Dramas, International Movies",When Lebanon's Civil War deprives Zozo of his ...
7783,s7784,Movie,Zubaan,Mozez Singh,"Vicky Kaushal, Sarah-Jane Dias, Raaghav Chanan...",India,"March 2, 2019",2015,TV-14,111 min,"Dramas, International Movies, Music & Musicals",A scrappy but poor boy worms his way into a ty...
7784,s7785,Movie,Zulu Man in Japan,,Nasty C,,"September 25, 2020",2019,TV-MA,44 min,"Documentaries, International Movies, Music & M...","In this documentary, South African rapper Nast..."
7785,s7786,TV Show,Zumbo's Just Desserts,,"Adriano Zumbo, Rachel Khoo",Australia,"October 31, 2020",2019,TV-PG,1 Season,"International TV Shows, Reality TV",Dessert wizard Adriano Zumbo looks for the nex...


In [137]:
# Tag every IMDb row as a movie so it lines up with the Netflix "type" column
# (which holds 'Movie' / 'TV Show').
imdb_movies['type']='Movie'

In [139]:
imdb_movies.rename(columns = {'imdb_title_id': 'title_id'},inplace=True)

In [151]:
imdb_cutdown = imdb_movies[['title_id','type','original_title','director','actors','year','duration','genre','description']].copy()

In [153]:
netflix_cutdown = netflix[['title_id','type','original_title','director','actors','year','duration','genre','description']].copy()

In [154]:
netflix_cutdown

Unnamed: 0,title_id,type,original_title,director,actors,year,duration,genre,description
0,s1,TV Show,3%,,"João Miguel, Bianca Comparato, Michel Gomes, R...",2020,4 Seasons,"International TV Shows, TV Dramas, TV Sci-Fi &...",In a future where the elite inhabit an island ...
1,s2,Movie,7:19,Jorge Michel Grau,"Demián Bichir, Héctor Bonilla, Oscar Serrano, ...",2016,93 min,"Dramas, International Movies",After a devastating earthquake hits Mexico Cit...
2,s3,Movie,23:59,Gilbert Chan,"Tedd Chan, Stella Chung, Henley Hii, Lawrence ...",2011,78 min,"Horror Movies, International Movies","When an army recruit is found dead, his fellow..."
3,s4,Movie,9,Shane Acker,"Elijah Wood, John C. Reilly, Jennifer Connelly...",2009,80 min,"Action & Adventure, Independent Movies, Sci-Fi...","In a postapocalyptic world, rag-doll robots hi..."
4,s5,Movie,21,Robert Luketic,"Jim Sturgess, Kevin Spacey, Kate Bosworth, Aar...",2008,123 min,Dramas,A brilliant group of students become card-coun...
...,...,...,...,...,...,...,...,...,...
7782,s7783,Movie,Zozo,Josef Fares,"Imad Creidi, Antoinette Turk, Elias Gergi, Car...",2005,99 min,"Dramas, International Movies",When Lebanon's Civil War deprives Zozo of his ...
7783,s7784,Movie,Zubaan,Mozez Singh,"Vicky Kaushal, Sarah-Jane Dias, Raaghav Chanan...",2015,111 min,"Dramas, International Movies, Music & Musicals",A scrappy but poor boy worms his way into a ty...
7784,s7785,Movie,Zulu Man in Japan,,Nasty C,2019,44 min,"Documentaries, International Movies, Music & M...","In this documentary, South African rapper Nast..."
7785,s7786,TV Show,Zumbo's Just Desserts,,"Adriano Zumbo, Rachel Khoo",2019,1 Season,"International TV Shows, Reality TV",Dessert wizard Adriano Zumbo looks for the nex...


In [155]:
total_list = imdb_cutdown.copy()

In [156]:
# Show the frame. The former bare total_list.append() call had no argument and
# discarded its result; DataFrame.append also no longer exists in pandas >= 2.0.
total_list

Unnamed: 0,title_id,type,original_title,director,actors,year,duration,genre,description
0,tt0000009,Movie,Miss Jerry,Alexander Black,"Blanche Bayliss, William Courtenay, Chauncey D...",1894,45,Romance,The adventures of a female reporter in the 1890s.
1,tt0000574,Movie,The Story of the Kelly Gang,Charles Tait,"Elizabeth Tait, John Tait, Norman Campbell, Be...",1906,70,"Biography, Crime, Drama",True story of notorious Australian outlaw Ned ...
2,tt0001892,Movie,Den sorte drøm,Urban Gad,"Asta Nielsen, Valdemar Psilander, Gunnar Helse...",1911,53,Drama,Two men of high rank are both wooing the beaut...
3,tt0002101,Movie,Cleopatra,Charles L. Gaskill,"Helen Gardner, Pearl Sindelar, Miss Fielding, ...",1912,100,"Drama, History",The fabled queen of Egypt's affair with Roman ...
4,tt0002130,Movie,L'Inferno,"Francesco Bertolini, Adolfo Padovan","Salvatore Papa, Arturo Pirovano, Giuseppe de L...",1911,68,"Adventure, Drama, Fantasy",Loosely adapted from Dante's Divine Comedy and...
...,...,...,...,...,...,...,...,...,...
85850,tt9908390,Movie,Le lion,Ludovic Colbeau-Justin,"Dany Boon, Philippe Katerine, Anne Serra, Samu...",2020,95,Comedy,A psychiatric hospital patient pretends to be ...
85851,tt9911196,Movie,De Beentjes van Sint-Hildegard,Johan Nijenhuis,"Herman Finkers, Johanna ter Steege, Leonie ter...",2020,103,"Comedy, Drama",A middle-aged veterinary surgeon believes his ...
85852,tt9911774,Movie,Padmavyuhathile Abhimanyu,Vineesh Aaradya,"Anoop Chandran, Indrans, Sona Nair, Simon Brit...",2019,130,Drama,
85853,tt9914286,Movie,Sokagin Çocuklari,Ahmet Faik Akinci,"Ahmet Faik Akinci, Belma Mamati, Metin Keçeci,...",2019,98,"Drama, Family",


In [166]:
# Stack the Netflix rows under the IMDb rows in a single call. Appending row by
# row re-copies the whole frame on every iteration and relies on
# DataFrame.append, which was removed in pandas 2.0. Row labels are preserved,
# as before. Starting from imdb_cutdown (the frame total_list was copied from)
# keeps this cell safe to re-run without adding the Netflix rows twice.
total_list = pd.concat([imdb_cutdown, netflix_cutdown])


In [158]:
netflix_cutdown.shape

(7787, 9)

In [161]:
type(netflix_cutdown.iloc[0])

pandas.core.series.Series

In [167]:
total_list

Unnamed: 0,title_id,type,original_title,director,actors,year,duration,genre,description
0,tt0000009,Movie,Miss Jerry,Alexander Black,"Blanche Bayliss, William Courtenay, Chauncey D...",1894,45,Romance,The adventures of a female reporter in the 1890s.
1,tt0000574,Movie,The Story of the Kelly Gang,Charles Tait,"Elizabeth Tait, John Tait, Norman Campbell, Be...",1906,70,"Biography, Crime, Drama",True story of notorious Australian outlaw Ned ...
2,tt0001892,Movie,Den sorte drøm,Urban Gad,"Asta Nielsen, Valdemar Psilander, Gunnar Helse...",1911,53,Drama,Two men of high rank are both wooing the beaut...
3,tt0002101,Movie,Cleopatra,Charles L. Gaskill,"Helen Gardner, Pearl Sindelar, Miss Fielding, ...",1912,100,"Drama, History",The fabled queen of Egypt's affair with Roman ...
4,tt0002130,Movie,L'Inferno,"Francesco Bertolini, Adolfo Padovan","Salvatore Papa, Arturo Pirovano, Giuseppe de L...",1911,68,"Adventure, Drama, Fantasy",Loosely adapted from Dante's Divine Comedy and...
...,...,...,...,...,...,...,...,...,...
7782,s7783,Movie,Zozo,Josef Fares,"Imad Creidi, Antoinette Turk, Elias Gergi, Car...",2005,99 min,"Dramas, International Movies",When Lebanon's Civil War deprives Zozo of his ...
7783,s7784,Movie,Zubaan,Mozez Singh,"Vicky Kaushal, Sarah-Jane Dias, Raaghav Chanan...",2015,111 min,"Dramas, International Movies, Music & Musicals",A scrappy but poor boy worms his way into a ty...
7784,s7785,Movie,Zulu Man in Japan,,Nasty C,2019,44 min,"Documentaries, International Movies, Music & M...","In this documentary, South African rapper Nast..."
7785,s7786,TV Show,Zumbo's Just Desserts,,"Adriano Zumbo, Rachel Khoo",2019,1 Season,"International TV Shows, Reality TV",Dessert wizard Adriano Zumbo looks for the nex...


In [168]:
# The Netflix rows still carry their own 0..7786 labels after stacking; give
# the combined frame one continuous index. The old labels are moved into an
# "index" column.
total_list = total_list.reset_index()

In [171]:
# Remove the helper column left behind by reset_index(). errors='ignore' plus a
# plain reassignment lets the cell be re-run without raising KeyError.
total_list = total_list.drop(columns='index', errors='ignore')

In [173]:
total_list

Unnamed: 0,title_id,type,original_title,director,actors,year,duration,genre,description
0,tt0000009,Movie,Miss Jerry,Alexander Black,"Blanche Bayliss, William Courtenay, Chauncey D...",1894,45,Romance,The adventures of a female reporter in the 1890s.
1,tt0000574,Movie,The Story of the Kelly Gang,Charles Tait,"Elizabeth Tait, John Tait, Norman Campbell, Be...",1906,70,"Biography, Crime, Drama",True story of notorious Australian outlaw Ned ...
2,tt0001892,Movie,Den sorte drøm,Urban Gad,"Asta Nielsen, Valdemar Psilander, Gunnar Helse...",1911,53,Drama,Two men of high rank are both wooing the beaut...
3,tt0002101,Movie,Cleopatra,Charles L. Gaskill,"Helen Gardner, Pearl Sindelar, Miss Fielding, ...",1912,100,"Drama, History",The fabled queen of Egypt's affair with Roman ...
4,tt0002130,Movie,L'Inferno,"Francesco Bertolini, Adolfo Padovan","Salvatore Papa, Arturo Pirovano, Giuseppe de L...",1911,68,"Adventure, Drama, Fantasy",Loosely adapted from Dante's Divine Comedy and...
...,...,...,...,...,...,...,...,...,...
93637,s7783,Movie,Zozo,Josef Fares,"Imad Creidi, Antoinette Turk, Elias Gergi, Car...",2005,99 min,"Dramas, International Movies",When Lebanon's Civil War deprives Zozo of his ...
93638,s7784,Movie,Zubaan,Mozez Singh,"Vicky Kaushal, Sarah-Jane Dias, Raaghav Chanan...",2015,111 min,"Dramas, International Movies, Music & Musicals",A scrappy but poor boy worms his way into a ty...
93639,s7785,Movie,Zulu Man in Japan,,Nasty C,2019,44 min,"Documentaries, International Movies, Music & M...","In this documentary, South African rapper Nast..."
93640,s7786,TV Show,Zumbo's Just Desserts,,"Adriano Zumbo, Rachel Khoo",2019,1 Season,"International TV Shows, Reality TV",Dessert wizard Adriano Zumbo looks for the nex...


In [197]:
# Spot-check the Python type of one Netflix movie's duration (row label 93639,
# "Zulu Man in Japan").
# NOTE(review): this cell's execution count (197) is later than the strip_min
# cells that follow it (194/195), so the recorded result presumably reflects
# the state after that clean-up — confirm by re-running top to bottom.
type(total_list[total_list.type=='Movie'].duration[93639])

int

In [194]:
def strip_min(x):
    """Normalise a duration value to an integer number of minutes when possible.

    * Non-string values are returned unchanged.
    * ``"45"`` -> ``45`` and ``"99 min"`` -> ``99``; both forms yield ``int`` so
      the column does not end up mixing ints with digit strings.
    * Anything else (``"1 Season"``, an empty string, a non-numeric token) is
      returned unchanged instead of raising.
    """
    if not isinstance(x, str):
        return x
    parts = x.split()
    if not parts:
        # empty or whitespace-only string: nothing to parse
        return x
    if len(parts) == 1 or parts[1] == 'min':
        try:
            return int(parts[0])
        except ValueError:
            return x
    return x

In [195]:
# Normalise every duration value with strip_min.
total_list["duration"] = total_list["duration"].apply(strip_min)

In [196]:
total_list

Unnamed: 0,title_id,type,original_title,director,actors,year,duration,genre,description
0,tt0000009,Movie,Miss Jerry,Alexander Black,"Blanche Bayliss, William Courtenay, Chauncey D...",1894,45,Romance,The adventures of a female reporter in the 1890s.
1,tt0000574,Movie,The Story of the Kelly Gang,Charles Tait,"Elizabeth Tait, John Tait, Norman Campbell, Be...",1906,70,"Biography, Crime, Drama",True story of notorious Australian outlaw Ned ...
2,tt0001892,Movie,Den sorte drøm,Urban Gad,"Asta Nielsen, Valdemar Psilander, Gunnar Helse...",1911,53,Drama,Two men of high rank are both wooing the beaut...
3,tt0002101,Movie,Cleopatra,Charles L. Gaskill,"Helen Gardner, Pearl Sindelar, Miss Fielding, ...",1912,100,"Drama, History",The fabled queen of Egypt's affair with Roman ...
4,tt0002130,Movie,L'Inferno,"Francesco Bertolini, Adolfo Padovan","Salvatore Papa, Arturo Pirovano, Giuseppe de L...",1911,68,"Adventure, Drama, Fantasy",Loosely adapted from Dante's Divine Comedy and...
...,...,...,...,...,...,...,...,...,...
93637,s7783,Movie,Zozo,Josef Fares,"Imad Creidi, Antoinette Turk, Elias Gergi, Car...",2005,99,"Dramas, International Movies",When Lebanon's Civil War deprives Zozo of his ...
93638,s7784,Movie,Zubaan,Mozez Singh,"Vicky Kaushal, Sarah-Jane Dias, Raaghav Chanan...",2015,111,"Dramas, International Movies, Music & Musicals",A scrappy but poor boy worms his way into a ty...
93639,s7785,Movie,Zulu Man in Japan,,Nasty C,2019,44,"Documentaries, International Movies, Music & M...","In this documentary, South African rapper Nast..."
93640,s7786,TV Show,Zumbo's Just Desserts,,"Adriano Zumbo, Rachel Khoo",2019,1 Season,"International TV Shows, Reality TV",Dessert wizard Adriano Zumbo looks for the nex...


In [198]:
total_list[total_list.type=='TV Show'].duration

85855    4 Seasons
85860     1 Season
85866     1 Season
85867     1 Season
85871     1 Season
           ...    
93622     1 Season
93630     1 Season
93632    2 Seasons
93634     1 Season
93640     1 Season
Name: duration, Length: 2410, dtype: object

In [199]:
# Save the merged table to the working directory. This runs before the
# fillna("NAN") cell, so missing values are written as empty fields. With the
# default index=True the row index is written as the first, unnamed column.
total_list.to_csv("Combined_Netflix_IMDb_Useful_Columns.csv")

In [200]:
# Replace every remaining missing value, in any column, with the literal
# string "NAN" (modifies total_list in place).
total_list.fillna("NAN",inplace=True)

In [201]:
total_list

Unnamed: 0,title_id,type,original_title,director,actors,year,duration,genre,description
0,tt0000009,Movie,Miss Jerry,Alexander Black,"Blanche Bayliss, William Courtenay, Chauncey D...",1894,45,Romance,The adventures of a female reporter in the 1890s.
1,tt0000574,Movie,The Story of the Kelly Gang,Charles Tait,"Elizabeth Tait, John Tait, Norman Campbell, Be...",1906,70,"Biography, Crime, Drama",True story of notorious Australian outlaw Ned ...
2,tt0001892,Movie,Den sorte drøm,Urban Gad,"Asta Nielsen, Valdemar Psilander, Gunnar Helse...",1911,53,Drama,Two men of high rank are both wooing the beaut...
3,tt0002101,Movie,Cleopatra,Charles L. Gaskill,"Helen Gardner, Pearl Sindelar, Miss Fielding, ...",1912,100,"Drama, History",The fabled queen of Egypt's affair with Roman ...
4,tt0002130,Movie,L'Inferno,"Francesco Bertolini, Adolfo Padovan","Salvatore Papa, Arturo Pirovano, Giuseppe de L...",1911,68,"Adventure, Drama, Fantasy",Loosely adapted from Dante's Divine Comedy and...
...,...,...,...,...,...,...,...,...,...
93637,s7783,Movie,Zozo,Josef Fares,"Imad Creidi, Antoinette Turk, Elias Gergi, Car...",2005,99,"Dramas, International Movies",When Lebanon's Civil War deprives Zozo of his ...
93638,s7784,Movie,Zubaan,Mozez Singh,"Vicky Kaushal, Sarah-Jane Dias, Raaghav Chanan...",2015,111,"Dramas, International Movies, Music & Musicals",A scrappy but poor boy worms his way into a ty...
93639,s7785,Movie,Zulu Man in Japan,NAN,Nasty C,2019,44,"Documentaries, International Movies, Music & M...","In this documentary, South African rapper Nast..."
93640,s7786,TV Show,Zumbo's Just Desserts,NAN,"Adriano Zumbo, Rachel Khoo",2019,1 Season,"International TV Shows, Reality TV",Dessert wizard Adriano Zumbo looks for the nex...


In [216]:
total_list[total_list.genre=="NAN"]

Unnamed: 0,title_id,type,original_title,director,actors,year,duration,genre,description


In [212]:
total_list

Unnamed: 0,title_id,type,original_title,director,actors,year,duration,genre,description
0,tt0000009,Movie,Miss Jerry,Alexander Black,"Blanche Bayliss, William Courtenay, Chauncey D...",1894,45,Romance,The adventures of a female reporter in the 1890s.
1,tt0000574,Movie,The Story of the Kelly Gang,Charles Tait,"Elizabeth Tait, John Tait, Norman Campbell, Be...",1906,70,"Biography, Crime, Drama",True story of notorious Australian outlaw Ned ...
2,tt0001892,Movie,Den sorte drøm,Urban Gad,"Asta Nielsen, Valdemar Psilander, Gunnar Helse...",1911,53,Drama,Two men of high rank are both wooing the beaut...
3,tt0002101,Movie,Cleopatra,Charles L. Gaskill,"Helen Gardner, Pearl Sindelar, Miss Fielding, ...",1912,100,"Drama, History",The fabled queen of Egypt's affair with Roman ...
4,tt0002130,Movie,L'Inferno,"Francesco Bertolini, Adolfo Padovan","Salvatore Papa, Arturo Pirovano, Giuseppe de L...",1911,68,"Adventure, Drama, Fantasy",Loosely adapted from Dante's Divine Comedy and...
...,...,...,...,...,...,...,...,...,...
93637,s7783,Movie,Zozo,Josef Fares,"Imad Creidi, Antoinette Turk, Elias Gergi, Car...",2005,99,"Dramas, International Movies",When Lebanon's Civil War deprives Zozo of his ...
93638,s7784,Movie,Zubaan,Mozez Singh,"Vicky Kaushal, Sarah-Jane Dias, Raaghav Chanan...",2015,111,"Dramas, International Movies, Music & Musicals",A scrappy but poor boy worms his way into a ty...
93639,s7785,Movie,Zulu Man in Japan,NAN,Nasty C,2019,44,"Documentaries, International Movies, Music & M...","In this documentary, South African rapper Nast..."
93640,s7786,TV Show,Zumbo's Just Desserts,NAN,"Adriano Zumbo, Rachel Khoo",2019,1 Season,"International TV Shows, Reality TV",Dessert wizard Adriano Zumbo looks for the nex...
