In [1]:
import pandas as pd

In [2]:
# Read the Netflix catalogue CSV from the working directory and preview the first rows.
df = pd.read_csv(filepath_or_buffer='NetFlix.csv')
df.head(5)

Unnamed: 0,show_id,type,title,director,cast,country,date_added,release_year,rating,duration,genres,description
0,s1,TV Show,3%,,"João Miguel, Bianca Comparato, Michel Gomes, R...",Brazil,14-Aug-20,2020,TV-MA,4,"International TV Shows, TV Dramas, TV Sci-Fi &...",In a future where the elite inhabit an island ...
1,s10,Movie,1920,Vikram Bhatt,"Rajneesh Duggal, Adah Sharma, Indraneil Sengup...",India,15-Dec-17,2008,TV-MA,143,"Horror Movies, International Movies, Thrillers",An architect and his wife move into a castle t...
2,s100,Movie,3 Heroines,Iman Brotoseno,"Reza Rahadian, Bunga Citra Lestari, Tara Basro...",Indonesia,5-Jan-19,2016,TV-PG,124,"Dramas, International Movies, Sports Movies",Three Indonesian women break records by becomi...
3,s1000,Movie,Blue Mountain State: The Rise of Thadland,Lev L. Spiro,"Alan Ritchson, Darin Brooks, James Cade, Rob R...",United States,1-Mar-16,2016,R,90,Comedies,New NFL star Thad buys his old teammates' belo...
4,s1001,TV Show,Blue Planet II,,David Attenborough,United Kingdom,3-Dec-18,2017,TV-G,1,"British TV Shows, Docuseries, Science & Nature TV",This sequel to the award-winning nature series...


In [3]:
# Dimensions of the loaded table as (rows, columns).
df.shape

(7787, 12)

In [4]:
# Number of distinct titles; compare with the row count to see whether titles repeat.
df['title'].nunique()

7787

In [5]:
# Number of distinct (non-null) rating labels.
df['rating'].nunique()

14

In [6]:
# Print each column's non-null count and dtype, plus the frame's memory usage.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 7787 entries, 0 to 7786
Data columns (total 12 columns):
 #   Column        Non-Null Count  Dtype 
---  ------        --------------  ----- 
 0   show_id       7787 non-null   object
 1   type          7787 non-null   object
 2   title         7787 non-null   object
 3   director      5398 non-null   object
 4   cast          7069 non-null   object
 5   country       7280 non-null   object
 6   date_added    7777 non-null   object
 7   release_year  7787 non-null   int64 
 8   rating        7780 non-null   object
 9   duration      7787 non-null   int64 
 10  genres        7787 non-null   object
 11  description   7787 non-null   object
dtypes: int64(2), object(10)
memory usage: 730.2+ KB


In [7]:
# Count the missing values in every column.
df.isna().sum()

show_id            0
type               0
title              0
director        2389
cast             718
country          507
date_added        10
release_year       0
rating             7
duration           0
genres             0
description        0
dtype: int64

In [8]:
# Fill missing ratings with the most frequent rating (the mode).
# The earlier expression `df.loc[max(df.rating.value_counts())].rating` took the
# highest *count* and used it as a row label, so the fill value was whatever
# rating that unrelated row happened to carry.
# The result is assigned back to the column instead of calling
# `fillna(..., inplace=True)` on an attribute-accessed Series, which is not
# guaranteed to write through to the frame.
df['rating'] = df['rating'].fillna(df['rating'].mode().iloc[0])

# Lower-case the description text.
df['description'] = df['description'].str.lower()

In [9]:
# Work on an independent copy that holds only the identifier, title and text columns.
newdf = df.loc[
    :,
    ['show_id', 'type', 'title', 'director', 'cast', 'rating', 'genres', 'description'],
].copy()

In [10]:
# Show the first row of the reduced frame.
newdf.head(1)

Unnamed: 0,show_id,type,title,director,cast,rating,genres,description
0,s1,TV Show,3%,,"João Miguel, Bianca Comparato, Michel Gomes, R...",TV-MA,"International TV Shows, TV Dramas, TV Sci-Fi &...",in a future where the elite inhabit an island ...


In [11]:
def edit_text(x):
    """Turn a comma-separated string into space-separated, space-free tokens.

    The input is stripped and split on commas; every item then has its spaces
    removed (so "Tara Basro" becomes "TaraBasro") and the items are joined
    with a single space. A value whose ``str()`` is ``'nan'`` produces ``''``.
    """
    if str(x) == 'nan':
        return ''
    pieces = x.strip().split(',')
    return ' '.join(piece.replace(' ', '') for piece in pieces)

In [12]:
# Every cleaned column is derived from the corresponding column of `df`, not from
# `newdf` itself, so re-running this cell gives the same result. Applying
# edit_text to its own output would find no commas left to split on and would
# fuse all names of a row into a single token.
newdf['cast'] = df['cast'].apply(edit_text)

newdf['genres'] = df['genres'].apply(edit_text)

# Missing directors become ''; spaces are removed so a full name is one token.
newdf['director'] = df['director'].fillna('').str.replace(' ', '', regex=False)

# Remove the hyphen so that e.g. "TV-MA" becomes the single token "TVMA".
newdf['rating'] = df['rating'].fillna('').str.replace('-', '', regex=False)

# One text field per title: description, genres, director, cast, rating and type,
# separated by single spaces (an empty director leaves two adjacent spaces).
newdf['tags'] = (
    newdf['description'] + ' ' + newdf['genres'] + ' ' + newdf['director']
    + ' ' + newdf['cast'] + ' ' + newdf['rating'] + ' ' + newdf['type']
)

In [13]:
# Inspect the combined tag string of the row labelled 0.
newdf.loc[0, 'tags']

'in a future where the elite inhabit an island paradise far from the crowded slums, you get one chance to join the 3% saved from squalor. InternationalTVShows TVDramas TVSci-Fi&Fantasy  JoãoMiguel BiancaComparato MichelGomes RodolfoValente VanezaOliveira RafaelLozano VivianePorto MelFronckowiak SergioMamberti ZezéMotta CelsoFrateschi TVMA TV Show'

In [14]:
# Independent copy that holds only the identifier, the title and the combined tags.
movies = newdf.loc[:, ['show_id', 'title', 'tags']].copy()

In [15]:
# Preview the first rows of the frame used for the similarity model.
movies.head()

Unnamed: 0,show_id,title,tags
0,s1,3%,in a future where the elite inhabit an island ...
1,s10,1920,an architect and his wife move into a castle t...
2,s100,3 Heroines,three indonesian women break records by becomi...
3,s1000,Blue Mountain State: The Rise of Thadland,new nfl star thad buys his old teammates' belo...
4,s1001,Blue Planet II,this sequel to the award-winning nature series...


In [16]:
# Store titles in lower case; the lookup in recommend() compares titles for exact equality.
movies['title'] = movies['title'].map(lambda name: name.lower())

In [17]:
from sklearn.feature_extraction.text import CountVectorizer

In [18]:
# Bag-of-words vectorizer: English stop words are dropped and the vocabulary is
# capped at the number of distinct titles.
cv = CountVectorizer(
    stop_words='english',
    max_features=movies['title'].nunique(),
)

In [19]:
# Keep the token counts as the sparse matrix that fit_transform returns.
# Converting with .toarray() built a dense (documents x vocabulary) array, which
# costs a large amount of memory for no benefit here: cosine_similarity accepts
# sparse input directly and still returns a dense similarity matrix by default.
vector = cv.fit_transform(movies['tags'])

In [20]:
from sklearn.metrics.pairwise import cosine_similarity

In [21]:
# Pairwise cosine similarity between the rows of `vector`; with a single argument
# the matrix is compared against itself, so similarity[i][j] relates row i to row j.
similarity = cosine_similarity(vector)

In [28]:
def recommend(movie,n):
    """Print the titles most similar to ``movie``.

    Parameters
    ----------
    movie : str
        Title to look up. It is compared for exact equality with the values in
        ``movies['title']``, so it must already be in the same (lower) case.
    n : int
        One more than the number of recommendations to print: ``n - 1`` rows
        are shown. Values of 1 or less print only the header line.

    Raises
    ------
    IndexError
        If no row of ``movies`` has the requested title.

    Notes
    -----
    Reads the module-level ``movies``, ``newdf`` and ``similarity`` objects and
    does not modify any of them.
    """
    print(movie.title().center(50,'_'))

    # Positional row number of the first matching title, so it can be used both
    # for the similarity matrix and for .iloc lookups.
    matches = (movies['title'] == movie).to_numpy().nonzero()[0]
    if len(matches) == 0:
        raise IndexError(f"title not found: {movie!r}")
    index = int(matches[0])

    # Exclude the queried row explicitly instead of assuming it sorts first;
    # another row with an identical score could otherwise push it into the
    # results. sorted() is stable, so ties keep their original row order.
    ranked = sorted(
        ((pos, score) for pos, score in enumerate(similarity[index]) if pos != index),
        key=lambda pair: pair[1],
        reverse=True,
    )

    # max(..., 0) keeps a negative n from slicing from the end of the list.
    for rank, (pos, score) in enumerate(ranked[:max(n - 1, 0)], start=1):
        shown_title = movies['title'].iloc[pos].title()
        shown_type = newdf['type'].iloc[pos].title()
        print(f'{rank} ->\t{shown_title}\nType: {shown_type}\nSimilarity: {round(float(score), 2)}\n')

In [29]:
def choice():
    """Ask whether to keep going and report the answer as a boolean.

    Returns ``False`` only when the reply, once stripped and lower-cased, is
    exactly ``'e'``; any other reply (including an empty one) returns ``True``.
    A newline string is printed after the reply has been read.
    """
    answer = input("\nPress any key to contine or press 'e' to exit: ")
    print('\n')
    return answer.lower().strip() != 'e'

In [30]:
# Interactive loop: read a title and a result count, print recommendations, then
# ask whether to continue. An out-of-range count re-prompts immediately.
while True:
    try:
        title = input("\nEnter the movie/tv name: ").lower().strip()
        n = int(input("Number of similarity: "))
        print('\n')
        max_n = movies.title.nunique()
        # A count below 1 would reach recommend() as n <= 1, which shows nothing
        # (or, for negative values, slices the ranking from the end).
        if n < 1:
            print("Number of similarity must be at least 1 !\n")
            continue
        if n > max_n:
            print(f"Number of similarity max value is {max_n} !\n")
            continue
        # recommend() prints n - 1 rows, hence the + 1.
        recommend(movie = title,n = n+1)
    except (ValueError, LookupError):
        # ValueError: the count was not an integer.
        # LookupError (IndexError): the title is not in the catalogue.
        # Other exceptions indicate a real bug and are allowed to surface.
        print("Invalid movie name or number of similarity!")

    # Asked once per pass, whether or not the lookup succeeded.
    if not choice():
        break


Enter the movie/tv name: narcos
Number of similarity: 5


______________________Narcos______________________
1 ->	El Chapo
Type: Tv Show
Similarity: 0.44

2 ->	Narcos: Mexico
Type: Tv Show
Similarity: 0.44

3 ->	Bad Blood
Type: Tv Show
Similarity: 0.33

4 ->	Spartacus
Type: Tv Show
Similarity: 0.33

5 ->	El Cartel
Type: Tv Show
Similarity: 0.32


Press any key to contine or press 'e' to exit: 



Enter the movie/tv name: bird box
Number of similarity: 5


_____________________Bird Box_____________________
1 ->	Equals
Type: Movie
Similarity: 0.31

2 ->	Apollo 18
Type: Movie
Similarity: 0.29

3 ->	The Lake House
Type: Movie
Similarity: 0.28

4 ->	Sweetheart
Type: Movie
Similarity: 0.28

5 ->	A Boy Called Po
Type: Movie
Similarity: 0.27


Press any key to contine or press 'e' to exit: e


