In [None]:
import pandas as pd
import numpy as np
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.feature_extraction.text import CountVectorizer

In [None]:
# Load netflix_titles.csv (path is relative to the working directory) into a DataFrame.
data = pd.read_csv('netflix_titles.csv')

In [None]:
# List the column labels of the loaded frame.
data.columns

In [None]:
# Preview the first three rows.
data.head(3)

#### Separando o dataframe em dois: um para filmes e outro para séries

In [None]:
# Boolean mask marking the rows whose `type` is 'TV Show'.
tv_show = data['type'] == 'TV Show'

# Keep only those rows, renumber them from 0 (reset_index() without drop=True
# also stores the old row labels in an `index` column), and discard the
# columns listed below.
tv_show_data = (
    data.loc[tv_show]
    .reset_index()
    .drop(columns=['show_id', 'date_added', 'release_year', 'type', 'director'])
)
tv_show_data.head()


In [None]:
# Boolean mask marking the rows whose `type` is 'Movie'.
movie = data['type'] == 'Movie'

# Keep only those rows, renumber them from 0 (reset_index() without drop=True
# also stores the old row labels in an `index` column), and discard the
# columns listed below. Unlike the series frame, `director` is kept.
movie_data = (
    data.loc[movie]
    .reset_index()
    .drop(columns=['show_id', 'type', 'date_added', 'release_year'])
)
movie_data.head()


In [None]:
# Preview the movie frame after the column drop.
movie_data.head()

In [None]:
# Preview the series frame after the column drop.
tv_show_data.head()

#### Obtendo os diretores e atores para os filmes

In [None]:
# Every value of the `director` column, in row order (missing values included).
directors = list(movie_data['director'])

# De-duplicate while keeping first-seen order. dict.fromkeys does this in a
# single pass; the previous `not in list` scan was quadratic in the number of
# rows.
unique_directors = list(dict.fromkeys(directors))

In [None]:
cast = movie_data['cast']
print(cast[0])

# Each cast entry is a ', '-separated string of names. Entries that are not
# strings (missing values are NaN floats) are skipped explicitly, instead of
# being hidden behind a blanket `except Exception: pass`.
actors = [
    actor
    for entry in cast
    if isinstance(entry, str)
    for actor in entry.split(', ')
]

# De-duplicate while keeping first-seen order, in a single pass.
unique_actors = list(dict.fromkeys(actors))

In [None]:
# For actors, then directors: print the de-duplicated list, the full list, and
# whether the two are equal (True means the full list had no repeats).
for uniques, everything in ((unique_actors, actors), (unique_directors, directors)):
    print(uniques)
    print(everything)
    print(uniques == everything)

#### Obtendo os diretores e atores para as séries


In [None]:
# NOTE: this rebinds `cast`, `actors` and `unique_actors`, which until now held
# the movie values.
cast = tv_show_data['cast']
print(cast[0])

# Each cast entry is a ', '-separated string of names. Entries that are not
# strings (missing values are NaN floats) are skipped explicitly, instead of
# being hidden behind a blanket `except Exception: pass`.
actors = [
    actor
    for entry in cast
    if isinstance(entry, str)
    for actor in entry.split(', ')
]

# De-duplicate while keeping first-seen order, in a single pass.
unique_actors = list(dict.fromkeys(actors))


In [None]:
# Print the de-duplicated actor list, the full list, and whether the two are
# equal (True means the full list had no repeats).
for shown in (unique_actors, actors, unique_actors == actors):
    print(shown)

#### Obtendo os gêneros disponíveis

In [None]:
genres_data = data['listed_in']

# Each `listed_in` entry is a ', '-separated string of genre labels. Entries
# that are not strings (e.g. NaN) are skipped explicitly, instead of being
# hidden behind a blanket `except Exception: pass`.
genres = [
    label
    for entry in genres_data
    if isinstance(entry, str)
    for label in entry.split(', ')
]

# De-duplicate while keeping first-seen order, in a single pass.
unique_genres = list(dict.fromkeys(genres))

In [None]:
def _filtra_por_genero(frame, genero):
    """Return a copy of the rows of ``frame`` whose ``listed_in`` contains ``genero``.

    The match is a literal, case-sensitive substring test. Rows whose
    ``listed_in`` is missing never match: the previous ``str.find(...) != -1``
    test evaluated to True for NaN and so returned them for every genre.
    """
    condicao = frame['listed_in'].str.contains(genero, regex=False, na=False)
    return frame[condicao].copy()


def busca_genero(genero):
    """Return the rows of ``data`` (movies and series) listed under ``genero``."""
    return _filtra_por_genero(data, genero)


def busca_genero_TvShow(genero):
    """Return the rows of ``tv_show_data`` listed under ``genero``."""
    return _filtra_por_genero(tv_show_data, genero)


def busca_genero_Movie(genero):
    """Return the rows of ``movie_data`` listed under ``genero``."""
    return _filtra_por_genero(movie_data, genero)

In [None]:
# (rows, columns) of the movie frame.
movie_data.shape

In [None]:
# (rows, columns) of the series frame.
tv_show_data.shape

In [None]:
#### Fazendo as Recomendações de Filmes

In [None]:
# Columns available for building the movie text features.
movie_data.columns

In [None]:
# Movie columns that get their missing values filled with '' before being
# concatenated into `combined_features`.
features = ['director', 'cast', 'listed_in', 'description', 'title']

In [None]:
def combine_features(row):
    """Join a movie row's text fields into one space-separated string.

    Args:
        row: mapping-like object (e.g. a DataFrame row) with string values
            under 'director', 'cast', 'listed_in', 'description' and 'title'.

    Returns:
        The five values in that order, separated by single spaces.
    """
    ordered_columns = ('director', 'cast', 'listed_in', 'description', 'title')
    return ' '.join(row[column] for column in ordered_columns)

In [None]:
# Replace missing values in the feature columns with '' so the string
# concatenation in combine_features never meets a NaN.
movie_data[features] = movie_data[features].fillna('')

# One combined text string per movie, built row by row.
movie_data['combined_features'] = movie_data.apply(combine_features, axis=1)

In [None]:
# Token counts for each movie's combined text, then the pairwise cosine
# similarity between all movies.
# NOTE: `cv`, `count_matrix` and `cosine_sim` are assigned again by the series
# section further down; once that cell has run, these names no longer refer to
# the movie data.
cv = CountVectorizer()
count_matrix = cv.fit_transform(movie_data['combined_features'])
cosine_sim = cosine_similarity(count_matrix)

In [None]:
def find_title_from_index(index):
    """Return the title of the movie whose row label in ``movie_data`` equals ``index``."""
    same_label = movie_data.index == index
    return movie_data.loc[same_label, 'title'].values[0]
def find_index_from_title(title):
    """Return the row label of the first movie in ``movie_data`` titled exactly ``title``.

    Raises:
        IndexError: if no movie has that exact title. The exception type is
            the same one the bare ``.values[0]`` lookup raised, but the message
            now names the missing title.
    """
    matches = movie_data[movie_data.title == title].index.values
    if len(matches) == 0:
        raise IndexError(f"no movie titled {title!r} in movie_data")
    return matches[0]

In [None]:
def movie_recommendation(movie_title, top_n=5):
    """Return the titles of the movies most similar to ``movie_title``.

    Similarity is the cosine similarity between token-count vectors of
    ``movie_data['combined_features']``.

    Args:
        movie_title: exact title of a movie present in ``movie_data``.
        top_n: how many recommendations to return (default 5).

    Returns:
        A list of up to ``top_n`` titles, most similar first; the queried
        movie itself is never included.
    """
    movie_index = int(find_index_from_title(movie_title))

    # Build the similarity row from the movie features here instead of reading
    # the notebook-global `cosine_sim`: the series section assigns that name
    # again, so by the time this function is called it may hold the TV-show
    # matrix and silently yield unrelated titles (or an out-of-range index).
    # Only one row is needed, so this stays cheap.
    movie_counts = CountVectorizer().fit_transform(movie_data['combined_features'])
    scores = cosine_similarity(movie_counts[movie_index], movie_counts).ravel()

    ranked = sorted(enumerate(scores), key=lambda pair: pair[1], reverse=True)

    # Exclude the query by index rather than dropping the first hit: another
    # row with identical text ties at 1.0 and can sort ahead of the query.
    rec_indexes = [index for index, _ in ranked if index != movie_index][:top_n]
    return [find_title_from_index(index) for index in rec_indexes]

In [None]:
#### Fazendo as Recomendações de Séries


In [None]:
# Columns available for building the series text features.
tv_show_data.columns

In [None]:
# Series columns that get their missing values filled with '' before being
# concatenated into `fatores_combinados`.
fatores = ['description', 'listed_in', 'cast']

In [None]:
def combinar_fatores(row):
    """Join a series row's text fields into one space-separated string.

    Args:
        row: mapping-like object (e.g. a DataFrame row) with string values
            under 'description', 'listed_in' and 'cast'.

    Returns:
        The three values in that order, separated by single spaces.
    """
    ordered_columns = ('description', 'listed_in', 'cast')
    return ' '.join(row[column] for column in ordered_columns)

In [None]:
# Replace missing values in the factor columns with '' so the string
# concatenation in combinar_fatores never meets a NaN.
tv_show_data[fatores] = tv_show_data[fatores].fillna('')

# One combined text string per series, built row by row.
tv_show_data['fatores_combinados'] = tv_show_data.apply(combinar_fatores, axis=1)

In [None]:
# Token counts for each series' combined text, then the pairwise cosine
# similarity between all series.
# NOTE: this reuses the names `cv`, `count_matrix` and `cosine_sim` from the
# movie section; from here on they hold the series versions.
cv = CountVectorizer()
count_matrix = cv.fit_transform(tv_show_data['fatores_combinados'])
cosine_sim = cosine_similarity(count_matrix)

In [None]:
def tv_show_index_to_title(index):
    """Return the title of the series whose row label in ``tv_show_data`` equals ``index``."""
    same_label = tv_show_data.index == index
    return tv_show_data.loc[same_label, 'title'].values[0]
def tv_show_title_to_index(title):
    """Return the row label of the first series in ``tv_show_data`` titled exactly ``title``.

    Raises:
        IndexError: if no series has that exact title. The exception type is
            the same one the bare ``.values[0]`` lookup raised, but the message
            now names the missing title.
    """
    matches = tv_show_data[tv_show_data.title == title].index.values
    if len(matches) == 0:
        raise IndexError(f"no TV show titled {title!r} in tv_show_data")
    return matches[0]

In [None]:
def tv_show_recommendation(tv_show_title, top_n=5):
    """Return the titles of the series most similar to ``tv_show_title``.

    Similarity is the cosine similarity between token-count vectors of
    ``tv_show_data['fatores_combinados']``.

    Args:
        tv_show_title: exact title of a series present in ``tv_show_data``.
        top_n: how many recommendations to return (default 5).

    Returns:
        A list of up to ``top_n`` titles, most similar first; the queried
        series itself is never included.
    """
    tv_show_index = int(tv_show_title_to_index(tv_show_title))

    # Build the similarity row from the series features here instead of reading
    # the notebook-global `cosine_sim`: that name is shared with the movie
    # section, so which matrix it holds depends on the order in which cells
    # were last run. Only one row is needed, so this stays cheap.
    tv_show_counts = CountVectorizer().fit_transform(tv_show_data['fatores_combinados'])
    scores = cosine_similarity(tv_show_counts[tv_show_index], tv_show_counts).ravel()

    ranked = sorted(enumerate(scores), key=lambda pair: pair[1], reverse=True)

    # Exclude the query by index rather than dropping the first hit: another
    # row with identical text ties at 1.0 and can sort ahead of the query.
    indexes = [index for index, _ in ranked if index != tv_show_index][:top_n]
    return [tv_show_index_to_title(index) for index in indexes]

In [None]:
# Example query: series recommended for 'La casa de papel'.
tv_show_recommendation('La casa de papel')

In [None]:
# Example query: movies recommended for 'Bird Box'.
movie_recommendation('Bird Box')