In [1]:
import numpy as np
import scipy
import sklearn
from sklearn import svm
from sklearn.feature_extraction.text import CountVectorizer, TfidfVectorizer
import pandas as pd
import tensorflow as tf
from collections import Counter

In [2]:
df = pd.read_csv('netflix_titles.csv')
df.head()

Unnamed: 0,show_id,type,title,director,cast,country,date_added,release_year,rating,duration,listed_in,description
0,81145628,Movie,Norm of the North: King Sized Adventure,"Richard Finn, Tim Maltby","Alan Marriott, Andrew Toth, Brian Dobson, Cole...","United States, India, South Korea, China","September 9, 2019",2019,TV-PG,90 min,"Children & Family Movies, Comedies",Before planning an awesome wedding for his gra...
1,80117401,Movie,Jandino: Whatever it Takes,,Jandino Asporaat,United Kingdom,"September 9, 2016",2016,TV-MA,94 min,Stand-Up Comedy,Jandino Asporaat riffs on the challenges of ra...
2,70234439,TV Show,Transformers Prime,,"Peter Cullen, Sumalee Montano, Frank Welker, J...",United States,"September 8, 2018",2013,TV-Y7-FV,1 Season,Kids' TV,"With the help of three human allies, the Autob..."
3,80058654,TV Show,Transformers: Robots in Disguise,,"Will Friedle, Darren Criss, Constance Zimmer, ...",United States,"September 8, 2018",2016,TV-Y7,1 Season,Kids' TV,When a prison ship crash unleashes hundreds of...
4,80125979,Movie,#realityhigh,Fernando Lebrija,"Nesta Cooper, Kate Walsh, John Michael Higgins...",United States,"September 8, 2017",2017,TV-14,99 min,Comedies,When nerdy high schooler Dani finally attracts...


In [3]:
for description in df.loc[:3, 'description']:
    print(description)
    print('-----------------------')

Before planning an awesome wedding for his grandfather, a polar bear king must take back a stolen artifact from an evil archaeologist first.
-----------------------
Jandino Asporaat riffs on the challenges of raising kids and serenades the audience with a rousing rendition of "Sex on Fire" in his comedy show.
-----------------------
With the help of three human allies, the Autobots once again protect Earth from the onslaught of the Decepticons and their leader, Megatron.
-----------------------
When a prison ship crash unleashes hundreds of Decepticons on Earth, Bumblebee leads a new Autobot force to protect humankind.
-----------------------


In [4]:
corpus = [description for description in df.loc[:, 'description']]
print(len(corpus))
corpus[:3]

6234


['Before planning an awesome wedding for his grandfather, a polar bear king must take back a stolen artifact from an evil archaeologist first.',
 'Jandino Asporaat riffs on the challenges of raising kids and serenades the audience with a rousing rendition of "Sex on Fire" in his comedy show.',
 'With the help of three human allies, the Autobots once again protect Earth from the onslaught of the Decepticons and their leader, Megatron.']

In [5]:
targets = [t for t in df.loc[:, 'type']]
targets[:5]

['Movie', 'Movie', 'TV Show', 'TV Show', 'Movie']

In [6]:
s=set(targets)
tar=list()
for t in targets:
    for i,j in enumerate(s):
        if t==j:
            tar.append(i)
targets=np.array(tar)
print(targets)

[1 1 0 ... 1 0 0]


In [7]:
count_vectorizer = CountVectorizer(stop_words='english')
bows = count_vectorizer.fit_transform(corpus)
print("We have {} descriptions.".format(bows.shape[0]))
pd.DataFrame(bows.toarray(), columns=count_vectorizer.get_feature_names()).head()

We have 6234 descriptions.


Unnamed: 0,000,007,009,10,100,1000,102,10th,11,112,...,zurich,zé,álex,álvaro,émile,ömer,über,łukasz,ōarai,şeref
0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [8]:
bows = bows.toarray().astype(np.float32)

In [9]:
num_of_train = 5000
X_train, y_train = bows[:num_of_train], targets[:num_of_train]
X_test, y_test = bows[num_of_train:], targets[num_of_train:]

In [10]:
model = tf.keras.models.Sequential([
    tf.keras.layers.Dense(50, activation=tf.keras.activations.relu),
    tf.keras.layers.Dense(50, activation=tf.keras.activations.relu),
    tf.keras.layers.Dense(1, activation=tf.keras.activations.sigmoid)
])

In [11]:
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

In [12]:
model.fit(X_train, y_train, epochs=10)

Train on 5000 samples
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<tensorflow.python.keras.callbacks.History at 0x23b9250ff60>

In [13]:
model.evaluate(X_test, y_test)



[2.74490608697579, 0.57050246]

In [14]:
print('Descriptions of funny movies')

listed_in = [t for t in df.loc[:, 'listed_in']]
listOfComedies=list()
for i,j in enumerate(listed_in):
        if j=='Comedies':
            listOfComedies.append(i)

Comedies = [Comedies for Comedies in df.loc[listOfComedies, 'description']]

Comedies[:3]

Descriptions of funny movies


['When nerdy high schooler Dani finally attracts the interest of her longtime crush, she lands in the cross hairs of his ex, a social media celebrity.',
 'To keep his teen crush from spotting him in an embarrassing YouTube video, Anthony and his friend Ian must enter the virtual world and alter the clip.',
 'Armed with awkward questions and zero self-awareness, Zach Galifianakis hits the road to find famous interview subjects for his no-budget talk show.']

In [15]:
words = [row.split(' ') for row in Comedies]
words[:15]

[['When',
  'nerdy',
  'high',
  'schooler',
  'Dani',
  'finally',
  'attracts',
  'the',
  'interest',
  'of',
  'her',
  'longtime',
  'crush,',
  'she',
  'lands',
  'in',
  'the',
  'cross',
  'hairs',
  'of',
  'his',
  'ex,',
  'a',
  'social',
  'media',
  'celebrity.'],
 ['To',
  'keep',
  'his',
  'teen',
  'crush',
  'from',
  'spotting',
  'him',
  'in',
  'an',
  'embarrassing',
  'YouTube',
  'video,',
  'Anthony',
  'and',
  'his',
  'friend',
  'Ian',
  'must',
  'enter',
  'the',
  'virtual',
  'world',
  'and',
  'alter',
  'the',
  'clip.'],
 ['Armed',
  'with',
  'awkward',
  'questions',
  'and',
  'zero',
  'self-awareness,',
  'Zach',
  'Galifianakis',
  'hits',
  'the',
  'road',
  'to',
  'find',
  'famous',
  'interview',
  'subjects',
  'for',
  'his',
  'no-budget',
  'talk',
  'show.'],
 ['When',
  'her',
  'class',
  'rank',
  'threatens',
  'her',
  'college',
  'plans,',
  'an',
  'ambitious',
  'teen',
  'convinces',
  'a',
  'nerdy',
  'peer',
  'to',
