# **How to decide what to watch on Netflix? - Neural Network**

New movies come out on Netflix every month, some I have never heard of. Is it possible to build a model that will tell me if a movie is worth watching based on a subset of movie parameters? 

In [1]:
import pandas as pd

In [2]:
df = pd.read_csv("../Resources/cleaned_data.csv")
df.head()

Unnamed: 0,Title,Year,Age,IMDb,Rotten Tomatoes,Netflix,Hulu,Prime Video,Disney+,Directors,Genres,Country,Runtime,production_company,Top_Genres,Top_Director
0,Inception,2010,13+,8.8,8.7,1,0,0,0,Christopher Nolan,"Action,Adventure,Sci-Fi,Thriller",United States,148.0,Warner Bros.,Action,Christopher Nolan
1,The Matrix,1999,18+,8.7,8.7,1,0,0,0,"Lana Wachowski,Lilly Wachowski","Action,Sci-Fi",United States,136.0,,Action,Lana Wachowski
2,Avengers: Infinity War,2018,13+,8.5,8.4,1,0,0,0,"Anthony Russo,Joe Russo","Action,Adventure,Sci-Fi",United States,149.0,,Action,Anthony Russo
3,Back to the Future,1985,7+,8.5,9.6,1,0,0,0,Robert Zemeckis,"Adventure,Comedy,Sci-Fi",United States,116.0,,Adventure,Robert Zemeckis
4,"The Good, the Bad and the Ugly",1966,18+,8.8,9.7,1,0,1,0,Sergio Leone,Western,Italy,161.0,,Western,Sergio Leone


In [3]:
df = df[["Year", "Country", "Runtime", "Top_Genres", "IMDb"]]
df = df.rename(columns={"Top_Genres":"Genre"})
df = df.dropna()
df["Runtime"] = df[["Runtime"]].round(0).astype(int)
df["IMDb"] = (df["IMDb"]*10).round(0).astype(int)
df.head()

Unnamed: 0,Year,Country,Runtime,Genre,IMDb
0,2010,United States,148,Action,88
1,1999,United States,136,Action,87
2,2018,United States,149,Action,85
3,1985,United States,116,Adventure,85
4,1966,Italy,161,Western,88


In [4]:
cleanup_genre = {"Genre": {'Action': 5, 'Adventure': 5, 'Western': 3, 'Animation': 7, 'Biography': 1,
       'Drama': 8, 'Crime': 5, 'Comedy': 4, 'Documentary': 1, 'Family': 8, 'Horror': 6,
       'Thriller': 5, 'Mystery': 5, 'Fantasy': 7, 'Romance': 8, 'Short': 7, 'Sci-Fi': 6,
       'Sport': 2, 'Reality-TV': 4, 'Musical': 7, 'Music': 7, 'War': 3, 'History': 1,
       'Film-Noir': 6, 'Talk-Show': 4, 'Game-Show': 4}}

In [5]:
df = df.replace(cleanup_genre)
df.head()

Unnamed: 0,Year,Country,Runtime,Genre,IMDb
0,2010,United States,148,5,88
1,1999,United States,136,5,87
2,2018,United States,149,5,85
3,1985,United States,116,5,85
4,1966,Italy,161,3,88


In [6]:
df.loc[df["Country"] == "United States", "Country"] = 1
df.loc[df["Country"] != 1, "Country"] = 0
df["Country"] = df["Country"].astype(int)
df.head()

Unnamed: 0,Year,Country,Runtime,Genre,IMDb
0,2010,1,148,5,88
1,1999,1,136,5,87
2,2018,1,149,5,85
3,1985,1,116,5,85
4,1966,0,161,3,88


In [7]:
df.loc[df["IMDb"] < 80, "IMDb"] = 0
df.loc[df["IMDb"] >= 80, "IMDb"] = 1
df.head()

Unnamed: 0,Year,Country,Runtime,Genre,IMDb
0,2010,1,148,5,1
1,1999,1,136,5,1
2,2018,1,149,5,1
3,1985,1,116,5,1
4,1966,0,161,3,1


In [8]:
data = df.drop("IMDb", axis=1)
target = df["IMDb"].values.reshape(-1, 1)
print(data.shape, target.shape)

(15699, 4) (15699, 1)


In [9]:
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(data, target, random_state=1)

In [10]:
from sklearn.preprocessing import StandardScaler
X_scaler = StandardScaler().fit(X_train)

In [11]:
X_train_scaled = X_scaler.transform(X_train)
X_test_scaled = X_scaler.transform(X_test)

In [12]:
from tensorflow.keras.utils import to_categorical
y_train_categorical = to_categorical(y_train)
y_test_categorical = to_categorical(y_test)

In [13]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
model = Sequential()
model.add(Dense(units=10, activation="relu", input_dim=4))
model.add(Dense(units=2, activation="softmax"))

In [14]:
model.summary()

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense (Dense)                (None, 10)                50        
_________________________________________________________________
dense_1 (Dense)              (None, 2)                 22        
Total params: 72
Trainable params: 72
Non-trainable params: 0
_________________________________________________________________


In [15]:
model.compile(optimizer="adam", loss="categorical_crossentropy", metrics=["accuracy"])

In [16]:
model.fit(X_train_scaled, y_train_categorical, epochs=100, shuffle=True, verbose=2)

Epoch 1/100
368/368 - 1s - loss: 0.2703 - accuracy: 0.9400
Epoch 2/100
368/368 - 0s - loss: 0.1606 - accuracy: 0.9640
Epoch 3/100
368/368 - 0s - loss: 0.1513 - accuracy: 0.9640
Epoch 4/100
368/368 - 0s - loss: 0.1464 - accuracy: 0.9640
Epoch 5/100
368/368 - 0s - loss: 0.1434 - accuracy: 0.9638
Epoch 6/100
368/368 - 0s - loss: 0.1416 - accuracy: 0.9639
Epoch 7/100
368/368 - 0s - loss: 0.1403 - accuracy: 0.9640
Epoch 8/100
368/368 - 0s - loss: 0.1402 - accuracy: 0.9637
Epoch 9/100
368/368 - 0s - loss: 0.1395 - accuracy: 0.9638
Epoch 10/100
368/368 - 0s - loss: 0.1392 - accuracy: 0.9639
Epoch 11/100
368/368 - 0s - loss: 0.1390 - accuracy: 0.9639
Epoch 12/100
368/368 - 0s - loss: 0.1384 - accuracy: 0.9639
Epoch 13/100
368/368 - 0s - loss: 0.1385 - accuracy: 0.9638
Epoch 14/100
368/368 - 0s - loss: 0.1381 - accuracy: 0.9639
Epoch 15/100
368/368 - 0s - loss: 0.1378 - accuracy: 0.9638
Epoch 16/100
368/368 - 0s - loss: 0.1376 - accuracy: 0.9638
Epoch 17/100
368/368 - 0s - loss: 0.1375 - accura

<tensorflow.python.keras.callbacks.History at 0x7f8ecf646220>

In [17]:
deep_model = Sequential()
deep_model.add(Dense(units=10, activation="relu", input_dim=4))
deep_model.add(Dense(units=10, activation="relu"))
deep_model.add(Dense(units=2, activation="softmax"))

In [18]:
deep_model.summary()

Model: "sequential_1"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_2 (Dense)              (None, 10)                50        
_________________________________________________________________
dense_3 (Dense)              (None, 10)                110       
_________________________________________________________________
dense_4 (Dense)              (None, 2)                 22        
Total params: 182
Trainable params: 182
Non-trainable params: 0
_________________________________________________________________


In [19]:
deep_model.compile(optimizer="adam", loss="categorical_crossentropy", metrics=["accuracy"])

deep_model.fit(X_train_scaled, y_train_categorical, epochs=100, shuffle=True, verbose=2)

Epoch 1/100
368/368 - 1s - loss: 0.2562 - accuracy: 0.9507
Epoch 2/100
368/368 - 0s - loss: 0.1544 - accuracy: 0.9640
Epoch 3/100
368/368 - 0s - loss: 0.1467 - accuracy: 0.9640
Epoch 4/100
368/368 - 0s - loss: 0.1434 - accuracy: 0.9640
Epoch 5/100
368/368 - 0s - loss: 0.1414 - accuracy: 0.9640
Epoch 6/100
368/368 - 0s - loss: 0.1402 - accuracy: 0.9640
Epoch 7/100
368/368 - 0s - loss: 0.1394 - accuracy: 0.9640
Epoch 8/100
368/368 - 0s - loss: 0.1387 - accuracy: 0.9640
Epoch 9/100
368/368 - 0s - loss: 0.1379 - accuracy: 0.9640
Epoch 10/100
368/368 - 0s - loss: 0.1375 - accuracy: 0.9640
Epoch 11/100
368/368 - 0s - loss: 0.1370 - accuracy: 0.9640
Epoch 12/100
368/368 - 0s - loss: 0.1370 - accuracy: 0.9640
Epoch 13/100
368/368 - 0s - loss: 0.1363 - accuracy: 0.9640
Epoch 14/100
368/368 - 0s - loss: 0.1364 - accuracy: 0.9639
Epoch 15/100
368/368 - 0s - loss: 0.1361 - accuracy: 0.9640
Epoch 16/100
368/368 - 0s - loss: 0.1359 - accuracy: 0.9640
Epoch 17/100
368/368 - 0s - loss: 0.1355 - accura

<tensorflow.python.keras.callbacks.History at 0x7f8ed013c1f0>

In [20]:
model_loss, model_accuracy = model.evaluate(
    X_test_scaled, y_test_categorical, verbose=2)
print(f"Loss: {model_loss}, Accuracy: {model_accuracy}")

123/123 - 0s - loss: 0.1428 - accuracy: 0.9610
Loss: 0.14284873008728027, Accuracy: 0.9610190987586975


In [21]:
model_loss, model_accuracy = deep_model.evaluate(
    X_test_scaled, y_test_categorical, verbose=2)
print(f"Deep Neural Network - Loss: {model_loss}, Accuracy: {model_accuracy}")

123/123 - 0s - loss: 0.1451 - accuracy: 0.9597
Deep Neural Network - Loss: 0.1451275497674942, Accuracy: 0.9597452282905579
