In [1]:
import pandas as pd

In [2]:
# Read cleaned_data.csv from the sibling Resources directory into a DataFrame.
# The path is relative, so it resolves against the kernel's working directory.
df = pd.read_csv("../Resources/cleaned_data.csv")
# Show the first rows as a quick check of the loaded columns.
df.head()

Unnamed: 0,Title,Year,Age,IMDb,Rotten Tomatoes,Netflix,Hulu,Prime Video,Disney+,Directors,Genres,Country,Runtime,production_company,Top_Genres,Top_Director
0,Inception,2010,13+,8.8,8.7,1,0,0,0,Christopher Nolan,"Action,Adventure,Sci-Fi,Thriller",United States,148.0,Warner Bros.,Action,Christopher Nolan
1,The Matrix,1999,18+,8.7,8.7,1,0,0,0,"Lana Wachowski,Lilly Wachowski","Action,Sci-Fi",United States,136.0,,Action,Lana Wachowski
2,Avengers: Infinity War,2018,13+,8.5,8.4,1,0,0,0,"Anthony Russo,Joe Russo","Action,Adventure,Sci-Fi",United States,149.0,,Action,Anthony Russo
3,Back to the Future,1985,7+,8.5,9.6,1,0,0,0,Robert Zemeckis,"Adventure,Comedy,Sci-Fi",United States,116.0,,Adventure,Robert Zemeckis
4,"The Good, the Bad and the Ugly",1966,18+,8.8,9.7,1,0,1,0,Sergio Leone,Western,Italy,161.0,,Western,Sergio Leone


In [3]:
# Keep only the modelling columns, rename the genre column, drop incomplete rows,
# and convert the two float columns to integers in a single chained expression.
df = (
    df.loc[:, ["Year", "Country", "Runtime", "Top_Genres", "IMDb"]]
    .rename(columns={"Top_Genres": "Genre"})
    # Remove every row that has a missing value in any of the kept columns.
    .dropna()
    .assign(
        # Runtime is rounded to the nearest whole number before the int cast.
        Runtime=lambda frame: frame["Runtime"].round(0).astype(int),
        # IMDb is scaled by 10 and rounded so one-decimal ratings become integers (8.8 -> 88).
        IMDb=lambda frame: (frame["IMDb"] * 10).round(0).astype(int),
    )
)
df.head()

Unnamed: 0,Year,Country,Runtime,Genre,IMDb
0,2010,United States,148,Action,88
1,1999,United States,136,Action,87
2,2018,United States,149,Action,85
3,1985,United States,116,Adventure,85
4,1966,Italy,161,Western,88


In [4]:
# Hand-assigned integer code for each genre label, grouped by code so it is easy
# to see which genres share a value.
# NOTE(review): the rationale behind the grouping/ordering of the codes is not
# documented in this notebook - confirm with the author.
_GENRES_BY_CODE = {
    1: ("Biography", "Documentary", "History"),
    2: ("Sport",),
    3: ("Western", "War"),
    4: ("Comedy", "Reality-TV", "Talk-Show", "Game-Show"),
    5: ("Action", "Adventure", "Crime", "Thriller", "Mystery"),
    6: ("Horror", "Sci-Fi", "Film-Noir"),
    7: ("Animation", "Fantasy", "Short", "Musical", "Music"),
    8: ("Drama", "Family", "Romance"),
}

# Nested {column: {label: code}} layout, which is the form DataFrame.replace
# accepts for per-column replacement.
cleanup_genre = {
    "Genre": {
        genre: code
        for code, genres in _GENRES_BY_CODE.items()
        for genre in genres
    }
}

In [5]:
# Encode the Genre column with the integer codes from cleanup_genre.
#
# Labels are translated with an explicit lookup instead of DataFrame.replace so
# that (a) the resulting dtype is set explicitly rather than relying on
# replace's implicit object -> int downcast, and (b) a genre that has no code
# is reported here instead of surviving as a string and failing later when the
# features are scaled.
genre_codes = cleanup_genre["Genre"]

# Values without an entry are passed through unchanged (same as replace), which
# also keeps the cell safe to re-run once the column already holds codes.
encoded_genre = df["Genre"].map(lambda label: genre_codes.get(label, label))

# Any string still present is a genre the mapping does not cover.
unmapped_genres = sorted(
    {label for label in encoded_genre.unique() if isinstance(label, str)}
)
if unmapped_genres:
    raise ValueError(
        f"No numeric code defined in cleanup_genre for genre(s): {unmapped_genres}"
    )

df = df.assign(Genre=encoded_genre.astype(int))
df.head()

Unnamed: 0,Year,Country,Runtime,Genre,IMDb
0,2010,United States,148,5,88
1,1999,United States,136,5,87
2,2018,United States,149,5,85
3,1985,United States,116,5,85
4,1966,Italy,161,3,88


In [6]:
# Binary-encode Country: rows whose value is exactly "United States" become 1 ...
df.loc[df["Country"] == "United States", "Country"] = 1
# ... and every other row becomes 0. This mask is evaluated after the write
# above, so it selects all rows that were not just set to 1.
df.loc[df["Country"] != 1, "Country"] = 0
# The column held strings until now, so cast it to an integer dtype explicitly.
df["Country"] = df["Country"].astype(int)
df.head()

Unnamed: 0,Year,Country,Runtime,Genre,IMDb
0,2010,1,148,5,88
1,1999,1,136,5,87
2,2018,1,149,5,85
3,1985,1,116,5,85
4,1966,0,161,3,88


In [7]:
# Turn the scaled IMDb score into a binary label: 1 when the score is at least
# 70, otherwise 0.
IMDB_GOOD_THRESHOLD = 70

# Guard against re-execution: once the column holds only 0/1, thresholding it
# again would relabel every row as 0 (both values are below the threshold), so
# the conversion is applied only while the column still contains raw scores.
if not df["IMDb"].isin([0, 1]).all():
    # A single comparison assigns both classes at once, so the result does not
    # depend on the order of two separate in-place writes.
    df["IMDb"] = (df["IMDb"] >= IMDB_GOOD_THRESHOLD).astype(int)
df.head()

Unnamed: 0,Year,Country,Runtime,Genre,IMDb
0,2010,1,148,5,1
1,1999,1,136,5,1
2,2018,1,149,5,1
3,1985,1,116,5,1
4,1966,0,161,3,1


In [8]:
# Feature matrix: every column except the IMDb label.
data = df.drop(columns=["IMDb"])
# Label as a column vector of shape (n_rows, 1).
target = df["IMDb"].to_numpy().reshape(-1, 1)
# Print both shapes to confirm the row counts match.
print(data.shape, target.shape)

(15699, 4) (15699, 1)


In [9]:
from sklearn.model_selection import train_test_split
# Split features and labels into train and test parts. test_size is not passed,
# so scikit-learn's default split ratio applies; random_state=1 fixes the
# shuffle so the same rows land in each part on every run.
X_train, X_test, y_train, y_test = train_test_split(data, target, random_state=1)

In [10]:
from sklearn.preprocessing import StandardScaler
# Fit the scaler on the training features only, so the scaling statistics are
# computed without looking at the test rows.
X_scaler = StandardScaler().fit(X_train)

In [11]:
# Apply the scaler that was fitted on X_train to both splits.
X_train_scaled = X_scaler.transform(X_train)
X_test_scaled = X_scaler.transform(X_test)

In [12]:
from tensorflow.keras.utils import to_categorical

# One-hot encode the binary labels for the 2-unit softmax output layer.
# The class count is passed explicitly: without it, to_categorical infers the
# width from the largest label in each array separately, so a split that
# happened to contain only class 0 would produce a single-column matrix that
# no longer matches the network's output shape.
NUM_CLASSES = 2
y_train_categorical = to_categorical(y_train, num_classes=NUM_CLASSES)
y_test_categorical = to_categorical(y_test, num_classes=NUM_CLASSES)

In [13]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense

# Baseline network declared in one expression: a single 12-unit ReLU hidden
# layer fed by the 4 input features, followed by a 2-unit softmax output.
model = Sequential(
    [
        Dense(units=12, activation="relu", input_dim=4),
        Dense(units=2, activation="softmax"),
    ]
)

In [14]:
# Print the layer-by-layer structure and parameter counts of the baseline model.
model.summary()

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense (Dense)                (None, 12)                60        
_________________________________________________________________
dense_1 (Dense)              (None, 2)                 26        
Total params: 86
Trainable params: 86
Non-trainable params: 0
_________________________________________________________________


In [15]:
# Configure training: Adam optimizer, categorical cross-entropy loss (labels are
# one-hot encoded), and accuracy reported as the tracked metric.
model.compile(optimizer="adam", loss="categorical_crossentropy", metrics=["accuracy"])

In [16]:
# Train the baseline model for 200 epochs on the scaled training data, shuffling
# each epoch; verbose=2 logs one line per epoch.
# NOTE(review): no validation data is passed, so the logged accuracy is measured
# on the training set only.
model.fit(X_train_scaled, y_train_categorical, epochs=200, shuffle=True, verbose=2)

Epoch 1/200
368/368 - 1s - loss: 0.5539 - accuracy: 0.7505
Epoch 2/200
368/368 - 0s - loss: 0.5075 - accuracy: 0.7658
Epoch 3/200
368/368 - 0s - loss: 0.4946 - accuracy: 0.7647
Epoch 4/200
368/368 - 0s - loss: 0.4852 - accuracy: 0.7677
Epoch 5/200
368/368 - 0s - loss: 0.4782 - accuracy: 0.7711
Epoch 6/200
368/368 - 0s - loss: 0.4732 - accuracy: 0.7714
Epoch 7/200
368/368 - 0s - loss: 0.4698 - accuracy: 0.7730
Epoch 8/200
368/368 - 0s - loss: 0.4678 - accuracy: 0.7757
Epoch 9/200
368/368 - 0s - loss: 0.4661 - accuracy: 0.7762
Epoch 10/200
368/368 - 0s - loss: 0.4648 - accuracy: 0.7782
Epoch 11/200
368/368 - 0s - loss: 0.4636 - accuracy: 0.7786
Epoch 12/200
368/368 - 0s - loss: 0.4629 - accuracy: 0.7782
Epoch 13/200
368/368 - 0s - loss: 0.4620 - accuracy: 0.7788
Epoch 14/200
368/368 - 0s - loss: 0.4616 - accuracy: 0.7786
Epoch 15/200
368/368 - 0s - loss: 0.4611 - accuracy: 0.7772
Epoch 16/200
368/368 - 0s - loss: 0.4606 - accuracy: 0.7795
Epoch 17/200
368/368 - 0s - loss: 0.4603 - accura

368/368 - 0s - loss: 0.4506 - accuracy: 0.7855
Epoch 138/200
368/368 - 0s - loss: 0.4507 - accuracy: 0.7867
Epoch 139/200
368/368 - 0s - loss: 0.4508 - accuracy: 0.7844
Epoch 140/200
368/368 - 0s - loss: 0.4499 - accuracy: 0.7846
Epoch 141/200
368/368 - 0s - loss: 0.4506 - accuracy: 0.7865
Epoch 142/200
368/368 - 0s - loss: 0.4505 - accuracy: 0.7853
Epoch 143/200
368/368 - 0s - loss: 0.4507 - accuracy: 0.7858
Epoch 144/200
368/368 - 0s - loss: 0.4503 - accuracy: 0.7862
Epoch 145/200
368/368 - 0s - loss: 0.4510 - accuracy: 0.7851
Epoch 146/200
368/368 - 0s - loss: 0.4504 - accuracy: 0.7860
Epoch 147/200
368/368 - 0s - loss: 0.4505 - accuracy: 0.7859
Epoch 148/200
368/368 - 0s - loss: 0.4508 - accuracy: 0.7840
Epoch 149/200
368/368 - 0s - loss: 0.4503 - accuracy: 0.7870
Epoch 150/200
368/368 - 0s - loss: 0.4507 - accuracy: 0.7845
Epoch 151/200
368/368 - 0s - loss: 0.4498 - accuracy: 0.7861
Epoch 152/200
368/368 - 0s - loss: 0.4504 - accuracy: 0.7848
Epoch 153/200
368/368 - 0s - loss: 0.4

<tensorflow.python.keras.callbacks.History at 0x7f876501f8b0>

In [17]:
# Deeper variant declared in one expression: two 12-unit ReLU hidden layers
# (the first fed by the 4 input features) followed by a 2-unit softmax output.
deep_model = Sequential(
    [
        Dense(units=12, activation="relu", input_dim=4),
        Dense(units=12, activation="relu"),
        Dense(units=2, activation="softmax"),
    ]
)

In [18]:
# Print the layer-by-layer structure and parameter counts of the deeper model.
deep_model.summary()

Model: "sequential_1"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_2 (Dense)              (None, 12)                60        
_________________________________________________________________
dense_3 (Dense)              (None, 12)                156       
_________________________________________________________________
dense_4 (Dense)              (None, 2)                 26        
Total params: 242
Trainable params: 242
Non-trainable params: 0
_________________________________________________________________


In [19]:
# Configure the deeper model with the same optimizer, loss and metric settings
# as the baseline model's compile call.
deep_model.compile(optimizer="adam",
                   loss="categorical_crossentropy",
                   metrics=["accuracy"])

# Train for 200 epochs on the scaled training data with per-epoch shuffling;
# verbose=2 logs one line per epoch.
# NOTE(review): no validation data is passed, so the logged accuracy is measured
# on the training set only.
deep_model.fit(X_train_scaled, y_train_categorical, epochs=200, shuffle=True, verbose=2)

Epoch 1/200
368/368 - 1s - loss: 0.5735 - accuracy: 0.7012
Epoch 2/200
368/368 - 0s - loss: 0.4795 - accuracy: 0.7748
Epoch 3/200
368/368 - 0s - loss: 0.4671 - accuracy: 0.7799
Epoch 4/200
368/368 - 0s - loss: 0.4617 - accuracy: 0.7818
Epoch 5/200
368/368 - 0s - loss: 0.4585 - accuracy: 0.7818
Epoch 6/200
368/368 - 0s - loss: 0.4557 - accuracy: 0.7829
Epoch 7/200
368/368 - 0s - loss: 0.4540 - accuracy: 0.7843
Epoch 8/200
368/368 - 0s - loss: 0.4532 - accuracy: 0.7835
Epoch 9/200
368/368 - 0s - loss: 0.4525 - accuracy: 0.7835
Epoch 10/200
368/368 - 0s - loss: 0.4521 - accuracy: 0.7856
Epoch 11/200
368/368 - 0s - loss: 0.4513 - accuracy: 0.7841
Epoch 12/200
368/368 - 0s - loss: 0.4509 - accuracy: 0.7841
Epoch 13/200
368/368 - 0s - loss: 0.4503 - accuracy: 0.7861
Epoch 14/200
368/368 - 0s - loss: 0.4508 - accuracy: 0.7866
Epoch 15/200
368/368 - 0s - loss: 0.4502 - accuracy: 0.7848
Epoch 16/200
368/368 - 0s - loss: 0.4499 - accuracy: 0.7849
Epoch 17/200
368/368 - 0s - loss: 0.4500 - accura

368/368 - 0s - loss: 0.4410 - accuracy: 0.7881
Epoch 138/200
368/368 - 0s - loss: 0.4409 - accuracy: 0.7886
Epoch 139/200
368/368 - 0s - loss: 0.4414 - accuracy: 0.7878
Epoch 140/200
368/368 - 0s - loss: 0.4407 - accuracy: 0.7879
Epoch 141/200
368/368 - 0s - loss: 0.4408 - accuracy: 0.7887
Epoch 142/200
368/368 - 0s - loss: 0.4410 - accuracy: 0.7883
Epoch 143/200
368/368 - 0s - loss: 0.4407 - accuracy: 0.7900
Epoch 144/200
368/368 - 0s - loss: 0.4410 - accuracy: 0.7904
Epoch 145/200
368/368 - 0s - loss: 0.4408 - accuracy: 0.7881
Epoch 146/200
368/368 - 0s - loss: 0.4406 - accuracy: 0.7883
Epoch 147/200
368/368 - 0s - loss: 0.4405 - accuracy: 0.7891
Epoch 148/200
368/368 - 0s - loss: 0.4409 - accuracy: 0.7876
Epoch 149/200
368/368 - 0s - loss: 0.4406 - accuracy: 0.7912
Epoch 150/200
368/368 - 0s - loss: 0.4408 - accuracy: 0.7891
Epoch 151/200
368/368 - 0s - loss: 0.4402 - accuracy: 0.7908
Epoch 152/200
368/368 - 0s - loss: 0.4407 - accuracy: 0.7893
Epoch 153/200
368/368 - 0s - loss: 0.4

<tensorflow.python.keras.callbacks.History at 0x7f876594a820>

In [20]:
# Score the baseline model on the held-out test split; evaluate returns the loss
# followed by the compiled metric (accuracy).
model_loss, model_accuracy = model.evaluate(
    X_test_scaled, y_test_categorical, verbose=2)
print(f"Loss: {model_loss}, Accuracy: {model_accuracy}")

123/123 - 0s - loss: 0.4560 - accuracy: 0.7842
Loss: 0.455961138010025, Accuracy: 0.784203827381134


In [21]:
# Score the deeper model on the same held-out test split.
# NOTE(review): this reuses the names model_loss / model_accuracy, so the
# baseline model's values from the previous evaluation are overwritten.
model_loss, model_accuracy = deep_model.evaluate(
    X_test_scaled, y_test_categorical, verbose=2)
print(f"Deep Neural Network - Loss: {model_loss}, Accuracy: {model_accuracy}")

123/123 - 0s - loss: 0.4465 - accuracy: 0.7893
Deep Neural Network - Loss: 0.4465425908565521, Accuracy: 0.7892993688583374
