# Chapter 3.5 Using an Arbitrary Classification Model as a Black-Box

This code is my attempt at using a neural network as a black-box method for predicting ratings.

Do note that this is SAMPLE code and is in no way optimized for PRODUCTION use. Feel free to use it for educational purposes.

In [1]:
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
from math import sqrt
%matplotlib inline

In [8]:
# Recreate the ratings matrix from the book (Figure 3.4, page 88).
# One column per movie; rows are indexed 1-6 (presumably one row per user).
# np.nan marks a missing rating. The lowercase spelling is used because the
# np.NAN alias was removed in NumPy 2.0.

data = [
    [2, np.nan, 5, 5],
    [np.nan, 1, 4, 4],
    [3, np.nan, 1, np.nan],
    [np.nan, 5, 1, np.nan],
    [1, 1, 4, np.nan],
    [5, np.nan, np.nan, 1],
]

data_df = pd.DataFrame(
    data=data,
    index=range(1, 7),
    columns=['Gladiator', 'Ben-hur', 'Godfather', 'Goodfellas'],
)

data_df

Unnamed: 0,Gladiator,Ben-hur,Godfather,Goodfellas
1,2.0,,5.0,5.0
2,,1.0,4.0,4.0
3,3.0,,1.0,
4,,5.0,1.0,
5,1.0,1.0,4.0,
6,5.0,,,1.0


In [14]:
# Calculate and store the mean rating of each row.
# DataFrame.mean(axis=1) averages across the columns of every row and skips
# NaN by default, so only the ratings that are present contribute.

data_df['mean_value'] = data_df.mean(axis=1)

data_df

Unnamed: 0,Gladiator,Ben-hur,Godfather,Goodfellas,mean_value
1,2.0,,5.0,5.0,4.0
2,,1.0,4.0,4.0,3.0
3,3.0,,1.0,,2.0
4,,5.0,1.0,,3.0
5,1.0,1.0,4.0,,2.0
6,5.0,,,1.0,3.0


In [17]:
# Mean-centre the ratings: subtract each row's mean from every column of that row.
# One vectorised subtraction uses the original means for all columns, so the
# result does not depend on 'mean_value' being the last column processed.
# Note: 'mean_value' is itself centred here and therefore becomes 0.
data_df = data_df.sub(data_df['mean_value'], axis=0)

In [18]:
# Display the frame after the row means have been subtracted
data_df

Unnamed: 0,Gladiator,Ben-hur,Godfather,Goodfellas,mean_value
1,-2.0,,1.0,1.0,0.0
2,,-2.0,1.0,1.0,0.0
3,1.0,,-1.0,,0.0
4,,2.0,-2.0,,0.0
5,-1.0,-1.0,2.0,,0.0
6,2.0,,,-2.0,0.0


In [19]:
# Last step: fill every remaining NaN with 0. Because each row's mean was
# subtracted earlier, 0 is exactly the row-wise mean rating.

data_df = data_df.fillna(0)

data_df

Unnamed: 0,Gladiator,Ben-hur,Godfather,Goodfellas,mean_value
1,-2.0,0.0,1.0,1.0,0.0
2,0.0,-2.0,1.0,1.0,0.0
3,1.0,0.0,-1.0,0.0,0.0
4,0.0,2.0,-2.0,0.0,0.0
5,-1.0,-1.0,2.0,0.0,0.0
6,2.0,0.0,0.0,-2.0,0.0


In [None]:
# Now that we have a matrix that corresponds to Figure 3.4, let's train a neural network to predict the ratings
# for each of the movies