# BinaryInteraction InteractedOnly Model Prediction Preparation

The BinaryInteraction InteractedOnly model table is prepared here for use in the Prediction notebook.

This table will be used to supply extra information about the recommendations.

In [1]:
#Importing libraries
import numpy as np
import pandas as pd
import tensorflow as tf
import warnings

In [2]:
#Show the version of every library this notebook relies on
print('numpy Version: ', np.__version__, sep = '')
print('pandas Version: ', pd.__version__, sep = '')
print('tensorflow Version: ', tf.__version__, sep = '')

numpy Version: 1.16.5
pandas Version: 0.25.1
tensorflow Version: 2.0.0


In [3]:
#GPU will be used for Prediction and Evaluation
#When the reported device name is falsy (no GPU name returned) "Error" is printed instead of the name
myGPU = tf.test.gpu_device_name()
print(myGPU or "Error")

/device:GPU:0


In [4]:
#Reading Qualified BinaryInteraction InteractedOnly training data from pkl file
#The frame displayed below has the columns UserId, MovieId and Interaction
#NOTE(review): read_pickle can run code stored inside the file, only load files from a trusted source
trainingDf = pd.read_pickle("../Data/pkl/1M/Qualified/BinaryInteraction/InteractedOnly/Training.pkl")
trainingDf

Unnamed: 0,UserId,MovieId,Interaction
0,4356,1027,1
1,3946,236,1
2,1399,113,1
3,3078,1586,1
4,8244,806,1
...,...,...,...
753540,3299,795,1
753541,4805,3723,1
753542,8255,94,1
753543,5854,1077,1


In [5]:
#Flat int32 array holding the UserId of every training row (user input of the model)
trainingUser = trainingDf['UserId'].values.astype(np.int32)
trainingUser

array([4356, 3946, 1399, ..., 8255, 5854,  216])

In [6]:
#Flat int32 array holding the MovieId of every training row (movie input of the model)
trainingMovie = trainingDf['MovieId'].values.astype(np.int32)
trainingMovie

array([1027,  236,  113, ...,   94, 1077, 1683])

In [7]:
#creating an array for training input
#The list keeps the order [users, movies]; it is later passed as x to model.evaluate and model.predict
trainingX = [trainingUser, trainingMovie]
trainingX

[array([4356, 3946, 1399, ..., 8255, 5854,  216]),
 array([1027,  236,  113, ...,   94, 1077, 1683])]

In [8]:
#Flat int8 array holding the Interaction label of every training row (expected model output)
trainingY = trainingDf['Interaction'].values.astype(np.int8)
trainingY

array([1, 1, 1, ..., 1, 1, 1], dtype=int8)

In [9]:
#Reading Qualified BinaryInteraction InteractedOnly validation data from pkl file
#The frame displayed below has the columns UserId, MovieId and Interaction
#NOTE(review): read_pickle can run code stored inside the file, only load files from a trusted source
validationDf = pd.read_pickle("../Data/pkl/1M/Qualified/BinaryInteraction/InteractedOnly/Validation.pkl")
validationDf

Unnamed: 0,UserId,MovieId,Interaction
0,7693,399,1
1,9590,1096,1
2,8546,285,1
3,9622,588,1
4,3870,1342,1
...,...,...,...
123669,8317,517,1
123670,2133,25,1
123671,8927,694,1
123672,9450,88,1


In [10]:
#Flat int32 array holding the UserId of every validation row (user input of the model)
validationUser = validationDf['UserId'].values.astype(np.int32)
validationUser

array([7693, 9590, 8546, ..., 8927, 9450, 2806])

In [11]:
#Flat int32 array holding the MovieId of every validation row (movie input of the model)
validationMovie = validationDf['MovieId'].values.astype(np.int32)
validationMovie

array([ 399, 1096,  285, ...,  694,   88,   61])

In [12]:
#creating an array for validation input
#The list keeps the order [users, movies]; it is later passed as x to model.evaluate and model.predict
validationX = [validationUser, validationMovie]
validationX

[array([7693, 9590, 8546, ..., 8927, 9450, 2806]),
 array([ 399, 1096,  285, ...,  694,   88,   61])]

In [13]:
#Flat int8 array holding the Interaction label of every validation row (expected model output)
validationY = validationDf['Interaction'].values.astype(np.int8)
validationY

array([1, 1, 1, ..., 1, 1, 1], dtype=int8)

In [14]:
#Reading Qualified BinaryInteraction InteractedOnly test data from pkl file
#The frame displayed below has the columns UserId, MovieId and Interaction
#NOTE(review): read_pickle can run code stored inside the file, only load files from a trusted source
testDf = pd.read_pickle("../Data/pkl/1M/Qualified/BinaryInteraction/InteractedOnly/Test.pkl")
testDf

Unnamed: 0,UserId,MovieId,Interaction
0,6731,149,1
1,3630,626,1
2,205,387,1
3,6673,2122,1
4,9225,3848,1
...,...,...,...
123669,8043,446,1
123670,2928,3526,1
123671,7986,1796,1
123672,2024,2730,1


In [15]:
#Flat int32 array holding the UserId of every test row (user input of the model)
testUser = testDf['UserId'].values.astype(np.int32)
testUser

array([6731, 3630,  205, ..., 7986, 2024, 3251])

In [16]:
#Flat int32 array holding the MovieId of every test row (movie input of the model)
testMovie = testDf['MovieId'].values.astype(np.int32)
testMovie

array([ 149,  626,  387, ..., 1796, 2730, 1782])

In [17]:
#creating an array for test input
#The list keeps the order [users, movies]; it is later passed as x to model.evaluate and model.predict
testX = [testUser, testMovie]
testX

[array([6731, 3630,  205, ..., 7986, 2024, 3251]),
 array([ 149,  626,  387, ..., 1796, 2730, 1782])]

In [18]:
#Flat int8 array holding the Interaction label of every test row (expected model output)
testY = testDf['Interaction'].values.astype(np.int8)
testY

array([1, 1, 1, ..., 1, 1, 1], dtype=int8)

In [19]:
#Ignore only the "Converting sparse IndexedSlices to a dense Tensor of unknown shape" warning
#message is a regular expression that has to match the start of the warning text,
#so every other warning raised later in the notebook stays visible
warnings.filterwarnings('ignore', message = 'Converting sparse IndexedSlices')

#Best Model For BinaryInteraction InteractedOnly dataset loading from h5 file
#See Training1 notebook for more information
model = tf.keras.models.load_model("../Model/InteractedOnlyModel/Model3.h5")

In [20]:
#Evaluate the loaded model on the training inputs and labels
#Runs on the first GPU, in batches of 64, without progress output
with tf.device('/GPU:0'):
    trainingResult = model.evaluate(
        trainingX,
        trainingY,
        batch_size = 64,
        verbose = 0,
    )

In [21]:
#Print trainingResult
#The values are stored further down as Loss, TP, FP, TN, FN and Accuracy (positions 0 to 5)
trainingResult

[1.0802068698471309e-06, 753545.0, 0.0, 0.0, 0.0, 1.0]

In [22]:
#Evaluate the loaded model on the validation inputs and labels
#Runs on the first GPU, in batches of 64, without progress output
with tf.device('/GPU:0'):
    validationResult = model.evaluate(
        validationX,
        validationY,
        batch_size = 64,
        verbose = 0,
    )

In [23]:
#Print validationResult
#The values are stored further down as Loss, TP, FP, TN, FN and Accuracy (positions 0 to 5)
validationResult

[1.0800193643569693e-06, 123674.0, 0.0, 0.0, 0.0, 1.0]

In [24]:
#Evaluate the loaded model on the test inputs and labels
#Runs on the first GPU, in batches of 64, without progress output
with tf.device('/GPU:0'):
    testResult = model.evaluate(
        testX,
        testY,
        batch_size = 64,
        verbose = 0,
    )

In [25]:
#Print testResult
#The values are stored further down as Loss, TP, FP, TN, FN and Accuracy (positions 0 to 5)
testResult

[1.0791889657176365e-06, 123674.0, 0.0, 0.0, 0.0, 1.0]

In [26]:
#Creating DataFrame for data and model metrics
#One row per data split, the splits are always handled in the order Training, Validation, Test
splitFrames = [trainingDf, validationDf, testDf]
splitResults = [trainingResult, validationResult, testResult]

modelColumns = {'ModelData': pd.Series(['Training', 'Validation', 'Test'], dtype='str')}

#Number of rows of every split that carry the Interaction label 0 (Negative) or 1 (Positive)
for columnName, label in (('Negative', 0), ('Positive', 1)):
    modelColumns[columnName] = pd.Series([frame[frame['Interaction'] == label].shape[0] for frame in splitFrames], dtype='int')

#Metric columns: the position inside each evaluate result decides the column it is stored in
metricColumns = (('Loss', 'float'), ('TP', 'int'), ('FP', 'int'), ('TN', 'int'), ('FN', 'int'), ('Accuracy', 'float'))
for position, (columnName, columnType) in enumerate(metricColumns):
    modelColumns[columnName] = pd.Series([result[position] for result in splitResults], dtype=columnType)

modelDf = pd.DataFrame(data = modelColumns)
modelDf

Unnamed: 0,ModelData,Negative,Positive,Loss,TP,FP,TN,FN,Accuracy
0,Training,0,753545,1e-06,753545,0,0,0,1.0
1,Validation,0,123674,1e-06,123674,0,0,0,1.0
2,Test,0,123674,1e-06,123674,0,0,0,1.0


In [27]:
#Saving modelDf and clearing memory
#The metrics table is written to a pkl file first, afterwards the name modelDf is removed from the notebook
modelDf.to_pickle("../PredictData/ModelTable/InteractedOnly.pkl")
del modelDf

In [28]:
#Predict the Interaction of every training (user, movie) pair with the loaded model on the first GPU
with tf.device('/GPU:0'):
    predictTraining = model.predict(trainingX)
predictTraining

array([[1.],
       [1.],
       [1.],
       ...,
       [1.],
       [1.],
       [1.]], dtype=float32)

In [29]:
#Store the training predictions next to the real labels
#The predictions are displayed with one value per row, so they are flattened to a plain column first
trainingDf['InteractionPredict'] = predictTraining.reshape(-1)
trainingDf

Unnamed: 0,UserId,MovieId,Interaction,InteractionPredict
0,4356,1027,1,1.0
1,3946,236,1,1.0
2,1399,113,1,1.0
3,3078,1586,1,1.0
4,8244,806,1,1.0
...,...,...,...,...
753540,3299,795,1,1.0
753541,4805,3723,1,1.0
753542,8255,94,1,1.0
753543,5854,1077,1,1.0


In [30]:
#Predict the Interaction of every validation (user, movie) pair with the loaded model on the first GPU
with tf.device('/GPU:0'):
    predictValidation = model.predict(validationX)
predictValidation

array([[1.],
       [1.],
       [1.],
       ...,
       [1.],
       [1.],
       [1.]], dtype=float32)

In [31]:
#Store the validation predictions next to the real labels
#The predictions are displayed with one value per row, so they are flattened to a plain column first
validationDf['InteractionPredict'] = predictValidation.reshape(-1)
validationDf

Unnamed: 0,UserId,MovieId,Interaction,InteractionPredict
0,7693,399,1,1.0
1,9590,1096,1,1.0
2,8546,285,1,1.0
3,9622,588,1,1.0
4,3870,1342,1,1.0
...,...,...,...,...
123669,8317,517,1,1.0
123670,2133,25,1,1.0
123671,8927,694,1,1.0
123672,9450,88,1,1.0


In [32]:
#Predict the Interaction of every test (user, movie) pair with the loaded model on the first GPU
with tf.device('/GPU:0'):
    predictTest = model.predict(testX)
predictTest

array([[1.],
       [1.],
       [1.],
       ...,
       [1.],
       [1.],
       [1.]], dtype=float32)

In [33]:
#Store the test predictions next to the real labels
#The predictions are displayed with one value per row, so they are flattened to a plain column first
testDf['InteractionPredict'] = predictTest.reshape(-1)
testDf

Unnamed: 0,UserId,MovieId,Interaction,InteractionPredict
0,6731,149,1,1.0
1,3630,626,1,1.0
2,205,387,1,1.0
3,6673,2122,1,1.0
4,9225,3848,1,1.0
...,...,...,...,...
123669,8043,446,1,1.0
123670,2928,3526,1,1.0
123671,7986,1796,1,1.0
123672,2024,2730,1,1.0


In [34]:
#Stack the training, validation and test frames into one frame
#ignore_index numbers the rows of the result again from 0 instead of keeping the index of each split
allDataFrame = pd.concat([trainingDf, validationDf, testDf], ignore_index = True)
allDataFrame

Unnamed: 0,UserId,MovieId,Interaction,InteractionPredict
0,4356,1027,1,1.0
1,3946,236,1,1.0
2,1399,113,1,1.0
3,3078,1586,1,1.0
4,8244,806,1,1.0
...,...,...,...,...
1000888,8043,446,1,1.0
1000889,2928,3526,1,1.0
1000890,7986,1796,1,1.0
1000891,2024,2730,1,1.0


In [35]:
#Build the lookup table of the prediction script with one row per user
#The users are numbered 0 .. (number of distinct UserId values - 1)
#NOTE(review): this assumes the UserId values themselves run from 0 without gaps - confirm against the data
userCount = len(allDataFrame['UserId'].unique())
userRange = range(userCount)
lookupTable = pd.DataFrame({'User' : userRange})
lookupTable

Unnamed: 0,User
0,0
1,1
2,2
3,3
4,4
...,...
10068,10068
10069,10069
10070,10070
10071,10071


In [36]:
#Defined function checks how many predictions are true
#rounding prediction returns closest label (0 or 1)
def CorrectPrediction(interaction, prediction):
    """Count the predictions that round to their real interaction label.

    Parameters
    ----------
    interaction : array-like
        Real labels, one per row.
    prediction : array-like
        Predicted scores. Every score is rounded with np.round before it is
        compared with the label at the same position. Flat input and input
        with one value per row (shape (n, 1)) are both accepted.

    Returns
    -------
    int
        Number of positions where the rounded prediction equals the label,
        0 when interaction is empty.

    Raises
    ------
    IndexError
        If prediction holds fewer values than interaction.
    """
    actual = np.asarray(interaction).reshape(-1)
    predicted = np.asarray(prediction).reshape(-1)
    if predicted.shape[0] < actual.shape[0]:
        raise IndexError('prediction has fewer values than interaction')
    #Only the first len(interaction) predictions are compared, any further ones are ignored
    #One vectorized comparison replaces the element by element Python loop
    matches = actual == np.round(predicted[:actual.shape[0]])
    return int(np.count_nonzero(matches))

In [37]:
#Calculations for each user
#Rows of calculations: 0 = training representation count, 1 = training correct predictions,
#2 and 3 = the same for validation, 4 and 5 = the same for test, 6 and 7 = the same for all data
#Every split is grouped by UserId once; filtering each whole split again for every single user
#gives the same numbers but has to scan all rows once per user
userIds = lookupTable['User'].values
calculations = np.zeros(shape = (8, lookupTable.shape[0]))
for firstRow, splitDf in ((0, trainingDf), (2, validationDf), (4, testDf)):
    #1 where the rounded prediction (closest label, 0 or 1) equals the real Interaction, else 0
    #This is the same rounding rule CorrectPrediction applies
    isCorrect = pd.Series((splitDf['Interaction'].values == np.round(splitDf['InteractionPredict'].values)).astype(np.int64),
                          index = splitDf['UserId'].values)
    perUser = isCorrect.groupby(level = 0)
    #Users that do not appear in this split get 0 for both values
    calculations[firstRow] = perUser.size().reindex(userIds, fill_value = 0).values
    calculations[firstRow + 1] = perUser.sum().reindex(userIds, fill_value = 0).values

#All dataframe calculations
#Since All dataframe contains training, validation and test dataframes
#All dataframe results can be calculated by simply summing the training, validation and test results
calculations[6] = calculations[0] + calculations[2] + calculations[4]
calculations[7] = calculations[1] + calculations[3] + calculations[5]


#print
calculations

array([[  9.,  13.,   6., ...,  29.,  36., 714.],
       [  9.,  13.,   6., ...,  29.,  36., 714.],
       [  2.,   0.,   4., ...,   3.,   5., 113.],
       ...,
       [  5.,   2.,   1., ...,   2.,   1., 145.],
       [ 16.,  15.,  11., ...,  34.,  42., 972.],
       [ 16.,  15.,  11., ...,  34.,  42., 972.]])

In [38]:
#Add the per user calculations to the lookup table as int32 columns
#Row k of calculations becomes the k-th column name below:
#representation count and correct predictions for training, validation, test and all data
#NOTE(review): 'TraingingRep' is kept with this exact spelling because the saved table may be read
#by name elsewhere - confirm before renaming it
calculationColumns = ['TraingingRep', 'TrainingCorrect',
                      'ValidationRep', 'ValidationCorrect',
                      'TestRep', 'TestCorrect',
                      'AllRep', 'AllCorrect']
for rowNumber, columnName in enumerate(calculationColumns):
    lookupTable[columnName] = calculations[rowNumber].astype(np.int32)

lookupTable

Unnamed: 0,User,TraingingRep,TrainingCorrect,ValidationRep,ValidationCorrect,TestRep,TestCorrect,AllRep,AllCorrect
0,0,9,9,2,2,5,5,16,16
1,1,13,13,0,0,2,2,15,15
2,2,6,6,4,4,1,1,11,11
3,3,538,538,95,95,103,103,736,736
4,4,53,53,8,8,11,11,72,72
...,...,...,...,...,...,...,...,...,...
10068,10068,14,14,2,2,2,2,18,18
10069,10069,19,19,2,2,3,3,24,24
10070,10070,29,29,3,3,2,2,34,34
10071,10071,36,36,5,5,1,1,42,42


In [39]:
#Write the finished lookup table to its pkl file
lookupTable.to_pickle("../PredictData/LookupTable/InteractedOnly.pkl")
#Nothing below needs these objects any more, so their names are removed to free memory
del calculations, trainingDf, validationDf, testDf, allDataFrame, lookupTable