In [12]:
import pickle
import pandas as pd
from sklearn.metrics import *
import operator
import numpy as np
from scipy import stats

def bestGuess(array, devices):
    """Return the entry of ``devices`` at the position of ``array``'s maximum.

    ``array`` must provide ``.max()`` and element-wise ``==`` (for example a
    NumPy array).  When the maximum occurs more than once, the first
    occurrence reported by NumPy is used.  ``devices`` is indexed with the
    resulting integer.
    """
    peak = array.max()
    # Positions (along the first axis) of every element equal to the maximum.
    peakPositions = np.nonzero(array == peak)[0]
    return devices[int(peakPositions[0])]

def bestGuessIndex(array, devices):
    """Return, as a plain ``int``, the position of ``array``'s maximum.

    ``array`` must provide ``.max()`` and element-wise ``==`` (for example a
    NumPy array).  Ties resolve to the first occurrence reported by NumPy.
    ``devices`` is accepted but not used by this function.
    """
    peak = array.max()
    firstAxisHits = np.nonzero(array == peak)[0]
    return int(firstAxisHits[0])

def zscore(table, confidence, idxDevice):
    """Return the z-score of ``confidence`` for the device at ``idxDevice``.

    The mean and standard deviation are read from the ``'Avg Conf'`` and
    ``'Std Conf'`` entries of ``table``, each looked up with ``idxDevice``.
    The result is ``(confidence - mean) / stdev``.
    """
    centre = table['Avg Conf'][idxDevice]
    spread = table['Std Conf'][idxDevice]
    return (confidence - centre) / spread
    
def gotItRight(predictions_proba, predictions, devices, y):
    """Print how many predictions match the true labels.

    Each entry of ``predictions`` is used as an index into ``devices``; the
    resulting label is compared with the entry of ``y`` at the same position.

    Printed, in order:
      1. ``devices``
      2. whether ``predictions_proba`` and ``y`` have the same length
      3. ``"correct is <count>"``
      4. the number of predictions

    Returns nothing.
    """
    print(devices)
    print(len(predictions_proba) == len(y))

    hits = sum(
        1
        for position, guessIndex in enumerate(predictions)
        if devices[guessIndex] == y[position]
    )

    print("correct is " + str(hits))
    print(len(predictions))

def makePrediction(model, unseen, devices, table, labels):
    """Score a pickled classifier on unseen readings and print a summary.

    Parameters
    ----------
    model : path of the pickled classifier file; opened in binary mode.
    unseen : pandas DataFrame of readings.  Must contain a ``'Device'``
        column holding the true label of every row.
    devices : sequence indexed with each prediction to obtain the predicted
        label.  NOTE(review): this assumes the model predicts integer
        positions that line up with ``devices`` -- confirm against training.
    table : per-device confidence statistics, read by ``zscore`` through its
        ``'Avg Conf'`` and ``'Std Conf'`` entries, looked up with the
        prediction value.
    labels : column names removed from ``unseen`` (when present) before the
        remaining columns are passed to the model.

    Every row is classified as one of:
      * caught replay      -- true label contains "replay" and the model was
                              either uncertain (z-score < -1) or predicted a
                              replay label;
      * uncertain device   -- genuine device, z-score < -1;
      * certain replay     -- replay predicted confidently as a device;
      * correct device     -- confident and exactly right;
      * confused same      -- confident, wrong, but labels agree once their
                              last two characters are removed;
      * incorrect device   -- confident and wrong.

    The counts are printed; nothing is returned.
    """
    # NOTE(security): unpickling executes arbitrary code embedded in the file.
    # Only load model files that come from a trusted source.
    with open(model, 'rb') as modelFile:
        loadedModel = pickle.load(modelFile)

    # Present the feature columns in sorted order and discard the
    # spreadsheet index columns whose name contains "Unname".
    unseen = unseen.reindex(sorted(unseen.columns), axis=1)
    unnamedColumns = unseen.columns[unseen.columns.str.contains('Unname', case=False)]
    unseen = unseen.drop(columns=unnamedColumns)

    # Keep the ground truth before the label columns are removed.
    y = unseen['Device'].tolist()

    for col in labels:
        if col in unseen.columns:
            unseen = unseen.drop(columns=[col])
    print("length of unseen in columns is " + str(len(unseen.columns)))

    predictions = loadedModel.predict(unseen)
    predictions_proba = loadedModel.predict_proba(unseen)
    print("length of predproba =" + str(len(predictions_proba)))
    print("length of pred =" + str(len(predictions)))

    uncertainDevice = 0
    correctDevice = 0
    certainReplay = 0
    incorrectDevice = 0
    replayCount = 0
    caughtReplay = 0
    confusedSame = 0

    # y, predictions and predictions_proba all have one entry per row of
    # ``unseen``, so they can be walked in lockstep.
    for truth, guessIndex, probabilities in zip(y, predictions, predictions_proba):
        truthIsReplay = 'replay' in truth.lower()
        if truthIsReplay:
            replayCount += 1

        myBestGuess = devices[guessIndex]
        confidence = probabilities.max()

        if zscore(table, confidence, guessIndex) < -1:
            # Confidence is more than one standard deviation below the
            # average recorded for the predicted device: very uncertain.
            if truthIsReplay:
                caughtReplay += 1
            else:
                uncertainDevice += 1
        elif truthIsReplay:
            # Case-insensitive, consistent with the check on the true label.
            if 'replay' in myBestGuess.lower():
                caughtReplay += 1
            else:
                # Confident about a device although the reading was a replay.
                print(truth, myBestGuess, confidence)
                certainReplay += 1
        elif myBestGuess == truth:
            correctDevice += 1
        elif myBestGuess[:-2] == truth[:-2]:
            # Same label apart from its last two characters.
            confusedSame += 1
        else:
            incorrectDevice += 1

    gotItRight(predictions_proba, predictions, devices, y)

    summary = (
        "I made {} predictions. I caught {} replays out of {}"
        ". I was uncertain at a genuine device (Bad) {}"
        ". I was certain at a Replay (Very Bad){}"
        ". I was certain at the right device (Very Good){}"
        ". I confused model(A) with model(B) {}"
        ". I was certain but the wrong device (Very Bad){}"
    ).format(
        len(predictions),
        caughtReplay,
        replayCount,
        uncertainDevice,
        certainReplay,
        correctDevice,
        confusedSame,
        incorrectDevice,
    )
    print(summary)
    
if __name__ == '__main__':
    # Script entry point: load the test and training spreadsheets plus the
    # confidence table, then evaluate the saved classifier on the test set.
    # NOTE(review): the two absolute paths below are specific to one machine.
    testDir = r'C:\Users\brnma\test.xlsx'
    path = r'C:\Users\brnma\train.xlsx'
    # File name of the pickled classifier; makePrediction opens it as given.
    clf = 'RandomForest d=15 n_est=20Model.sav'
    test = pd.read_excel(testDir)
    df = pd.read_excel(path)
    # Distinct device labels from the training data.  makePrediction indexes
    # this array with the model's predictions.
    # NOTE(review): presumably this order matches the label encoding used when
    # the model was trained -- confirm.
    devices =  df['Device'].unique()
    print(len(devices))
    # Passed to makePrediction as ``table``; zscore reads its 'Avg Conf' and
    # 'Std Conf' columns.
    confidence = pd.read_excel('smallerTable.xlsx')
    # Columns makePrediction removes from the test frame before predicting.
    labels = ['Device','Model','App','Distance']
    
    makePrediction(clf, test, devices, confidence, labels)
    print("Done")
    

17
length of unseen in columns is 49
length of predproba =1259
length of pred =1259
replay(A) iHealth Blood Pressure(A) 0.9
replay(A) iHealth Blood Pressure(A) 0.95
replay(A) iHealth Blood Pressure(A) 0.9999320652173912
replay(A) iHealth Blood Pressure(A) 0.9499320652173913
replay(A) iHealth Blood Pressure(A) 0.9
replay(A) iHealth Blood Pressure(A) 0.9
replay(A) iHealth Blood Pressure(A) 0.9
replay(A) iHealth Blood Pressure(A) 0.9
replay(A) iHealth Blood Pressure(A) 0.9
replay(A) iHealth Blood Pressure(A) 0.9499320652173913
replay(A) iHealth Blood Pressure(A) 0.9
replay(A) iHealth Blood Pressure(A) 0.9
replay(A) iHealth Blood Pressure(A) 0.9499320652173913
replay(B) iHealth Blood Pressure(A) 0.9
replay(B) iHealth Blood Pressure(A) 0.9
['Gian Iphone(A)' 'Portable ECG moniter(A)'
 'Bodimetrics Performance monitor(A)' 'Apple Watch(A)'
 'Eko Stethescope(A)' 'iHealth Blood Pressure(A)'
 'iHealth Blood Pressure(B)' 'Fever Sense(A)' 'Portable ECG(B)'
 'Fever Sense(B)' 'iHealth gluco(A)' 'iHea