In [14]:
import treon
import unittest
import pandas as pd
import numpy as np
import pickle

In [15]:
# Load the trained classifier that the tests below exercise.
# NOTE(security): pickle.load can execute arbitrary code while unpickling;
# only load classifier.pkl from a trusted source.
# NOTE(review): absolute, machine-specific path -- consider a configurable
# location relative to the repository.
MODEL_PATH = r'C:\Users\louis\OneDrive - McGill University\MMA Winter 2021 Notes\INSY695\classifier.pkl'
# The context manager closes the file handle as soon as the model is loaded.
with open(MODEL_PATH, 'rb') as model_file:
    pickled_model = pickle.load(model_file)

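**Note:** unpickling a model can execute arbitrary code, and a pickled model may not load correctly under a different scikit-learn version. Only load pickle files from trusted sources. See the scikit-learn notes on the security and maintainability limitations of model persistence: https://scikit-learn.org/stable/modules/model_persistence.html#security-maintainability-limitations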


In [58]:
# Read the model-ready CSV into the DataFrame that the dataset tests inspect.
dataset_path = r"D:/GitHubLocal/NHL-Game-II/Period_1_Game_Stats_Final_ModelReady(April-10th-2022).csv"
df = pd.read_csv(dataset_path)

In [97]:
class TestModel(unittest.TestCase):
    
    
    def test_model_predictions(self):
        """
        Test to see if pickled model label prediction = 1 OR = 0
        Test to see if pickled model probability prediction in [0,1]
        """
        fake_data = [[30,5,0,0,0,0,0,0,0,0,1,0,1,0,0,0,0,0,0,0]]
        
        label_pred = pickled_model.predict(fake_data)
        proba_pred = pickled_model.predict_proba(fake_data)
        
        self.assertTrue( (label_pred[0] == 0) | (label_pred[0] == 1) )
        self.assertTrue( (abs(np.max(proba_pred) < 1)) & abs((np.min(proba_pred) < 1)) )
        
    def test_dataset_shape(self):
        """
        Test to see if main dataset has 21 features and isn't empty
        """
        self.assertTrue( (df.shape[0] != 0) & (df.shape[1] == 21) )
        
    def test_dataset_types(self):
        """
        Test to see if data types in dataset are consistent
        """
        self.assertTrue( df.won.dtypes == "int64")
        self.assertTrue( df.Shots.dtypes == "float64")
        self.assertTrue( df.Shots_Against.dtypes == "float64")
        self.assertTrue( df.Goals.dtypes == "float64")
        self.assertTrue( df.Goals_Against.dtypes == "float64")
        self.assertTrue( df.Takeaways.dtypes == "float64")
        self.assertTrue( df.Takeaways_Against.dtypes == "float64")
        self.assertTrue( df.Hits.dtypes == "float64")
        self.assertTrue( df.Hits_Against.dtypes == "float64")
        self.assertTrue( df["Blocked Shots"].dtypes == "float64")
        self.assertTrue( df["Blocked Shots Against"].dtypes == "float64")
        self.assertTrue( df.Giveaways.dtypes == "float64")
        self.assertTrue( df.Giveaways_Against.dtypes == "float64")
        self.assertTrue( df["Missed Shots"].dtypes == "float64")
        self.assertTrue( df["Missed Shots_Against"].dtypes == "float64")
        self.assertTrue( df.Penalities.dtypes == "float64")
        self.assertTrue( df.Penalities_Against.dtypes == "float64")
        self.assertTrue( df["#Won Faceoffs"].dtypes == "float64")
        self.assertTrue( df["#Lost Faceoffs"].dtypes == "float64")
        self.assertTrue( df.HoA_away.dtypes == "int64")
        self.assertTrue( df.HoA_home.dtypes == "int64")      
    
    def test_dataset_values(self):
        """
        Test to see if values in dataset are logically consistent
        """
        #Check to see if values in won are either 0 or 1
        self.assertTrue( df.won.isin([0,1]).all() )
        
        #Check to see if values in HoA_away and HoA_home are either 0 or 1
        self.assertTrue( df.HoA_away.isin([0,1]).all() )
        self.assertTrue( df.HoA_home.isin([0,1]).all() )
        
        #Check to see if values for float features are non-negative
        self.assertFalse( (df.Shots < 0).values.any() )
        self.assertFalse( (df.Shots_Against < 0).values.any() )
        self.assertFalse( (df.Goals < 0).values.any() )
        self.assertFalse( (df.Goals_Against < 0).values.any() )
        self.assertFalse( (df.Takeaways < 0).values.any() )
        self.assertFalse( (df.Takeaways_Against < 0).values.any() )
        self.assertFalse( (df.Hits < 0).values.any() )
        self.assertFalse( (df.Hits_Against < 0).values.any() )
        self.assertFalse( (df["Blocked Shots"] < 0).values.any() )
        self.assertFalse( (df["Blocked Shots Against"] < 0).values.any() )
        self.assertFalse( (df.Giveaways < 0).values.any() )
        self.assertFalse( (df.Giveaways_Against < 0).values.any() )
        self.assertFalse( (df["Missed Shots"] < 0).values.any() )
        self.assertFalse( (df["Missed Shots_Against"] < 0).values.any() )
        self.assertFalse( (df.Penalities < 0).values.any() )
        self.assertFalse( (df.Penalities_Against < 0).values.any() )
        self.assertFalse( (df["#Won Faceoffs"] < 0).values.any() )
        self.assertFalse( (df["#Lost Faceoffs"] < 0).values.any() )


In [98]:
# Preview the first five rows of the dataset.
df.head(n=5)

Unnamed: 0,won,Shots,Shots_Against,Goals,Goals_Against,Takeaways,Takeaways_Against,Hits,Hits_Against,Blocked Shots,...,Giveaways,Giveaways_Against,Missed Shots,Missed Shots_Against,Penalities,Penalities_Against,#Won Faceoffs,#Lost Faceoffs,HoA_away,HoA_home
0,0,8.0,8.0,0.0,3.0,1.0,3.0,14.0,5.0,3.0,...,6.0,7.0,4.0,0.0,1.0,1.0,10.0,12.0,1,0
1,1,8.0,8.0,3.0,0.0,3.0,1.0,5.0,14.0,3.0,...,7.0,6.0,0.0,4.0,1.0,1.0,12.0,10.0,0,1
2,1,11.0,12.0,0.0,1.0,0.0,2.0,4.0,4.0,6.0,...,2.0,0.0,6.0,3.0,1.0,3.0,11.0,9.0,1,0
3,0,12.0,11.0,1.0,0.0,2.0,0.0,4.0,4.0,8.0,...,0.0,2.0,3.0,6.0,3.0,1.0,9.0,11.0,0,1
4,1,9.0,8.0,0.0,1.0,3.0,2.0,4.0,5.0,7.0,...,7.0,5.0,3.0,8.0,2.0,1.0,7.0,11.0,1,0


In [99]:
# List the column labels of the dataset (the names the tests above refer to).
df.columns

Index(['won', 'Shots', 'Shots_Against', 'Goals', 'Goals_Against', 'Takeaways',
       'Takeaways_Against', 'Hits', 'Hits_Against', 'Blocked Shots',
       'Blocked Shots Against', 'Giveaways', 'Giveaways_Against',
       'Missed Shots', 'Missed Shots_Against', 'Penalities',
       'Penalities_Against', '#Won Faceoffs', '#Lost Faceoffs', 'HoA_away',
       'HoA_home'],
      dtype='object')

In [93]:
# Run every unittest.TestCase defined in this notebook inside the kernel.
if __name__ == '__main__':
    # exit=False stops unittest from calling sys.exit() once the run finishes,
    # which would otherwise raise SystemExit in the notebook.
    # argv is overridden so unittest does not try to parse the kernel's own
    # command-line arguments; the first element stands in for the program name.
    unittest.main(exit=False, argv=['first-arg-is-ignored'])

....
----------------------------------------------------------------------
Ran 4 tests in 0.008s

OK
