# Example of how to save a trained learner (via pickling) so we don't have to retrain it when we want to reuse it later.

In [1]:
import pickle
import numpy as np
import matplotlib.pyplot as plt
import mltools as ml
import math

%matplotlib inline
np.random.seed(0)    # Default seeding for hw

# Data Loading
X = np.genfromtxt('data/X_train.txt', delimiter=None)
Y = np.genfromtxt('data/Y_train.txt', delimiter=None)
X,Y = ml.shuffleData(X,Y)

# Split the data
Xtr, Xva, Ytr, Yva = ml.splitData(X, Y)
Xt, Yt = Xtr[:5000], Ytr[:5000] # subsample for efficiency (you can go higher)

In [2]:
# Train a decision-tree classifier on the training subsample (Xt, Yt).
learner = ml.dtree.treeClassify(Xt, Yt, maxDepth=7, minParent=3, minLeaf=2)

# (Xt, Yt) is the same data the tree was just fit on, so the first figure is a
# *training* AUC; only the (Xva, Yva) figure is measured on held-out data.
print('Training AUC  : {}'.format(learner.auc(Xt, Yt)))
print('Validation AUC: {}'.format(learner.auc(Xva, Yva)))

Testing AUC   : 0.7473025716293824
Validation AUC: 0.6309046554128769


In [3]:
# Predict: get the learner's soft predictions for the validation features.
# The printed result has one row per validation point and two columns whose
# values sum to 1 (presumably per-class probabilities -- confirm in mltools).
probs = learner.predictSoft(Xva)
print(probs)

[[ 0.55042017  0.44957983]
 [ 0.81759259  0.18240741]
 [ 0.60799136  0.39200864]
 ..., 
 [ 0.56048387  0.43951613]
 [ 0.69277108  0.30722892]
 [ 0.79461756  0.20538244]]


In [11]:
# Save the learner (pickle) for loading later.
# The `with` block closes the file as soon as the dump finishes, so the bytes
# are flushed to disk before any later cell (or another notebook) reads them.
with open('pickle/example_learner.pkl', "wb") as pkl_file:
    pickle.dump(learner, pkl_file)

In [17]:
# Loading (sometime later or in a different notebook).
# NOTE: pickle.load can execute arbitrary code while unpickling -- only load
# files you created yourself or otherwise trust.
# The `with` block releases the file handle once the learner has been read.
with open('pickle/example_learner.pkl', "rb") as pkl_file:
    new_learner = pickle.load(pkl_file)

In [18]:
# Make sure the learner we loaded still works the same.
new_probs = new_learner.predictSoft(Xva)
print(new_probs)

# Check the round trip programmatically rather than comparing printouts by eye:
# `probs` holds the original learner's predictions on the same Xva, so the
# reloaded learner must reproduce them exactly. Raises AssertionError if not.
np.testing.assert_array_equal(new_probs, probs)

[[ 0.55042017  0.44957983]
 [ 0.81759259  0.18240741]
 [ 0.60799136  0.39200864]
 ..., 
 [ 0.56048387  0.43951613]
 [ 0.69277108  0.30722892]
 [ 0.79461756  0.20538244]]


# Save all the learners under the pickle/ directory so they can be easily found and loaded into an ensemble.