In [None]:
import numpy as np
import matplotlib.pyplot as plt
import mltools as ml
import mltools.nnet
# Fix the global NumPy seed so the shuffle below (and any other np.random
# based step) is repeatable from a fresh kernel.
np.random.seed(0)

# Read the training features, training targets and test features, in that order.
X, Y, Xtest = (
    np.genfromtxt(path, delimiter=None)
    for path in ("data/X_train.txt", "data/Y_train.txt", "data/X_test.txt")
)

# Shuffle features and targets together before any train/validation split.
X, Y = ml.shuffleData(X, Y)

# Random Forest

In [None]:
# Split the shuffled data into training and validation parts (0.8 is passed
# as the split fraction).
Xtr, Xte, Ytr, Yte = ml.splitData(X, Y, 0.8)

# Ensemble sizes to evaluate; the largest one is the number of trees we train.
bags = [1, 5, 10, 25]
nBags = max(bags)

ensemble = [None] * nBags
Ytr_rf = np.zeros((len(Ytr), nBags))   # column i = predictions of tree i on Xtr
Yte_rf = np.zeros((len(Yte), nBags))   # column i = predictions of tree i on Xte

# Train every tree on its own bootstrap resample of the training split and
# record its predictions on both splits.
for i in range(nBags):
    Xi, Yi = ml.bootstrapData(Xtr, Ytr)
    ensemble[i] = ml.dtree.treeClassify(Xi, Yi, maxDepth=20, nFeatures=10)
    Ytr_rf[:, i] = ensemble[i].predict(Xtr)
    Yte_rf[:, i] = ensemble[i].predict(Xte)

# Error rate of the majority vote of the first `nb` trees, computed once after
# all trees are trained.
# NOTE(review): thresholding the mean vote at 0.5 assumes the class labels are
# 0/1 -- confirm against the data.
errorTrain, errorValid = np.zeros(len(bags)), np.zeros(len(bags))
for k, nb in enumerate(bags):
    voteTrain = Ytr_rf[:, :nb].mean(axis=1) > 0.5
    voteValid = Yte_rf[:, :nb].mean(axis=1) > 0.5
    # Compare the vote with the label explicitly; `Y - mean > 0.5` would parse
    # as `(Y - mean) > 0.5` and only count one kind of mistake.
    errorTrain[k] = np.mean(Ytr != voteTrain)
    errorValid[k] = np.mean(Yte != voteValid)

In [None]:
# Training error rates on the first line, validation error rates on the second.
print(errorTrain, errorValid, sep="\n")

In [None]:
class randomForest(ml.base.classifier):
    """Ensemble classifier that averages the hard votes of its member learners.

    NOTE(review): the vote averaging assumes a binary problem whose labels
    are 0/1, so that the mean prediction is the fraction of learners voting
    for the second class -- confirm against the data.
    """

    def __init__(self, learners):
        """Store the trained learners and copy the class list from the first one.

        learners : non-empty sequence of trained classifiers exposing
                   `.classes` and `.predict(X)`.
        """
        self.learners = learners
        self.classes = learners[0].classes

    def predictSoft(self, X):
        """Return an (n_samples, n_classes) array of vote fractions.

        Column 1 holds the mean of the learners' predictions; column 0 holds
        its complement so that the two columns sum to one.
        """
        ysoft = np.zeros((X.shape[0], len(self.classes)))
        for learner in self.learners:
            ysoft[:, 1] += learner.predict(X)
        ysoft[:, 1] /= len(self.learners)
        # Fill in the first class as well; leaving it at zero makes every row
        # look like "class 1 or nothing" to anything that compares the columns.
        ysoft[:, 0] = 1.0 - ysoft[:, 1]
        return ysoft
    
# Wrap the trained trees in a single classifier and report its AUC on the
# training and validation splits.
rf = randomForest(ensemble)
aucTrain = rf.auc(Xtr, Ytr)
print("AUC Train: ", aucTrain)
aucValid = rf.auc(Xte, Yte)
print("AUC Valid: ", aucValid)

In [None]:
# Soft predictions of the forest on the test set (Ypred1) and the validation
# split (Ypred2); keep only column 1 as an (n, 1) array.
Ypred1 = rf.predictSoft(Xtest)[:, [1]]
Ypred2 = rf.predictSoft(Xte)[:, [1]]

# Write "ID,Prob1" tables: one row per sample, row index as the ID.
np.savetxt('Pe1.txt', np.column_stack((np.arange(len(Ypred1)), Ypred1[:, 0])),
           fmt='%d, %.2f', header='ID,Prob1', comments='', delimiter=',')
np.savetxt('Pv1.txt', np.column_stack((np.arange(len(Ypred2)), Ypred2[:, 0])),
           fmt='%d, %.2f', header='ID,Prob1', comments='', delimiter=',')

# Linear Regression

In [None]:
# Degrees passed to ml.transforms.fpoly, tried in this order.
D = [0, 1, 2]

ErrTrain, ErrTest = [], []

for i, d in enumerate(D):
    # Expand the training features with fpoly (bias=False), rescale them, and
    # keep the rescaling parameters so other data can be mapped the same way.
    XtrP, params = ml.transforms.rescale(ml.transforms.fpoly(Xtr, d, bias=False))
    lr = ml.linear.linearRegress(XtrP, Ytr)

    def Phi(X):
        """Apply the current degree's expansion and the stored rescaling to X."""
        # `d` and `params` are looked up when Phi is called, so once the loop
        # has finished Phi (like `lr`) corresponds to the last degree in D.
        return ml.transforms.rescale(ml.transforms.fpoly(X, d, False), params)[0]

    ErrTrain.append(lr.mse(Phi(Xtr), Ytr))
    ErrTest.append(lr.mse(Phi(Xte), Yte))

# One MSE per entry of D: training split first, then validation split.
print(ErrTrain)
print(ErrTest)

In [None]:
# Predictions of the last fitted regression model on the test set (Ypred1)
# and the validation split (Ypred2), using the matching feature map Phi.
Ypred1 = lr.predict(Phi(Xtest))
Ypred2 = lr.predict(Phi(Xte))

# Write "ID,Prob1" tables: one row per sample, row index as the ID.
np.savetxt('Pe2.txt', np.column_stack((np.arange(len(Ypred1)), Ypred1[:, 0])),
           fmt='%d, %.2f', header='ID,Prob1', comments='', delimiter=',')
np.savetxt('Pv2.txt', np.column_stack((np.arange(len(Ypred2)), Ypred2[:, 0])),
           fmt='%d, %.2f', header='ID,Prob1', comments='', delimiter=',')

# Neural Net

In [None]:
# Rescale the full training set for the neural net.  Keep the scaling
# parameters under a real name (instead of throwing them away into `_`) so the
# same transform can be applied to any data the network later predicts on.
XR, XR_params = ml.transforms.rescale(X)

In [None]:
nn = ml.nnet.nnetRegress()

# Layer sizes: input width taken from the data (rather than hard-coded),
# one hidden layer of 2 units, a single output.
nn.init_weights([XR.shape[1], 2, 1], 'random', XR, Y)

# NOTE(review): XR is built from all of X, which includes the rows that
# splitData put into Xte, so validation predictions from this network are made
# on data it was trained on.
# The negative stopTol presumably disables the tolerance-based early stop so
# that all stopIter iterations run -- confirm against mltools.
nn.train(XR, Y, stopTol=-100, stepsize=0.1, stopIter=256)
print("\n",nn.wts)

In [None]:
# The network was trained on rescaled features, so the data it predicts on has
# to go through the same rescaling.  The parameters are recomputed from X here
# (same input as the training-time rescale call, so presumably the same
# values) to keep this cell self-contained.
nnScale = ml.transforms.rescale(X)[1]

Ypred1 = nn.predict(ml.transforms.rescale(Xtest, nnScale)[0])
Ypred2 = nn.predict(ml.transforms.rescale(Xte, nnScale)[0])

# Write "ID,Prob1" tables: one row per sample, row index as the ID.
np.savetxt('Pe3.txt', np.vstack( (np.arange(len(Ypred1)) , Ypred1[:,0]) ).T, '%d, %.2f',header='ID,Prob1',comments='',delimiter=',')
np.savetxt('Pv3.txt', np.vstack( (np.arange(len(Ypred2)) , Ypred2[:,0]) ).T, '%d, %.2f',header='ID,Prob1',comments='',delimiter=',')

# Stacking

In [None]:
# Read back the per-model prediction files written by the earlier cells
# (Pv*.txt = validation split, Pe*.txt = test set).  Skip the "ID,Prob1"
# header row and keep only the prediction column.
Pv1 = np.genfromtxt('Pv1.txt',delimiter=',',skip_header=1)[:,1]
Pv2 = np.genfromtxt('Pv2.txt',delimiter=',',skip_header=1)[:,1]
Pv3 = np.genfromtxt('Pv3.txt',delimiter=',',skip_header=1)[:,1]

Pe1 = np.genfromtxt('Pe1.txt',delimiter=',',skip_header=1)[:,1]
Pe2 = np.genfromtxt('Pe2.txt',delimiter=',',skip_header=1)[:,1]
Pe3 = np.genfromtxt('Pe3.txt',delimiter=',',skip_header=1)[:,1]

In [None]:
# Stacking features: one column per base model, one row per validation sample.
# (hstack on 1-D vectors would glue them end to end into a single long vector.)
Sv = np.column_stack((Pv1, Pv2, Pv3))

# Fit the combiner on the validation split; Yte holds the targets of the rows
# the validation predictions were made on.
stack = ml.linear.linearRegress(Sv, Yte, reg=1e-3)
print("** Stacked MSE: ", stack.mse(Sv, Yte))

# Combine the base models' test-set predictions with the fitted weights and
# write the result in the same "ID,Prob1" layout as the other prediction files.
Se = np.column_stack((Pe1, Pe2, Pe3))
PeS = stack.predict(Se)
np.savetxt('Stack.txt', np.vstack( (np.arange(len(PeS)) , PeS[:,0]) ).T, '%d, %.2f',header='ID,Prob1',comments='',delimiter=',')