In [2]:
import glob
import os

import numpy as np
import pandas as pd
import h5py as h5
from sklearn.preprocessing import StandardScaler
from sklearn import svm
from sklearn.pipeline import Pipeline
from sklearn.grid_search import GridSearchCV
from sklearn.externals import joblib

# Input feature files (HDF5) and the output / model locations, all relative
# to the current working directory.
train_file = 'features/train_stats_features.h5'
test_file = 'features/test_stats_features.h5'
out_dir = 'predictions'
model_dir = 'models/grad_boost'

# Create the output directory if it is missing. Attempt the creation and
# tolerate "already exists" instead of checking first, so a directory that
# appears between a check and the call cannot crash the run. Any other
# failure (permissions, a regular file in the way, ...) is still raised.
try:
    os.makedirs(out_dir)
except OSError:
    if not os.path.isdir(out_dir):
        raise

# Predict with one pre-trained model per channel, then write the
# channel-averaged prediction for every test case to a CSV file.
# Only the test features are needed for prediction, so the training file is
# not opened here.
n_ch = 16

# One pickled model per channel, stored as <model_dir>/ch<i>.pkl.
# NOTE: joblib.load unpickles arbitrary objects -- only load trusted files.
models = [joblib.load('%s/ch%d.pkl' % (model_dir, i)) for i in range(n_ch)]

# Open read-only: the features are input data, and an explicit 'r' mode
# guarantees a missing/mistyped path raises instead of creating an empty file.
with h5.File(test_file, 'r') as test:
    n_cases = test['ch0'].shape[0]

    # Row i holds the predictions of channel i's model for all test cases.
    predictions = np.zeros((n_ch, n_cases))
    for i, model in enumerate(models):
        predictions[i] = model.predict(test['ch%d' % i][:])

    print(predictions)

    # Average across channels (axis 0) to get one value per test case.
    mean_prediction = predictions.mean(axis=0)
    print(mean_prediction)

    # [:] copies the names into memory, so `output` stays valid after the
    # file is closed.
    # NOTE(review): under Python 3, h5py may return these names as bytes,
    # which would be written to the CSV as b'...' -- confirm and decode if so.
    output = pd.DataFrame(data=test['names'][:], columns=['File'])
    output['Class'] = pd.Series(mean_prediction)

output.to_csv('%s/stats_grad_boost_prediction.csv' % out_dir, index=False)

[[ 0.  0.  0. ...,  0.  0.  0.]
 [ 0.  0.  0. ...,  0.  0.  0.]
 [ 0.  0.  0. ...,  0.  0.  0.]
 ..., 
 [ 0.  0.  0. ...,  0.  0.  0.]
 [ 1.  1.  1. ...,  1.  1.  1.]
 [ 1.  1.  1. ...,  1.  1.  1.]]
[ 0.3125  0.3125  0.3125 ...,  0.375   0.3125  0.3125]
