In [1]:
import os
import json

from utils import utils

In [87]:
# Round identifier and model version; together they name the config file.
NMR = "nmr-87"
VERSION = "10"
CONFIG_PATH = "./config/numerai/%s-%s.json"%(NMR, VERSION)
DATA_PATH = "./data/numerai"

# sklearn model config dict: one entry per classifier, each with the
# parameter grid to search (keys use the "classifier__<param>" form).
parameters = [
    {
        "classifier": "mlp",
        "grid": {
            "classifier__activation": ["relu"],
            "classifier__hidden_layer_sizes": [[30, 20, 10]],
            "classifier__solver": ["adam"]
        }
    }
]

# Run configuration that is passed to the later cells and persisted as JSON.
conf = {
    "name": NMR,
    "data": DATA_PATH,
    "training": {
        "parameters": parameters
    },
    "evaluate": {
        "models": ["mlp"]
    },
    "labels": "target",
    "model_path": "./models/",
    "model_no": VERSION
}

# Make sure the target directory exists: open(..., 'w') does not create
# missing parent directories and would fail on a fresh checkout.
config_dir = os.path.dirname(CONFIG_PATH)
if not os.path.isdir(config_dir):
    os.makedirs(config_dir)

# write the config dict to file
with open(CONFIG_PATH, 'w') as f:
    json.dump(conf, f)

In [78]:
# Download latest NMR Dataset
from numerapi import NumerAPI

napi = NumerAPI(verbosity="info")

# Set up the round-specific directory. os.makedirs also creates missing
# parent directories, so this works when DATA_PATH itself does not exist
# yet (os.mkdir would raise in that case). os.path.join avoids the doubled
# "././" prefix the old format string produced.
download_path = os.path.join(DATA_PATH, NMR)
if not os.path.isdir(download_path):
    os.makedirs(download_path)

# download current dataset
# NOTE(review): the download targets DATA_PATH, not download_path — confirm
# this is intended; the later cells read their CSVs from DATA_PATH directly.
dl_succeeded = napi.download_current_dataset(dest_path=DATA_PATH, conf=conf, unzip=True)
print("Download succeeded: " + str(dl_succeeded))

2017-12-24T13:05:36 INFO numerapi: downloading current dataset...


HTTPError: 500 Server Error: Internal Server Error for url: https://api.numer.ai/competitions/current/dataset

In [6]:
# Transformation name handed to preprocess.get_transformation further down.
transformation = "pca"

# CSV to transform; uncomment the first line to use the tournament file instead.
#flag = "numerai_tournament_data.csv"
flag = "numerai_training_data.csv"

# Destination for the transformed data: <DATA_PATH>/<NMR>-<flag>-<transformation>.csv
output_file = "{}/{}-{}-{}.csv".format(DATA_PATH, NMR, flag, transformation)

In [11]:
# Perform dimensionality reduction on the dataset selected by `flag`.
import pandas as pd

from utils import utils
from preprocess import preprocess

transformer = preprocess.get_transformation(transformation, n_components=10)

path = "%s/%s" % (conf.get("data"), flag)

data = utils.load_data(path)
features = utils.get_features(data)

# The transformed matrix gets a fresh 0..n-1 index from pd.DataFrame.
output = pd.DataFrame(transformer.fit_transform(features))

# Attach the labels by position rather than by index label: assigning the
# Series directly would align on index and yield NaN/misplaced targets if
# `data` carries anything other than a default RangeIndex.
# (assumes utils.load_data returns a pandas DataFrame — TODO confirm)
output['target'] = data['target'].values

print(output.shape)

#output.to_csv(output_file, sep=',', index=False)

(393613, 11)


In [90]:
# Fit the sklearn models described by `conf`.
import nmr_train

# Training currently uses the raw CSV. To train on the transformed file, use:
#   training_filename = "%s-%s-%s.csv"%(NMR, flag, transformation)
training_filename = "numerai_training_data.csv"

nmr_train.train(conf, training_filename)

Starting run id nmr-87-10-mlp
Starting Grid Search


In [91]:
# Load one estimator for every model name listed under conf["evaluate"]["models"].
models = conf.get("evaluate").get("models")
estimators = list(map(lambda model_name: utils.load_model(conf, model_name), models))


In [92]:
# Read the tournament CSV from the configured data directory and pull out
# its feature matrix.
tournament_file = "numerai_tournament_data.csv"
path = "{}/{}".format(conf.get("data"), tournament_file)
data = utils.load_data(path)
X = utils.get_features(data)

# Only the first loaded estimator is evaluated here.
estimator = estimators[0]
probabilities = estimator.predict_proba(X)

In [93]:
import pandas as pd

# Build the submission frame: one row per tournament id with the value from
# column 1 of predict_proba.
# NOTE(review): column 1 is presumably the positive class — confirm against
# the estimator's classes_.
predictions = pd.DataFrame()
predictions['id'] = data['id']
predictions['probability'] = probabilities[:,1]

# to_csv does not create missing directories, so make sure the output
# folder exists before writing.
predictions_dir = "./predictions"
if not os.path.isdir(predictions_dir):
    os.makedirs(predictions_dir)

# File name is <name>-<model_no>.csv, taken from the run config.
pred_path = "%s/%s-%s.csv" % (predictions_dir, conf.get("name"), conf.get("model_no"))
predictions.to_csv(pred_path,
                    sep=',', index=False)