In [8]:
#!/usr/bin/env python

"""
Example classifier on Numerai data using a logistic regression classifier.
To get started, install the required packages: pip install pandas, numpy, sklearn
"""

import pandas as pd
import numpy as np
from sklearn import metrics, preprocessing, linear_model


def main():
    # Set seed for reproducibility
    np.random.seed(0)

    print("Loading data...")
    # Load the data from the CSV files
    ''' training data contains only training data'''
    training_data = pd.read_csv('numerai_training_data.csv', header=0)
    '''
    prediction_data contains both validation data (with targets) and test data
    (without targets)
    '''
    prediction_data = pd.read_csv('numerai_tournament_data.csv', header=0)

    # print('\n')
    # print('prediction_data')
    # print(prediction_data)


    # Transform the loaded CSV data into numpy arrays
    '''
    Goes through all the pd.DataFrame columns titles. If they contain 'feature'
    they are added to the features list
    '''
    features = [f for f in list(training_data) if "feature" in f]
    X = training_data[features]                     # pd.DataFrame of all training features
    Y = training_data["target"]                     # pd.Series of the classes
    x_prediction = prediction_data[features]        # pd.DataFrame of all validation and test features
    ids = prediction_data["id"]
    print('\n')
    print('ids')
    print(ids)
    print('\n')
    print('ids')
    print(ids.duplicated)

    # This is your model that will learn to predict
    model = linear_model.LogisticRegression(n_jobs=-1)

    print("Training...")
    # Your model is trained on the training_data
    model.fit(X, Y)

    print("Predicting...")
    # Your trained model is now used to make predictions on the numerai_tournament_data
    # The model returns two columns: [probability of 0, probability of 1]
    # We are just interested in the probability that the target is 1.
    y_prediction = model.predict_proba(x_prediction)
    results = y_prediction[:, 1]
    results_df = pd.DataFrame(data={'probability':results})
    joined = pd.DataFrame(ids).join(results_df)

    print("Writing predictions to predictions.csv")
    # Save the predictions out to a CSV file
    joined.to_csv("predictions.csv", index=False)
    # Now you can upload these predictions on numer.ai


if __name__ == '__main__':
    main()


Loading data...


ids
0        446551
1        384542
2        352413
3        397163
4        483655
5        380499
6        402292
7        471955
8        489207
9        362300
10       503155
11       470987
12       421179
13       502504
14       374836
15       485953
16       427040
17       432338
18       411005
19       483846
20       431402
21       422297
22       481277
23       488409
24       380064
25       402035
26       475169
27       442947
28       435338
29       421135
          ...  
45590    427724
45591    457048
45592    478346
45593    416865
45594    500319
45595    382307
45596    372420
45597    466145
45598    476437
45599    379542
45600    368326
45601    473976
45602    416193
45603    356820
45604    364680
45605    490013
45606    441605
45607    491900
45608    394434
45609    493768
45610    391556
45611    460152
45612    484745
45613    398494
45614    371427
45615    350957
45616    471911
45617    398103
45618    355577
45619    368017
Na

TypeError: 'method' object is not subscriptable

In [9]:
a = pd.Series(['a', 'b', 'c', 'c'])

a = a.duplicated
print(a)

<bound method Series.duplicated of 0    a
1    b
2    c
3    c
dtype: object>
