# Prédiction du genre d'une personne à partir de sa photo
## MLPClassifier

auteur : Rui SONG

### Imports and initializations

In [12]:
%matplotlib inline
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd

In [13]:
# Performance criterion
def compute_pred_score(y_true, y_pred):
    """Return the mean penalty of predictions that may abstain.

    Each sample contributes according to the product ``y_true * y_pred``:
    a product of -1 costs 10, a product of 0 costs 1, anything else costs
    nothing. With -1/+1 ground-truth labels (assumed, not checked here) this
    means a wrong answer costs 10 and an abstention (prediction 0) costs 1.

    Parameters
    ----------
    y_true : array-like
        Ground-truth labels.
    y_pred : array-like
        Predictions; every value must be -1, 0 or 1. Must have the same
        shape as ``y_true``.

    Returns
    -------
    float
        Total penalty divided by the number of samples (lower is better).

    Raises
    ------
    ValueError
        If ``y_pred`` contains a value other than -1, 0 or 1, if the two
        inputs do not have the same shape, or if they are empty.
    """
    # Accept plain Python sequences as well as arrays: list * list would
    # otherwise raise a TypeError in the product below.
    y_true = np.asarray(y_true)
    y_pred = np.asarray(y_pred)

    # Vectorised validation of the allowed prediction values.
    if np.any((y_pred != -1) & (y_pred != 1) & (y_pred != 0)):
        raise ValueError('The predictions can contain only -1, 1, or 0!')

    # Refuse silent broadcasting, which would produce a meaningless score.
    if y_true.shape != y_pred.shape:
        raise ValueError('y_true and y_pred must have the same shape, got '
                         '%s and %s' % (y_true.shape, y_pred.shape))
    if y_true.size == 0:
        raise ValueError('Cannot compute a score on empty inputs.')

    y_comp = y_true * y_pred
    score = float(10 * np.sum(y_comp == -1) + np.sum(y_comp == 0))
    score /= y_comp.shape[0]
    return score

### Data loading

In [14]:
from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA

# Input files, resolved relative to the notebook's working directory.
X_train_fname = 'training_templates.csv'
y_train_fname = 'training_labels.txt'
X_test_fname  = 'testing_templates.csv'

# The CSV files have no header row; .values yields plain NumPy arrays.
X_train = pd.read_csv(X_train_fname, sep=',', header=None).values
X_test  = pd.read_csv(X_test_fname,  sep=',', header=None).values
# `np.int` was only an alias of the builtin `int` and has been removed from
# recent NumPy releases; the builtin works on every version.
y_train = np.loadtxt(y_train_fname, dtype=int)

# Standardise the features. The scaler is fitted on the training set only
# and then applied unchanged to the test set.
X_scaler = StandardScaler()
X_train = X_scaler.fit_transform(X_train)
X_test = X_scaler.transform(X_test)

# PCA with default settings (no n_components given), likewise fitted on the
# training set only and then applied to the test set.
pca = PCA()
X_train = pca.fit_transform(X_train)
X_test = pca.transform(X_test)

### Training

In [15]:
from sklearn.metrics import roc_auc_score
from sklearn.neural_network import MLPClassifier
from sklearn.ensemble import BaggingClassifier

# Seed NumPy's global RNG: neither estimator below is given an explicit
# random_state, so this is the only seeding done in this cell.
np.random.seed(42)

# Base learner: MLP with three hidden layers of 250 units each, a very small
# L2 penalty (alpha) and a tight stopping tolerance (tol).
clf = MLPClassifier(hidden_layer_sizes=(250, 250, 250), alpha=0.000001, tol=0.00001)

# Bagging ensemble of 150 such networks, each trained on a bootstrap sample
# of 50% of the rows and 70% of the (also bootstrapped) features.
# The base estimator is passed positionally on purpose: its keyword name was
# changed from `base_estimator` to `estimator` in newer scikit-learn releases,
# while the first positional parameter works with both.
bagging = BaggingClassifier(clf, n_estimators=150, max_samples=0.5,
                            max_features=0.7, bootstrap=True, bootstrap_features=True,
                            n_jobs=-1, verbose=True)

# Long-running step (the recorded output shows about 83 minutes with 8 jobs).
bagging.fit(X_train, y_train)

[Parallel(n_jobs=8)]: Done   2 out of   8 | elapsed: 76.3min remaining: 229.0min
[Parallel(n_jobs=8)]: Done   8 out of   8 | elapsed: 83.1min finished


BaggingClassifier(base_estimator=MLPClassifier(activation='relu', alpha=1e-06, batch_size='auto', beta_1=0.9,
       beta_2=0.999, early_stopping=False, epsilon=1e-08,
       hidden_layer_sizes=(250, 250, 250), learning_rate='constant',
       learning_rate_init=0.001, max_iter=200, momentum=0.9,
       nesterovs_momentum=True, power_t=0.5, random_state=None,
       shuffle=True, solver='adam', tol=1e-05, validation_fraction=0.1,
       verbose=False, warm_start=False),
         bootstrap=True, bootstrap_features=True, max_features=0.7,
         max_samples=0.5, n_estimators=150, n_jobs=-1, oob_score=False,
         random_state=None, verbose=True, warm_start=False)

In [16]:
# Prediction on the training set.
# Run the ensemble once: BaggingClassifier.predict returns the class with the
# highest mean predicted probability, so the hard labels can be derived from
# predict_proba instead of paying for a second full pass over 150 networks.
proba_train = bagging.predict_proba(X_train)
y_predict_train_proba = proba_train[:, 0]  # probability of bagging.classes_[0]
y_pred_train = bagging.classes_.take(np.argmax(proba_train, axis=1), axis=0)

# Abstain (predict 0) whenever the probability lies strictly inside the
# band (1 - threshold, threshold), i.e. neither class is confident enough.
# NOTE(review): the test-set cell uses 0.675 for this band, so the score
# printed here is not computed with the threshold used for the submitted
# predictions -- confirm this is intended.
train_abstain_threshold = 0.9
uncertain_train = ((y_predict_train_proba < train_abstain_threshold)
                   & (y_predict_train_proba > 1 - train_abstain_threshold))
y_pred_train[uncertain_train] = 0

# Score on the training set (lower is better).
score = compute_pred_score(y_train, y_pred_train)
print('Score sur le train : %s' % score)

[Parallel(n_jobs=8)]: Done   2 out of   8 | elapsed:  1.2min remaining:  3.5min
[Parallel(n_jobs=8)]: Done   8 out of   8 | elapsed:  1.2min finished
[Parallel(n_jobs=8)]: Done   2 out of   8 | elapsed:  1.2min remaining:  3.5min
[Parallel(n_jobs=8)]: Done   8 out of   8 | elapsed:  1.2min finished


Score sur le train : 0.132945075758


## Predictions

Génération des prédictions sur le jeu de test et enregistrement du fichier à soumettre sur le site :

In [17]:
# Run the ensemble once: BaggingClassifier.predict returns the class with the
# highest mean predicted probability, so the hard labels can be derived from
# predict_proba instead of evaluating the whole ensemble a second time.
proba_test = bagging.predict_proba(X_test)
y_predict_proba = proba_test[:, 0]  # probability of bagging.classes_[0]
y_pred = bagging.classes_.take(np.argmax(proba_test, axis=1), axis=0)

# Abstain (predict 0) whenever the probability lies strictly inside the
# band (1 - threshold, threshold), i.e. neither class is confident enough.
# NOTE(review): the training-score cell uses 0.9 for this band instead of
# 0.675 -- confirm the difference is intended.
test_abstain_threshold = 0.675
uncertain_test = ((y_predict_proba < test_abstain_threshold)
                  & (y_predict_proba > 1 - test_abstain_threshold))
y_pred[uncertain_test] = 0

# Sanity check on the labels that will be written. The call form of print
# behaves the same on Python 2 and Python 3 for a single argument.
print(np.unique(y_pred))

# One integer label per line, in the order of the test samples.
np.savetxt('y_pred.txt', y_pred, fmt='%d')

[Parallel(n_jobs=8)]: Done   2 out of   8 | elapsed:    6.8s remaining:   20.4s
[Parallel(n_jobs=8)]: Done   8 out of   8 | elapsed:    9.2s finished
[Parallel(n_jobs=8)]: Done   2 out of   8 | elapsed:    6.6s remaining:   19.9s
[Parallel(n_jobs=8)]: Done   8 out of   8 | elapsed:    9.0s finished


[-1  0  1]


In [18]:
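# 0.164312617702
# NOTE(review): bare value left by the author -- presumably the score obtained
# for the submitted y_pred.txt; confirm and say which data set it refers to.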