### Import module, function and data

In [1]:
import warnings
warnings.filterwarnings("ignore")
import pandas as pd
import numpy as np
import xgboost as xgb
from xgboost.sklearn import XGBClassifier
from sklearn.metrics import precision_score, recall_score
from sklearn.model_selection import StratifiedKFold
from sklearn.model_selection import GridSearchCV
from sklearn.model_selection import validation_curve
from pprint import pprint
import matplotlib.pyplot as plt
from hyperopt.pyll.stochastic import sample
from hyperopt import fmin, tpe, hp, STATUS_OK, Trials

from sklearn.ensemble import RandomForestClassifier # rf분류기

In [5]:
def F1(y_pred, dtrain):
    labels = dtrain.get_label()
    
    pre = precision_score(y_true = labels, y_pred = y_pred, average=None)
    rec = recall_score(y_true = labels, y_pred = y_pred, average=None)
    f1_score = 8/(sum(1/pre) + sum(1/rec))

    return 'f1', f1_score

In [6]:
def f1(X_val, y_val, model,mapping):
    """
    Model evaluation function for multiclass classification problem
    1) F-1 score, Precision, Recall
    2) ROC curve, PR curve는 추후에 생각
    """
 
    #### predict the value
    y_pred = model.predict(X_val)

    #print('-'*50)
    #print('2. F1-score')
    
    # inverse pre/ rec
    pre = precision_score(y_true = y_val, y_pred = y_pred, average=None)
    rec = recall_score(y_true = y_val, y_pred = y_pred, average=None)

    # f1 measure
    f1_score = 8/(sum(1/pre) + sum(1/rec))
    
    # view - precision recall
    table = pd.DataFrame([])

    for i,k in enumerate(mapping.keys()):
        table[k] = [pre[i],rec[i]]
    table.index = ['precision','recall']
    # print(table)
    
    # view - f1
    #print('F1_score %.3f'%f1_score)
    #print('='*50)
    return f1_score

In [7]:
#### load data set
## train
X_train1 = pd.read_csv('temp_data/X_train_stat.csv')
X_train1.columns = ['new_id'] + [x +'_stat' for x in X_train1.columns[1:]]
X_train2 = pd.read_csv('temp_data/X_train_easy_time.csv').drop('new_id',axis=1)
X_train2.columns = [x +'_basic_time' for x in X_train2.columns]
X_train3 = pd.read_csv('temp_data/X_train_게임활동_time.csv').drop('new_id',axis=1)
X_train3.columns = [x +'_time_series' for x in X_train3.columns]
X_train6 = pd.read_csv('temp_data/X_train_ratio.csv').drop('new_id',axis=1)
X_train6.fillna(0.0)
X_train9 = pd.read_csv('temp_data/X_train_act_comb_1.csv').drop('new_id',axis=1)
X_train10 = pd.read_csv('temp_data/train_playpattern_mean_encoding.csv').drop('new_id',axis=1)

## test
X_test1 = pd.read_csv('temp_data/X_test_stat.csv')
X_test1.columns = X_train1.columns
X_test2 = pd.read_csv('temp_data/X_test_easy_time.csv').drop('new_id',axis=1)
X_test2.columns = X_train2.columns
X_test3 = pd.read_csv('temp_data/X_test_게임활동_time.csv').drop('new_id',axis=1)
X_test3.columns = X_train3.columns
X_test6 = pd.read_csv('temp_data/X_test_ratio.csv').drop('new_id',axis=1)
X_test6.fillna(0.0)
X_test9 = pd.read_csv('temp_data/X_test_act_comb_1.csv').drop('new_id',axis=1)
X_test10 = pd.read_csv('temp_data/test_playpattern_mean_encoding.csv').drop('new_id',axis=1)


## guild and trade and party
X_train4 = pd.read_csv('temp_data/temp_guild_train.csv').drop('new_id',axis=1)
X_train5 = pd.read_csv('temp_data/temp_trade_train.csv').drop('new_id',axis=1)
X_test4 = pd.read_csv('temp_data/temp_guild_test.csv').drop('new_id',axis=1)
X_test5 = pd.read_csv('temp_data/temp_trade_test.csv').drop('new_id',axis=1)
X_train7 = pd.read_csv('temp_data/X_train_party.csv').drop('new_id',axis=1)
X_test7 = pd.read_csv('temp_data/X_test_party.csv').drop('new_id',axis=1)
X_train8 = pd.read_csv('temp_data/training_trade.csv').drop('new_id',axis=1)
X_test8 = pd.read_csv('temp_data/test_trade.csv').drop('new_id',axis=1)

In [8]:
X_train = pd.concat((X_train1,X_train2,X_train3,X_train4,X_train5, X_train6,X_train7,X_train8,X_train9,X_train10),axis=1).drop('new_id',axis=1)
X_test = pd.concat((X_test1,X_test2,X_test3,X_test4,X_test5,X_test6,X_test7,X_test8,X_test9,X_test10),axis=1).drop('new_id',axis=1)

In [9]:
#### load class
train_label = pd.read_csv('temp_data/train_label_lite.csv')
# hasher = pd.read_csv('test_id.csv')
label_map = {'retained':0,'2month':1,'month':2,'week':3}
y_train = pd.Series([label_map[l] for l in train_label.label])

In [10]:
### inf가 존재
X_train = X_train.drop('cnt_use_buffitem_by_game_combat_time',axis = 1)
X_test = X_test.drop('cnt_use_buffitem_by_game_combat_time',axis = 1)

---

### Feature selection

In [11]:
X_train.shape

(100000, 1161)

In [12]:
#### RF 모델
model = RandomForestClassifier(criterion='gini','max_features': 243, 'min_samples_leaf': 4,n_estimators=300,random_state= 7, n_jobs=-1)
X_train_rf = X_train.fillna(0)

In [13]:
#### cross validation
kfold = StratifiedKFold(n_splits = 10 ,random_state = 7).split(X_train_rf, y_train)
scores = []
for k, (train, test) in enumerate(kfold):
    model.fit(X_train_rf.iloc[train,:], y_train[train])
    score = f1(X_train_rf.iloc[test,:], y_train[test], model,label_map)
    scores.append(score)
    print('Fold: %s, Class dist.: %s, F1: %.3f' % (k+1,np.bincount(y_train[train]), score))
    
print('\nCV F1: %.3f +/- %.3f' % (np.mean(scores), np.std(scores)))

Fold: 1, Class dist.: [22500 22500 22500 22500], F1: 0.715
Fold: 2, Class dist.: [22500 22500 22500 22500], F1: 0.718
Fold: 3, Class dist.: [22500 22500 22500 22500], F1: 0.725
Fold: 4, Class dist.: [22500 22500 22500 22500], F1: 0.726
Fold: 5, Class dist.: [22500 22500 22500 22500], F1: 0.714
Fold: 6, Class dist.: [22500 22500 22500 22500], F1: 0.718
Fold: 7, Class dist.: [22500 22500 22500 22500], F1: 0.715
Fold: 8, Class dist.: [22500 22500 22500 22500], F1: 0.712
Fold: 9, Class dist.: [22500 22500 22500 22500], F1: 0.707
Fold: 10, Class dist.: [22500 22500 22500 22500], F1: 0.711

CV F1: 0.716 +/- 0.006


In [14]:
#### feature selection by RF
importances = model.feature_importances_
std = np.std([tree.feature_importances_ for tree in model.estimators_],axis=0)
indices = np.argsort(importances) # ascending

#### feature ranking
feature_ranking = [(indices[f],importances[indices[f]]) for f in range(X_train.shape[1])]

In [15]:
#### state of art feature.... 300개 정도... importance ratio 조정!!!
NUM_OF_FEATURES = len([(i,f) for i, f in feature_ranking if f > 0.0001])

In [16]:
NUM_OF_FEATURES

693

In [17]:
col = pd.DataFrame({'importance': model.feature_importances_, 'feature': X_train.columns}).sort_values(by=['importance'], ascending=[False])[:NUM_OF_FEATURES]['feature'].values

In [18]:
#### FEATURE SELECTION
X_train = X_train[col]
X_test = X_test[col]
X_train.shape

(100000, 693)

---

### hyperopt Xgb

In [None]:
def xgb_classifier(params): # hyperopt의 objective function은 params를 input으로 받는다.
    
    global obj_call_count, cur_best_score, cur_best_std, X_train, y_train # 우리가 input할 데이터는 global변수화!
    
    obj_call_count += 1
    print('\nXgboost objective call #{} cur_best_score={:7.5f} cur_best_std={:7.5f}'.format(obj_call_count,cur_best_score,cur_best_std) )
    
    #### sampling parameters from the hyperparameter params
    xgb_params = sample(params)
    model = XGBClassifier(**xgb_params)
    
    #### 5 fold cross validation for rf
    kfold = StratifiedKFold(n_splits = 5 ,random_state = 7,shuffle=True).split(X_train, y_train)
    scores = []
    predict_set = []
    for k, (train, test) in enumerate(kfold):
        model.fit(X_train.iloc[train,:], y_train[train],eval_metric = F1)
        scores.append(score)
        print('Fold: %s, Class dist.: %s, F1: %.3f' % (k+1,np.bincount(y_train[train]), score))
        
        ### predict
        y_pred = model.predict(X_train.iloc[test,:])
        predict_set += [(x,inv_map[y_pred[i]]) for i,x in enumerate(test)]
        
    f1_mean = np.mean(scores)
    f1_std = np.std(scores)
    
    print(xgb_params)
    print('5-fold of Xgboost F1: %.3f +/- %.3f' % (f1_mean, f1_std))
    
    if f1_mean > cur_best_score:
        cur_best_score = f1_mean
        cur_best_std = f1_std
        
    #### minimize metric
    loss = 1 - f1_mean
    loss_var = 1 - np.var(scores)
    
    return {'loss': loss , 'loss_variance': loss_var ,'status':STATUS_OK ,'attachments':{'pred_cv':predict_set}}

In [None]:
obj_call_count = 0
cur_best_score = 0
cur_best_std = 0

In [None]:
param_space = {
    'n_estimators': 369,
    'learning_rate': 0.1
    'min_child_weight': hp.choice('min_child_weight',range(3,10))
    'max_depth':12

    'reg_alpha': hp.quniform('reg_alpha', 0, 0.1, 0.001)
    'reg_lambda': hp.quniform('reg_lambda', 0, 0.1, 0.001)
    'subsample': hp.quniform('subsample', 0.6, 1, 0.03)
    'colsample_bytree': hp.quniform('colsample_bytree', 0.6, 1, 0.03)
    
    'num_class':4
    'objective': 'multi:softmax'
    'seed': 7
    
    'n_gpus' : -1
    'tree_method' : 'gpu_hist'
    'silent' : 0
    }

In [None]:
trials = Trials()

---

### step 1 :  tuning n_estimators with cross validation

* 새로운 피쳐를 많이 넣으면 다시 돌려주자

In [20]:
#### xgb
grid_result = []
param = {}
#### XGB parameters
## General Parameters
param['n_gpus'] = -1
param['tree_method'] = 'gpu_hist'
param['silent'] = 0

## Booster Parameters
param['n_estimators'] = 369 #요기...
param['learning_rate'] = 0.1
param['min_child_weight'] = 4
param['max_depth'] = 10
param['gamma'] = 0
param['reg_alpha'] = 0
param['reg_lambda'] = 0
param['subsample'] = 0.95
param['colsample_bytree'] = 0.75
param['scale_pos_weight'] = 1

## Learning task parameters
param['num_class'] = 4
param['objective'] = 'multi:softmax'
param['seed'] = 7

cv_folds = 5

In [21]:
 #### step 1 : tuning n_estimators with cross validation
print("===============================================")
print("Find the n_estimators")
xgtrain = xgb.DMatrix(X_train.values, label= y_train.values.reshape(-1,1))
cvresult = xgb.cv(param, xgtrain, num_boost_round = param['n_estimators'], nfold = 5, metrics = "mlogloss", early_stopping_rounds = 50)
print("Optimal n_estimators : %d"%cvresult.shape[0])

Find the n_estimators
Optimal n_estimators : 227


---

### step 2: Tune max_depth and min_child_weight

In [19]:
#### xgb
grid_result = []
param = {}
#### XGB parameters
## General Parameters
param['n_gpus'] = -1
param['tree_method'] = 'gpu_hist'
param['silent'] = 0

## Booster Parameters
param['n_estimators'] = 369 #요기...
param['learning_rate'] = 0.1
param['min_child_weight'] = 6
param['max_depth'] = 12
param['gamma'] = 0
param['reg_alpha'] = 0
param['reg_lambda'] = 0
param['subsample'] = 0.95
param['colsample_bytree'] = 0.75
param['scale_pos_weight'] = 1

## Learning task parameters
param['num_class'] = 4
param['objective'] = 'multi:softmax'
param['seed'] = 7

cv_folds = 5

---

### step 3: Tune subsample and colsample_bytree

In [17]:
param

{'n_gpus': -1,
 'tree_method': 'gpu_hist',
 'silent': 0,
 'n_estimators': 369,
 'learning_rate': 0.1,
 'min_child_weight': 4,
 'max_depth': 10,
 'gamma': 0,
 'reg_alpha': 0,
 'reg_lambda': 0,
 'subsample': 0.95,
 'colsample_bytree': 0.75,
 'scale_pos_weight': 1,
 'num_class': 4,
 'objective': 'multi:softmax',
 'seed': 7}

In [18]:
#### step 4: Tune subsample and colsample_bytree
print()
print("===============================================")
param_tune_sub = {}
param_tune_sub['subsample'] = [0.85,0.88,0.9,0.93,0.95,0.98]
param_tune_sub['colsample_bytree'] = [0.7,0.75,0.8,0.85,0.9,0.95,0.98]

grid = GridSearchCV(estimator = XGBClassifier(**param),param_grid = param_tune_sub, scoring = 'f1_macro',iid=False ,n_jobs = 1, cv = cv_folds)
grid.fit(X_train,y_train)




GridSearchCV(cv=5, error_score='raise',
       estimator=XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
       colsample_bytree=0.75, gamma=0, learning_rate=0.1, max_delta_step=0,
       max_depth=10, min_child_weight=4, missing=None, n_estimators=369,
       n_gpus=-1, n_jobs=1, nthread=None, num_class=4,
       objective='multi:softmax', random_state=0, reg_alpha=0,
       reg_lambda=0, scale_pos_weight=1, seed=7, silent=0, subsample=0.95,
       tree_method='gpu_hist'),
       fit_params=None, iid=False, n_jobs=1,
       param_grid={'subsample': [0.85, 0.88, 0.9, 0.93, 0.95, 0.98], 'colsample_bytree': [0.7, 0.75, 0.8, 0.85, 0.9, 0.95, 0.98]},
       pre_dispatch='2*n_jobs', refit=True, return_train_score='warn',
       scoring='f1_macro', verbose=0)

In [19]:
print("Find the subsample and colsample_bytree")
print("Grid Scores")
pprint(grid.grid_scores_)
grid_result.append(grid.grid_scores_)
print("Best parameter - subsample and colsample_bytree")
print(grid.best_params_)

## update
param.update(grid.best_params_)

Find the subsample and colsample_bytree
Grid Scores
[mean: 0.73660, std: 0.00431, params: {'colsample_bytree': 0.7, 'subsample': 0.85},
 mean: 0.73731, std: 0.00460, params: {'colsample_bytree': 0.7, 'subsample': 0.88},
 mean: 0.73645, std: 0.00507, params: {'colsample_bytree': 0.7, 'subsample': 0.9},
 mean: 0.73778, std: 0.00332, params: {'colsample_bytree': 0.7, 'subsample': 0.93},
 mean: 0.73750, std: 0.00445, params: {'colsample_bytree': 0.7, 'subsample': 0.95},
 mean: 0.73715, std: 0.00490, params: {'colsample_bytree': 0.7, 'subsample': 0.98},
 mean: 0.73704, std: 0.00491, params: {'colsample_bytree': 0.75, 'subsample': 0.85},
 mean: 0.73765, std: 0.00420, params: {'colsample_bytree': 0.75, 'subsample': 0.88},
 mean: 0.73817, std: 0.00499, params: {'colsample_bytree': 0.75, 'subsample': 0.9},
 mean: 0.73712, std: 0.00450, params: {'colsample_bytree': 0.75, 'subsample': 0.93},
 mean: 0.73813, std: 0.00392, params: {'colsample_bytree': 0.75, 'subsample': 0.95},
 mean: 0.73716, std: 



---

### step 5: Regularization alpha = L1, lambda = L2

In [21]:
#### step 5: Regularization alpha = L1, lambda = L2
print()
print("===============================================")
param_tune_reg = {}
param_tune_reg['reg_alpha'] = [0.001,0.003,0.006,0.01,0.03,0.06,0.1]
param_tune_reg['reg_lambda'] = [0.001,0.003,0.006,0.01,0.03,0.06,0.1]

grid = GridSearchCV(estimator = XGBClassifier(**param),param_grid = param_tune_reg, scoring = 'f1_macro',iid=False ,n_jobs = 1, cv = cv_folds)
grid.fit(X_train,y_train)





GridSearchCV(cv=5, error_score='raise',
       estimator=XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
       colsample_bytree=0.75, gamma=0, learning_rate=0.1, max_delta_step=0,
       max_depth=10, min_child_weight=4, missing=None, n_estimators=369,
       n_gpus=-1, n_jobs=1, nthread=None, num_class=4,
       objective='multi:softmax', random_state=0, reg_alpha=0,
       reg_lambda=0, scale_pos_weight=1, seed=7, silent=0, subsample=0.9,
       tree_method='gpu_hist'),
       fit_params=None, iid=False, n_jobs=1,
       param_grid={'reg_alpha': [0.001, 0.003, 0.006, 0.01, 0.03, 0.06, 0.1], 'reg_lambda': [0.001, 0.003, 0.006, 0.01, 0.03, 0.06, 0.1]},
       pre_dispatch='2*n_jobs', refit=True, return_train_score='warn',
       scoring='f1_macro', verbose=0)

In [22]:

print("Find the Regularization alpha = L1, lambda = L2")
print("Grid Scores")
pprint(grid.grid_scores_)
grid_result.append(grid.grid_scores_)
print("Best parameter - Regularization alpha = L1, lambda = L2")
print(grid.best_params_)

## update
param.update(grid.best_params_)

Find the Regularization alpha = L1, lambda = L2
Grid Scores
[mean: 0.73718, std: 0.00459, params: {'reg_alpha': 0.001, 'reg_lambda': 0.001},
 mean: 0.73765, std: 0.00470, params: {'reg_alpha': 0.001, 'reg_lambda': 0.003},
 mean: 0.73747, std: 0.00404, params: {'reg_alpha': 0.001, 'reg_lambda': 0.006},
 mean: 0.73854, std: 0.00530, params: {'reg_alpha': 0.001, 'reg_lambda': 0.01},
 mean: 0.73724, std: 0.00451, params: {'reg_alpha': 0.001, 'reg_lambda': 0.03},
 mean: 0.73842, std: 0.00415, params: {'reg_alpha': 0.001, 'reg_lambda': 0.06},
 mean: 0.73826, std: 0.00473, params: {'reg_alpha': 0.001, 'reg_lambda': 0.1},
 mean: 0.73814, std: 0.00449, params: {'reg_alpha': 0.003, 'reg_lambda': 0.001},
 mean: 0.73810, std: 0.00458, params: {'reg_alpha': 0.003, 'reg_lambda': 0.003},
 mean: 0.73762, std: 0.00490, params: {'reg_alpha': 0.003, 'reg_lambda': 0.006},
 mean: 0.73756, std: 0.00415, params: {'reg_alpha': 0.003, 'reg_lambda': 0.01},
 mean: 0.73779, std: 0.00445, params: {'reg_alpha': 0.0



In [23]:
param

{'n_gpus': -1,
 'tree_method': 'gpu_hist',
 'silent': 0,
 'n_estimators': 369,
 'learning_rate': 0.1,
 'min_child_weight': 4,
 'max_depth': 10,
 'gamma': 0,
 'reg_alpha': 0.1,
 'reg_lambda': 0.03,
 'subsample': 0.9,
 'colsample_bytree': 0.75,
 'scale_pos_weight': 1,
 'num_class': 4,
 'objective': 'multi:softmax',
 'seed': 7}

### Final step : Tune the learning rate

* use small learning rate

In [15]:
#### xgb
grid_result = []
param = {}
#### XGB parameters
## General Parameters
param['n_gpus'] = -1
param['tree_method'] = 'gpu_hist'
param['silent'] = 0

## Booster Parameters
param['n_estimators'] = 227 #요기...
param['learning_rate'] = 0.1
param['min_child_weight'] = 4
param['max_depth'] = 10
param['gamma'] = 0
param['reg_alpha'] = 0.1
param['reg_lambda'] = 0.03
param['subsample'] = 0.9
param['colsample_bytree'] = 0.75
param['scale_pos_weight'] = 1

## Learning task parameters
param['num_class'] = 4
param['objective'] = 'multi:softmax'
param['seed'] = 7

cv_folds = 5

In [20]:
#### 0.01 / 2326
cvresult

Unnamed: 0,train-mlogloss-mean,train-mlogloss-std,test-mlogloss-mean,test-mlogloss-std
0,1.376122,0.000031,1.376718,0.000056
1,1.366150,0.000068,1.367342,0.000114
2,1.356351,0.000040,1.358133,0.000162
3,1.346763,0.000049,1.349125,0.000184
4,1.337293,0.000081,1.340250,0.000196
5,1.327997,0.000108,1.331541,0.000231
6,1.318822,0.000143,1.322930,0.000285
7,1.309828,0.000149,1.314488,0.000309
8,1.300931,0.000169,1.306182,0.000338
9,1.292124,0.000207,1.297967,0.000347


---

* best paramter test

In [15]:
#### xgb
grid_result = []
param = {}
#### XGB parameters
## General Parameters
param['n_gpus'] = -1
param['tree_method'] = 'gpu_hist'
param['silent'] = 0

## Booster Parameters
param['n_estimators'] = 369 #요기...
param['learning_rate'] = 0.1
param['min_child_weight'] = 2
param['max_depth'] = 10
param['gamma'] = 0
param['reg_alpha'] = 0.01
param['reg_lambda'] = 0.05
param['subsample'] = 0.95
param['colsample_bytree'] = 0.75
param['scale_pos_weight'] = 1

## Learning task parameters
param['num_class'] = 4
param['objective'] = 'multi:softmax'
param['seed'] = 7

cv_folds = 5

In [16]:
#### 그리드....
print()
print("===============================================")
param_tune_reg = {}
param_tune_reg['reg_alpha'] = [0.01]
param_tune_reg['reg_lambda'] = [0.05]

grid = GridSearchCV(estimator = XGBClassifier(**param),param_grid = param_tune_reg, scoring = 'f1_macro',iid=False ,n_jobs = 1, cv = cv_folds)
grid.fit(X_train,y_train)





GridSearchCV(cv=5, error_score='raise',
       estimator=XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
       colsample_bytree=0.75, gamma=0, learning_rate=0.1, max_delta_step=0,
       max_depth=10, min_child_weight=2, missing=None, n_estimators=369,
       n_gpus=-1, n_jobs=1, nthread=None, num_class=4,
       objective='multi:softmax', random_state=0, reg_alpha=0.01,
       reg_lambda=0.05, scale_pos_weight=1, seed=7, silent=0,
       subsample=0.95, tree_method='gpu_hist'),
       fit_params=None, iid=False, n_jobs=1,
       param_grid={'reg_alpha': [0.01], 'reg_lambda': [0.05]},
       pre_dispatch='2*n_jobs', refit=True, return_train_score='warn',
       scoring='f1_macro', verbose=0)

In [17]:

print("Find the Regularization alpha = L1, lambda = L2")
print("Grid Scores")
pprint(grid.grid_scores_)
grid_result.append(grid.grid_scores_)
print("Best parameter - Regularization alpha = L1, lambda = L2")
print(grid.best_params_)

Find the Regularization alpha = L1, lambda = L2
Grid Scores
[mean: 0.73676, std: 0.00386, params: {'reg_alpha': 0.01, 'reg_lambda': 0.05}]
Best parameter - Regularization alpha = L1, lambda = L2
{'reg_alpha': 0.01, 'reg_lambda': 0.05}




In [18]:
!pip install hyperopt

Collecting hyperopt
[?25l  Downloading https://files.pythonhosted.org/packages/ce/9f/f6324af3fc43f352e568b5850695c30ed7dd14af06a94f97953ff9187569/hyperopt-0.1.1-py3-none-any.whl (117kB)
[K    100% |████████████████████████████████| 122kB 1.5MB/s ta 0:00:01
Collecting future (from hyperopt)
[?25l  Downloading https://files.pythonhosted.org/packages/00/2b/8d082ddfed935f3608cc61140df6dcbf0edea1bc3ab52fb6c29ae3e81e85/future-0.16.0.tar.gz (824kB)
[K    100% |████████████████████████████████| 829kB 3.9MB/s ta 0:00:01
[?25hCollecting pymongo (from hyperopt)
[?25l  Downloading https://files.pythonhosted.org/packages/11/88/dd1f8c4281a60791b043f55e338d0521049208f21e3de19ddc9c160dbbef/pymongo-3.7.1-cp36-cp36m-manylinux1_x86_64.whl (405kB)
[K    100% |████████████████████████████████| 409kB 5.0MB/s ta 0:00:01
Building wheels for collected packages: future
  Running setup.py bdist_wheel for future ... [?25ldone
[?25h  Stored in directory: /home/jovyan/.cache/pip/wheels/bf/c9/a3/c538d90ef17