In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn import preprocessing
import numpy as np
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import f1_score

In [2]:
import warnings
warnings.filterwarnings("ignore")

In [3]:
data = pd.read_csv('facies_vectors_0.csv')
feature_names = ['GR', 'ILD_log10', 'DeltaPHI', 'PHIND', 'PE', 'NM_M', 'RELPOS']
facies_names = ['SS', 'CSiS', 'FSiS', 'SiSh', 'MS', 'WS', 'D', 'PS', 'BS']
facies_colors = ['#F4D03F', '#F5B041','#DC7633','#6E2C00', '#1B4F72','#2E86C1', '#AED6F1', '#A569BD', '#196F3D']
data = data.fillna(data['PE'].mean())

In [4]:
import numpy as np
import xgboost as xgb
import math
from sklearn.preprocessing import OneHotEncoder

print('start running example to used customized objective function')

params = {'max_depth': 2, 'eta': 0.1, 'silent': 1,
          'objective': 'multi:softprob', 'num_class': 9}

num_round = 2
def my_softmax(preds, dtrain):
    labels = dtrain.get_label()
    labels = OneHotEncoder(sparse=False, n_values=9).fit_transform(labels.reshape(-1, 1))
    grad = preds - labels
    hess = preds * (1.0-preds)
#     hess = -preds
    K1 = [[0]*9]*dtrain.num_row()
    dtrain_len = dtrain.num_row()
    for object_ in range(1, dtrain_len-1):
        if(max(preds[object_-1]) == max(preds[object_+1])):
            for class_  in range(9):
                if (preds[object_+1][class_] - preds[object_-1][class_] != 0):
                    K1[object_][class_] = (preds[object_+1][class_] - preds[object_-1][class_])/abs(preds[object_+1][class_] - preds[object_-1][class_])
    K2 = [[0]*9]*dtrain.num_row()
    for object_ in range(1, dtrain_len-1):
        if(max(preds[object_-1]) == max(preds[object_+1])):
            for class_  in range(9):
                if (preds[object_+1][class_] - preds[object_][class_] != 0):
                    K2[object_][class_] = (preds[object_+1][class_] - preds[object_][class_])/abs(preds[object_+1][class_] - preds[object_][class_])
    K3 = [[0]*9]*dtrain.num_row()
    for object_ in range(1, dtrain_len-1):
        if(max(preds[object_-1]) == max(preds[object_+1])):
            for class_  in range(9):
                if (preds[object_][class_] - preds[object_-1][class_] != 0):
                    K3[object_][class_] = (preds[object_][class_] - preds[object_-1][class_])/abs(preds[object_][class_] - preds[object_-1][class_])
    
    L = [[0]*9]*dtrain.num_row()
    
    for object_ in range(1, dtrain_len-1):
        if(max(preds[object_-1]) == max(preds[object_+1])):
            for index in range(9):
                for class_ in range(9):
                    L[object_][index] = L[object_][index] - 2 * K1[object_][class_] * (preds[object_-1][class_] * preds[object_-1][index] - preds[object_+1][class_] * preds[object_+1][index])
                    L[object_][index] = L[object_][index] + K2[object_][class_] * (preds[object_][class_] * preds[object_][index] - preds[object_+1][class_] * preds[object_+1][index])
                    L[object_][index] = L[object_][index] + K3[object_][class_] * (preds[object_-1][class_] * preds[object_-1][index] - preds[object_][class_] * preds[object_][index])
                L[object_][index] = L[object_][index] - 2 * K1[object_][index] * (preds[object_+1][index] - preds[object_-1][index])
                L[object_][index] = L[object_][index] + K2[object_][index] * (preds[object_+1][index] - preds[object_][index])
                L[object_][index] = L[object_][index] + K3[object_][index] * (preds[object_][index] - preds[object_-1][index])
    grad = grad +  np.array(L)
    return grad.flatten(), hess.flatten()

def softmaxobj(preds, dtrain):
    """Softmax objective.
    Args:
        preds: (N, K) array, N = #data, K = #classes. 
        dtrain: DMatrix object with training data.
    
    Returns:
        grad: N*K array with gradient values.
        hess: N*K array with second-order gradient values.
    """
    # Label is a vector of class indices for each input example
    labels = dtrain.get_label()
    # When objective=softprob, preds has shape (N, K)
    labels = OneHotEncoder(sparse=False, n_values=9).fit_transform(labels.reshape(-1, 1))
    grad = preds - labels
    hess = 2.0 * preds * (1.0-preds)
    # Return as 1-d vectors
    return grad.flatten(), hess.flatten()

def evalerror(preds, dtrain):
    labels = dtrain.get_label()
    return 'error', float(sum(labels != (preds > 0.0))) / len(labels)

def my_evalerror(preds, dtrain):
    labels = dtrain.get_label()
    evals = 0
    for index in range(len(labels)):
        if (labels[index] == preds[index]):
            evals += 1
    print("Eval error", evals/len(labels))
    return 'error', evals / len(labels)

acc = 0
# for well in set(data['Well Name']):
well = 'SHRIMPLIN'
print(well)
train = data[data['Well Name'] != well]
test = data[data['Well Name'] == well]
X_train = train[feature_names].values
y_train = train['Facies'].values
X_test = test[feature_names].values
y_test = test['Facies'].values

scaler = StandardScaler().fit(X_train)
X_train_norm = scaler.transform(X_train)
X_test_norm = scaler.transform(X_test)
dtrain = xgb.DMatrix(X_train_norm, label=y_train)
dtest = xgb.DMatrix(X_test_norm, label=y_test)
watchlist = [(dtest, 'eval'), (dtrain, 'train')]
#     model = xgb.train(params, dtrain, 100)
#     bst = xgb.train(param, dtrain, num_round, watchlist, obj=my_logregobj)
model = xgb.Booster(params, [dtrain])
for _ in range(100):
    pred = model.predict(dtrain)
    g, h = my_softmax(pred, dtrain)
    model.boost(dtrain, g, h)
# Evalute
yhat = model.predict(dtest)
yhat_labels = np.argmax(yhat, axis=1)
#     ypred = bst.predict(dtest)
print(len(y_test), " ", len(yhat_labels))
acc += f1_score(y_test, yhat_labels, average='micro')
print('well, boosting of trees, ', acc/10)   

start running example to used customized objective function
SHRIMPLIN
471   471
well, boosting of trees,  0.021656050955414015


In [13]:
import numpy as np
import xgboost as xgb
import math
from sklearn.preprocessing import OneHotEncoder

print('start running example to used customized objective function')

params = {'max_depth': 2, 'eta': 0.1, 'silent': 1,
          'objective': 'multi:softprob', 'num_class': 9}

num_round = 2
def my_softmax(preds, dtrain):
    labels = dtrain.get_label()
    labels = OneHotEncoder(sparse=False, n_values=9).fit_transform(labels.reshape(-1, 1))
    grad = preds - labels
    hess = preds * (1.0-preds)
    dtrain_len = dtrain.num_row()
    K3 = [[0]*9]*dtrain.num_row()
    for object_ in range(1, dtrain_len-1):
        for class_  in range(9):
            if (preds[object_][class_] - preds[object_-1][class_] != 0):
                K3[object_][class_] = (preds[object_][class_] - preds[object_-1][class_])/abs(preds[object_][class_] - preds[object_-1][class_])
    
    L = [[0]*9]*dtrain.num_row()
    
    for object_ in range(1, dtrain_len-1):
        ind1 = np.where(preds[object_-1] == max(preds[object_-1]))[0][0]
        ind2 = np.where(preds[object_+1] == max(preds[object_+1]))[0][0]
        if(ind1 == ind2):
            for index in range(9):
                for class_ in range(9):
                    L[object_][index] = L[object_][index] + K3[object_][class_] * (preds[object_-1][class_] * preds[object_-1][index] - preds[object_][class_] * preds[object_][index])
                L[object_][index] = L[object_][index] + K3[object_][index] * (preds[object_][index] - preds[object_-1][index])
    grad = grad + np.array(L)
    
#     H = [[0]*9]*dtrain.num_row()
#     for object_ in range(1, dtrain_len-1):
#         if(max(preds[object_-1]) == max(preds[object_+1])):
#             for index in range(9):
#                 for class_ in range(9):
#                     H[object_][index] = H[object_][index] + (preds[object_][index]*preds[object_][class_] - preds[object_-1][index]*preds[object_-1][class_] + 2*preds[object_-1][class_]*preds[object_-1][index]*preds[object_-1][index] - 2*preds[object_-1][class_]*preds[object_][index]*preds[object_][index])*abs(preds[object_-1][index] - preds[object_][index])/(preds[object_-1][index] - preds[object_][index])
#                 H[object_][index] = H[object_][index] + (preds[object_][index] * preds[object_][index] - preds[object_-1][index] * preds[object_-1][index])*abs(preds[object_-1][index] - preds[object_][index])/(preds[object_-1][index] - preds[object_][index])
#                 H[object_][index] = H[object_][index] + (2*(preds[object_-1][index] - preds[object_][index] + preds[object_][index]* preds[object_][index] - preds[object_-1][index] * preds[object_-1][index]) - 1)*abs(preds[object_-1][index] - preds[object_][index])/(preds[object_-1][index] - preds[object_][index])
#     hess = hess + np.array(H)
    return grad.flatten(), hess.flatten()

def softmaxobj(preds, dtrain):
    """Softmax objective.
    Args:
        preds: (N, K) array, N = #data, K = #classes. 
        dtrain: DMatrix object with training data.
    
    Returns:
        grad: N*K array with gradient values.
        hess: N*K array with second-order gradient values.
    """
    # Label is a vector of class indices for each input example
    labels = dtrain.get_label()
    # When objective=softprob, preds has shape (N, K)
    labels = OneHotEncoder(sparse=False, n_values=9).fit_transform(labels.reshape(-1, 1))
    grad = preds - labels
    hess = 2.0 * preds * (1.0-preds)
    # Return as 1-d vectors
    return grad.flatten(), hess.flatten()

def evalerror(preds, dtrain):
    labels = dtrain.get_label()
    return 'error', float(sum(labels != (preds > 0.0))) / len(labels)

def my_evalerror(preds, dtrain):
    labels = dtrain.get_label()
    evals = 0
    for index in range(len(labels)):
        if (labels[index] == preds[index]):
            evals += 1
    print("Eval error", evals/len(labels))
    return 'error', evals / len(labels)

acc = 0
# for well in set(data['Well Name']):
for well in set(data['Well Name']):
# well = 'SHRIMPLIN'
    print(well)
    train = data[data['Well Name'] != well]
    test = data[data['Well Name'] == well]
    X_train = train[feature_names].values
    y_train = train['Facies'].values
    X_test = test[feature_names].values
    y_test = test['Facies'].values

    scaler = StandardScaler().fit(X_train)
    X_train_norm = scaler.transform(X_train)
    X_test_norm = scaler.transform(X_test)
    dtrain = xgb.DMatrix(X_train_norm, label=y_train)
    dtest = xgb.DMatrix(X_test_norm, label=y_test)
    watchlist = [(dtest, 'eval'), (dtrain, 'train')]
    #     model = xgb.train(params, dtrain, 100)
    #     bst = xgb.train(param, dtrain, num_round, watchlist, obj=my_logregobj)
    model = xgb.Booster(params, [dtrain])
    for _ in range(100):
        pred = model.predict(dtrain)
        g, h = my_softmax(pred, dtrain)
        model.boost(dtrain, g, h)
    # Evalute
    yhat = model.predict(dtest)
    yhat_labels = np.argmax(yhat, axis=1)
    #     ypred = bst.predict(dtest)
    print(len(y_test), " ", len(yhat_labels))
    acc += f1_score(y_test, yhat_labels, average='micro')
print('well, boosting of trees, ', acc/10)   

start running example to used customized objective function
KIMZEY A
439   439
ALEXANDER D
466   466
SHRIMPLIN
471   471
Recruit F9
80   80
CROSS H CATTLE
501   501
SHANKLE
449   449
CHURCHMAN BIBLE
404   404
NEWBY
463   463
LUKE G U
461   461
NOLAN
415   415
well, boosting of trees,  0.509848495390181


In [32]:
yhat1 = model.predict(dtrain)
yhat1_labels = np.argmax(yhat1, axis=1)
print(f1_score(y_train, yhat1_labels, average='micro'))

0.584703947368421
