In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn import preprocessing
import numpy as np
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import f1_score

In [2]:
import warnings
# Silence every warning for the rest of the session. This also hides pandas
# chained-assignment warnings and scikit-learn / xgboost deprecation notices
# raised by the cells below, so remove this line when debugging them.
warnings.filterwarnings("ignore")

In [3]:
# Load the well-log table. Later cells read the 'Well Name', 'Depth' and
# 'Facies' columns plus the log columns listed in feature_names.
data = pd.read_csv('facies_vectors_0.csv')

# Model inputs: the seven raw log columns followed by their "*_diff_up"
# counterparts (difference to the previous depth sample of the same well),
# which are created in a later cell.
feature_names = [
    'GR', 'ILD_log10', 'DeltaPHI', 'PHIND', 'PE', 'NM_M', 'RELPOS',
    'GR_diff_up', 'ILD_log10_diff_up', 'DeltaPHI_diff_up', 'PHIND_diff_up',
    'PE_diff_up', 'NM_M_diff_up', 'RELPOS_diff_up',
]
facies_names = ['SS', 'CSiS', 'FSiS', 'SiSh', 'MS', 'WS', 'D', 'PS', 'BS']
facies_colors = ['#F4D03F', '#F5B041','#DC7633','#6E2C00', '#1B4F72','#2E86C1', '#AED6F1', '#A569BD', '#196F3D']

# Impute missing PE readings with the PE mean. The fill is restricted to the
# PE column: a bare data.fillna(scalar) would also write the PE mean into any
# missing value of unrelated columns (Depth, Facies, other logs, ...).
data = data.fillna({'PE': data['PE'].mean()})

In [4]:
def find_diff(row, well):
    """Return this row's change relative to the previous row seen for `well`.

    Only the values from position 4 onwards of `row` are used. The previous
    values are remembered per well in the module-level dict
    `prev_depth_features`, which must already hold an entry for `well`
    (an empty sequence means "no previous row yet").

    Returns None for the first row of a well, otherwise the element-wise
    difference ``current - previous``.
    """
    current = row.values[4:]
    previous = prev_depth_features[well]
    # Remember the current row before returning so the next call diffs against it.
    prev_depth_features[well] = current
    if len(previous) == 0:
        return None
    return current - previous
# Build `new_data`: for every well, sort by depth, compute the difference of
# each log value to the previous depth sample ("diff_up") and expand that
# difference vector into one column per log.
data_well = dict()             # well name -> augmented per-well frame (keeps 'diff_up')
data_well_inverse = dict()     # not used by this cell; kept so the name stays defined
prev_depth_features = dict()   # scratch state read and written by find_diff
prev_class = dict()            # reset per well below but never read in this cell
data_save = pd.DataFrame()     # depth-reversed copy of the last processed well

# Column created from each position of the vector returned by find_diff.
diff_column_names = [
    'GR_diff_up', 'ILD_log10_diff_up', 'DeltaPHI_diff_up', 'PHIND_diff_up',
    'PE_diff_up', 'NM_M_diff_up', 'RELPOS_diff_up',
]

well_frames = []
# unique() keeps first-appearance order, so the row order of new_data is the
# same on every run (iterating a set of strings is not).
for well in data['Well Name'].unique():
    prev_depth_features[well] = []
    prev_class[well] = []
    # Explicit copy: columns are added below and must not touch `data`.
    well_frame = data[data['Well Name'] == well].sort_values(by=['Depth']).copy()
    data_save = well_frame.iloc[::-1]
    well_frame['diff_up'] = well_frame.apply(lambda row: find_diff(row, well), axis=1)
    prev_depth_features[well] = []
    prev_class[well] = []

    # The first sample of a well has no predecessor (diff_up is None) and is
    # dropped here, together with any row that still contains a missing value.
    well_frame = well_frame.dropna().copy()
    for position, column in enumerate(diff_column_names):
        well_frame[column] = well_frame['diff_up'].map(
            lambda diff, position=position: diff[position])

    data_well[well] = well_frame
    well_frames.append(well_frame.drop(['diff_up'], axis=1))

# Concatenate once instead of growing the frame inside the loop.
new_data = pd.concat(well_frames) if well_frames else pd.DataFrame()


In [5]:
def augment_features_window(X, N_neig):
    """Augment every row of X with its neighbouring rows and pairwise means.

    For each row r the output row is built, for offsets c = -N_neig..N_neig,
    from X[r+c] followed (for c != 0) by the mean of X[r] and X[r+c].
    Rows outside X are treated as zeros.

    Parameters
    ----------
    X : 2-D array, shape (n_rows, n_feat)
    N_neig : int
        Number of neighbours taken on each side.

    Returns
    -------
    ndarray, shape (n_rows, n_feat * (4 * N_neig + 1))
    """
    X = np.asarray(X, dtype=float)
    N_row, N_feat = X.shape

    # Pad exactly N_neig zero rows per side so that padded row (i + N_neig) is
    # original row i. The earlier version padded 2 * N_neig rows but still
    # offset the loop by N_neig, which centred every window N_neig rows above
    # the sample it was stored for and never looked at the rows below it.
    padding = np.zeros((N_neig, N_feat))
    X_pad = np.vstack((padding, X, padding))

    X_aug = np.zeros((N_row, N_feat * (4 * N_neig + 1)))
    for i in range(N_row):
        center = i + N_neig
        pieces = []
        for c in range(-N_neig, N_neig + 1):
            pieces.append(X_pad[center + c])
            if c != 0:
                pieces.append((X_pad[center] + X_pad[center + c]) / 2)
        X_aug[i] = np.hstack(pieces)

    return X_aug

def augment_features_gradient(X, depth):
    """Return the forward difference of X with respect to depth.

    Row i of the result is (X[i+1] - X[i]) / (depth[i+1] - depth[i]); a final
    row of zeros is appended so the result has as many rows as X.

    Parameters
    ----------
    X : 2-D array, shape (n_rows, n_feat)
    depth : 1-D array of length n_rows
    """
    # Work in float: with an integer depth array the 0.001 replacement below
    # would be truncated to 0 and the division would still blow up.
    d_diff = np.diff(np.asarray(depth, dtype=float)).reshape((-1, 1))
    # Repeated depths would divide by zero; substitute a small step instead.
    d_diff[d_diff == 0] = 0.001
    X_diff = np.diff(X, axis=0)
    X_grad = X_diff / d_diff
    X_grad = np.concatenate((X_grad, np.zeros((1, X_grad.shape[1]))))

    return X_grad

def augment_features(X, well, depth, N_neig=1):
    """Apply augment_features_window separately to the rows of each well.

    Parameters
    ----------
    X : 2-D array, shape (n_rows, n_feat)
    well : array of length n_rows giving the well of every row
    depth : accepted for interface compatibility; not read, because the
        gradient augmentation is currently disabled
    N_neig : int, neighbours per side passed to augment_features_window

    Returns
    -------
    ndarray, shape (n_rows, n_feat * (4 * N_neig + 1)), rows in the order of X
    """
    n_samples, n_features = X.shape
    augmented = np.zeros((n_samples, n_features * (4 * N_neig + 1)))
    for well_id in np.unique(well):
        rows = np.flatnonzero(well == well_id)
        # Windows are built per well so neighbours never cross a well boundary.
        augmented[rows, :] = augment_features_window(X[rows, :], N_neig)
    return augmented


In [7]:
import numpy as np
import xgboost as xgb
import math
from sklearn.preprocessing import OneHotEncoder

print('start running example to used customized objective function')

# Booster settings for the first-stage (group1 vs. group2) model.
params = dict(max_depth=2, eta=0.1, silent=1,
              objective='multi:softprob', num_class=2)

# The second-stage model uses the same settings; kept as an independent dict
# so either stage can be tuned without affecting the other.
params_group1 = dict(params)

num_round = 2
def _softprob_grad_hess(preds, dtrain):
    """Gradient and diagonal Hessian of softmax cross-entropy.

    Parameters
    ----------
    preds : 2-D array, shape (n_samples, n_classes), class probabilities
    dtrain : object exposing get_label() -> integer class ids in
        [0, n_classes)

    Returns
    -------
    (grad, hess) : two flattened 1-D arrays of length n_samples * n_classes

    Raises
    ------
    ValueError
        If a label lies outside [0, n_classes).
    """
    labels = np.asarray(dtrain.get_label()).astype(int)
    probs = np.asarray(preds, dtype=float)
    n_classes = probs.shape[1]
    if labels.size and (labels.min() < 0 or labels.max() >= n_classes):
        raise ValueError("labels must lie in [0, %d)" % n_classes)
    # One-hot encode with NumPy: no dependence on OneHotEncoder keyword
    # arguments (n_values, sparse) that newer scikit-learn no longer accepts.
    labels_hot = np.eye(n_classes)[labels]
    grad = probs - labels_hot
    hess = probs * (1.0 - probs)

    return grad.flatten(), hess.flatten()


def my_softmax(preds, dtrain):
    """Custom objective for the first-stage model; see _softprob_grad_hess."""
    return _softprob_grad_hess(preds, dtrain)


def my_softmax_group1(preds, dtrain):
    """Custom objective for the second-stage (group1) model.

    Identical to my_softmax; kept as a separate name for the existing callers.
    """
    return _softprob_grad_hess(preds, dtrain)

start running example to used customized objective function


In [8]:
def most_similar(y_res, y_test):
    """Count disagreements between two label sequences.

    For every position where y_res and y_test differ, the module-level counter
    ``classes[(y_res value, y_test value)]`` is incremented. Matching positions
    are ignored. Nothing is returned.
    """
    for position in range(len(y_res)):
        pair = (y_res[position], y_test[position])
        if pair[0] == pair[1]:
            continue
        classes[pair] += 1
            

In [9]:
def find_sum_classes(y_test):
    """Add the occurrences of each label in y_test to the global sum_classes.

    Every label in y_test must already be a key of ``sum_classes``.
    """
    for position in range(len(y_test)):
        label = y_test[position]
        sum_classes[label] = sum_classes[label] + 1

In [10]:
def similar_percent():
    """Convert the global mismatch counts in `classes` to fractions, in place.

    Every entry ``classes[(class1, class2)]`` is divided by
    ``sum_classes[class1]``. Entries whose class1 has no counted samples
    (total 0 or missing) are set to 0.0 instead of raising ZeroDivisionError.

    The function mutates `classes`, so it is not idempotent: calling it twice
    divides the values twice.
    """
    # NOTE(review): most_similar keys `classes` as (y_res, y_test) while
    # find_sum_classes counts y_test labels, yet the divisor here is looked up
    # with the first key component. Confirm the intended normalisation axis.
    for key in list(classes):
        class1 = key[0]
        total = sum_classes.get(class1, 0)
        if total == 0:
            classes[key] = 0.0
        else:
            classes[key] = classes[key] / total

In [13]:
import numpy.random as random
import copy
# Per-well train/test splits, filled by the evaluation loop.
test = {}
train = {}

# Running sums of the per-well F1 scores (first stage / second stage).
acc = 0
acc_all = 0

wells = set(data['Well Name'])

# Mismatch counters for every ordered pair of the 9 facies classes, and the
# per-class sample totals used to normalise them.
classes = {(class1, class2): 0 for class1 in range(9) for class2 in range(9)}
sum_classes = dict.fromkeys(range(9), 0)

# Facies codes handled by the second-stage model, and all remaining codes.
group1 = [7, 8]
group2 = [1, 2, 3, 4, 5, 6, 0]
# Leave-one-well-out evaluation of a two-stage classifier.
#   Stage 1: binary model, group1 facies (label 0) vs. group2 facies (label 1).
#   Stage 2: binary model trained on the true group1 rows only; it assigns one
#            of the group1 facies to the test rows stage 1 placed in group1.
n_boost_rounds = 150                  # boosting iterations used by both stages
group1_facies = np.asarray(group1)    # position in group1 == stage-2 class id
n_wells_stage2 = 0                    # wells that produced a stage-2 score


def _to_group_labels(y):
    """Map facies codes to 0 (in group1) / 1 (in group2); others unchanged."""
    return np.where(np.isin(y, group1), 0,
                    np.where(np.isin(y, group2), 1, y))


for well in wells:

    print(well)
    test[well] = new_data[new_data['Well Name'] == well]
    train[well] = new_data[new_data['Well Name'] != well]
    X_train = train[well][feature_names].values.copy()
    y_train = train[well]['Facies'].values.copy()
    X_test = test[well][feature_names].values.copy()
    y_test = test[well]['Facies'].values.copy()
    well_train = train[well]['Well Name'].values
    well_test = test[well]['Well Name'].values
    depth_train = train[well]['Depth'].values
    depth_test = test[well]['Depth'].values
    X_aug_train = augment_features(X_train, well_train, depth_train)
    X_aug_test = augment_features(X_test, well_test, depth_test)

    # Both scalers are fitted on the training wells only.
    robust = preprocessing.RobustScaler(quantile_range=(25.0, 75.0)).fit(X_aug_train)
    X_train_robust = robust.transform(X_aug_train)
    X_test_robust = robust.transform(X_aug_test)

    scaler = StandardScaler().fit(X_train_robust)
    X_train_robust_norm = scaler.transform(X_train_robust)
    X_test_robust_norm = scaler.transform(X_test_robust)

    # ---- Stage 1: group1 vs. group2 ------------------------------------
    y_train_01 = _to_group_labels(y_train)
    y_test_01 = _to_group_labels(y_test)

    dtrain = xgb.DMatrix(X_train_robust_norm, label=y_train_01)
    dtest = xgb.DMatrix(X_test_robust_norm, label=y_test_01)

    model = xgb.Booster(params, [dtrain])
    for _ in range(n_boost_rounds):
        pred = model.predict(dtrain)
        g, h = my_softmax(pred, dtrain)
        model.boost(dtrain, g, h)

    yhat = model.predict(dtest)
    yhat_labels = np.argmax(yhat, axis=1)
    acc += f1_score(y_test_01, yhat_labels, average='micro')

    # ---- Stage 2: resolve the individual group1 facies -------------------
    # Training rows: those whose true facies is in group1.
    train_in_group1 = np.isin(y_train, group1)
    X_train_robust_norm_group1 = X_train_robust_norm[train_in_group1]
    y_train_group1 = y_train[train_in_group1]
    # Facies code -> position in group1 (7 -> 0, 8 -> 1 for group1 = [7, 8]).
    y_train_group1_changeclasses = np.argmax(
        y_train_group1[:, None] == group1_facies[None, :], axis=1)

    # Test rows: those stage 1 did NOT label 1 (group2).
    predicted_group1 = yhat_labels != 1
    X_test_robust_norm_group1 = X_test_robust_norm[predicted_group1]
    y_test_group1 = y_test[predicted_group1]

    if len(X_test_robust_norm_group1) == 0:
        print("empty test")
        continue

    dtrain_group1 = xgb.DMatrix(X_train_robust_norm_group1, label=y_train_group1_changeclasses)
    dtest_group1 = xgb.DMatrix(X_test_robust_norm_group1, label=y_test_group1)

    model_group1 = xgb.Booster(params_group1, [dtrain_group1])
    for _ in range(n_boost_rounds):
        pred = model_group1.predict(dtrain_group1)
        g, h = my_softmax_group1(pred, dtrain_group1)
        model_group1.boost(dtrain_group1, g, h)

    yhat_group1 = model_group1.predict(dtest_group1)
    yhat_labels_group1 = np.argmax(yhat_group1, axis=1)
    # Stage-2 class id -> original facies code (0 -> 7, 1 -> 8).
    yhat_labels_group1_changeclasses = group1_facies[yhat_labels_group1]

    score_group1 = f1_score(y_test_group1, yhat_labels_group1_changeclasses, average='micro')
    print("Model group1",  score_group1)
    acc_all += score_group1
    n_wells_stage2 += 1

    # Write the stage-2 result back as facies codes. The previous version
    # copied the local 0/1 ids here instead of the remapped 7/8 codes. Rows
    # that stage 1 assigned to group2 keep the placeholder label 1.
    yhat_labels[predicted_group1] = yhat_labels_group1_changeclasses

# Mean stage-1 score over all wells (previously a hard-coded division by 10).
mean_acc = acc / len(wells) if wells else 0.0
# Mean stage-2 score over the wells that reached stage 2; wells skipped with
# "empty test" contribute no score and are therefore not counted.
mean_acc_all = acc_all / n_wells_stage2 if n_wells_stage2 else 0.0
print('well, boosting of trees, ', mean_acc)
print('well, boosting of trees, ', mean_acc_all)

Recruit F9
[3 3 3 ... 8 4 4]
[1 1 1 ... 1 1 1]
empty test
SHRIMPLIN
[0 0 0 ... 8 4 4]
[1 1 1 ... 1 1 1]
empty test
KIMZEY A
[0 0 0 ... 8 4 4]
[1 1 1 ... 1 1 1]
empty test
NOLAN


KeyboardInterrupt: 

In [12]:
# Display the pairwise mismatch counters. None of the cells visible in this
# notebook calls most_similar(), so the counters are expected to still be at
# their initial value of 0 here.
classes

{(0, 0): 0,
 (0, 1): 0,
 (0, 2): 0,
 (0, 3): 0,
 (0, 4): 0,
 (0, 5): 0,
 (0, 6): 0,
 (0, 7): 0,
 (0, 8): 0,
 (1, 0): 0,
 (1, 1): 0,
 (1, 2): 0,
 (1, 3): 0,
 (1, 4): 0,
 (1, 5): 0,
 (1, 6): 0,
 (1, 7): 0,
 (1, 8): 0,
 (2, 0): 0,
 (2, 1): 0,
 (2, 2): 0,
 (2, 3): 0,
 (2, 4): 0,
 (2, 5): 0,
 (2, 6): 0,
 (2, 7): 0,
 (2, 8): 0,
 (3, 0): 0,
 (3, 1): 0,
 (3, 2): 0,
 (3, 3): 0,
 (3, 4): 0,
 (3, 5): 0,
 (3, 6): 0,
 (3, 7): 0,
 (3, 8): 0,
 (4, 0): 0,
 (4, 1): 0,
 (4, 2): 0,
 (4, 3): 0,
 (4, 4): 0,
 (4, 5): 0,
 (4, 6): 0,
 (4, 7): 0,
 (4, 8): 0,
 (5, 0): 0,
 (5, 1): 0,
 (5, 2): 0,
 (5, 3): 0,
 (5, 4): 0,
 (5, 5): 0,
 (5, 6): 0,
 (5, 7): 0,
 (5, 8): 0,
 (6, 0): 0,
 (6, 1): 0,
 (6, 2): 0,
 (6, 3): 0,
 (6, 4): 0,
 (6, 5): 0,
 (6, 6): 0,
 (6, 7): 0,
 (6, 8): 0,
 (7, 0): 0,
 (7, 1): 0,
 (7, 2): 0,
 (7, 3): 0,
 (7, 4): 0,
 (7, 5): 0,
 (7, 6): 0,
 (7, 7): 0,
 (7, 8): 0,
 (8, 0): 0,
 (8, 1): 0,
 (8, 2): 0,
 (8, 3): 0,
 (8, 4): 0,
 (8, 5): 0,
 (8, 6): 0,
 (8, 7): 0,
 (8, 8): 0}