In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn import preprocessing
import numpy as np
from sklearn.preprocessing import StandardScaler

In [2]:
import warnings
# Suppress every warning for the rest of the session (no category or module
# filter is given, so this applies globally).
# NOTE(review): this also hides deprecation and convergence warnings from the
# libraries used below; consider narrowing it to specific categories.
warnings.filterwarnings("ignore")

In [3]:
# Load the facies table from the CSV sitting next to the notebook.
data = pd.read_csv('facies_vectors.csv')
# Impute missing PE readings with the PE column mean. The fill is restricted
# to the 'PE' column: a bare `data.fillna(<scalar>)` would write the PE mean
# into every NaN of every column, whatever that column holds.
# NOTE(review): the mean is taken over the full table, before any train/test
# split is made.
data = data.fillna({'PE': data['PE'].mean()})
# Columns used as model inputs.
feature_names = ['GR', 'ILD_log10', 'DeltaPHI', 'PHIND', 'PE', 'NM_M', 'RELPOS']

In [10]:
def augment_features_window(X, N_neig):
    """Stack each row of X with its N_neig preceding and following rows.

    X is padded with N_neig all-zero rows at both ends, so rows close to the
    edges see zeros where a neighbour does not exist.

    Parameters
    ----------
    X : 2-D array, shape (n_rows, n_features)
    N_neig : int
        Number of neighbouring rows taken on each side.

    Returns
    -------
    ndarray of float, shape (n_rows, n_features * (2 * N_neig + 1))
        For every row: the rows at offsets -N_neig .. +N_neig laid side by
        side, in that order.
    """
    n_samples = X.shape[0]
    n_feats = X.shape[1]

    # Zero padding above and below so every row has a full window.
    pad = np.zeros((N_neig, n_feats))
    padded = np.vstack((pad, X, pad))

    width = 2 * N_neig + 1
    X_aug = np.zeros((n_samples, n_feats * width))
    # The k-th column block holds the rows shifted by (k - N_neig).
    for k in range(width):
        X_aug[:, k * n_feats:(k + 1) * n_feats] = padded[k:k + n_samples]
    return X_aug

def augment_features_gradient(X, depth):
    """Forward-difference gradient of every feature with respect to depth.

    Parameters
    ----------
    X : 2-D array, shape (n_rows, n_features)
    depth : 1-D array-like, length n_rows
        Depth of each row of X.

    Returns
    -------
    ndarray, shape (n_rows, n_features)
        Row i holds (X[i+1] - X[i]) / (depth[i+1] - depth[i]); the last row is
        all zeros because it has no successor.
    """
    # Work in float: with an integer `depth` the difference array would be
    # integer too, the 0.001 replacement below would truncate to 0, and the
    # division would then produce inf/nan.
    d_diff = np.diff(np.asarray(depth, dtype=float)).reshape((-1, 1))
    # Repeated depths would divide by zero; substitute a small step instead.
    d_diff[d_diff == 0] = 0.001
    X_diff = np.diff(X, axis=0)
    X_grad = X_diff / d_diff
    # Pad with a zero row so the output has as many rows as X.
    X_grad = np.concatenate((X_grad, np.zeros((1, X_grad.shape[1]))))

    return X_grad

def augment_features(X, well, depth, N_neig=1):
    """Build window and gradient features separately for each well.

    Rows are grouped by the value in `well`; for each group the output of
    augment_features_window and augment_features_gradient is placed side by
    side and written back to the rows of that group.

    Parameters
    ----------
    X : 2-D array, shape (n_rows, n_features)
    well : 1-D array, length n_rows
        Group label of each row.
    depth : 1-D array, length n_rows
        Depth of each row.
    N_neig : int, default 1
        Neighbours on each side used for the window features.

    Returns
    -------
    ndarray, shape (n_rows, n_features * (2 * N_neig + 2))
    """
    n_rows, n_feats = X.shape[0], X.shape[1]
    # (2*N_neig + 1) window blocks plus one gradient block per feature.
    out = np.zeros((n_rows, n_feats * (2 * N_neig + 2)))

    for name in np.unique(well):
        rows = np.where(well == name)[0]
        block = X[rows, :]
        windowed = augment_features_window(block, N_neig)
        gradient = augment_features_gradient(block, depth[rows])
        out[rows, :] = np.concatenate((windowed, gradient), axis=1)

    return out


In [11]:
def accuracy(y_predict, y_test):
    """Return the fraction of positions where y_predict equals y_test.

    Positions are compared by index over the length of `y_predict`; the count
    of matches is divided by that same length.
    """
    n_total = len(y_predict)
    n_hits = sum(1 for i in range(n_total) if y_predict[i] == y_test[i])
    return n_hits / n_total

In [14]:
import xgboost as xgb
from xgboost.sklearn import  XGBClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import BaggingClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.neural_network import MLPClassifier
from keras.models import Sequential
from keras.layers import Dense
from sklearn.metrics import f1_score
from sklearn.ensemble import BaggingClassifier
from sklearn.svm import SVC
from sklearn.naive_bayes import BernoulliNB
def classification(clf):
    """Fit `clf` and return its micro-averaged F1 score on the test split.

    The data is not passed in: the function reads the notebook-level variables
    X_train_robust_norm / y_train for fitting and X_test_robust_norm / y_test
    for scoring, so those must be set before each call.

    The score is also printed, prefixed with 'With augm'.
    """
    clf.fit(X_train_robust_norm, y_train)
    predictions = clf.predict(X_test_robust_norm)
    score = 0 + f1_score(y_test, predictions, average='micro')
    print('With augm', score)
    return score

In [19]:
# Leave-one-well-out evaluation: each well in turn is held out as the test
# set, the model is trained on all remaining wells, and the per-well
# micro-F1 scores are averaged at the end.
global_acc = 0
test = dict()
train = dict()
index = 0  # not used in this cell; kept in case another cell reads it
# Use the order of first appearance in the table rather than a set, so the
# folds are visited (and printed) in the same order on every run.
well_names = data['Well Name'].unique()
for well in well_names:

    test[well] = data[data['Well Name'] == well]
    train[well] = data[data['Well Name'] != well]
    X_train = train[well][feature_names].values
    y_train = train[well]['Facies'].values
    X_test = test[well][feature_names].values
    y_test = test[well]['Facies'].values
    well_train = train[well]['Well Name'].values
    well_test = test[well]['Well Name'].values
    depth_train = train[well]['Depth'].values
    depth_test = test[well]['Depth'].values

    # Window + gradient features, computed well by well.
    X_aug_train = augment_features(X_train, well_train, depth_train)
    X_aug_test = augment_features(X_test, well_test, depth_test)

    # Both scalers are fitted on the training wells only and then applied to
    # the held-out well.
    robust = preprocessing.RobustScaler(quantile_range=(25.0, 75.0)).fit(X_aug_train)
    X_train_robust = robust.transform(X_aug_train)
    X_test_robust = robust.transform(X_aug_test)

    scaler = StandardScaler().fit(X_train_robust)
    # classification() reads X_train_robust_norm, X_test_robust_norm, y_train
    # and y_test from the notebook namespace, so these names must not change.
    X_train_robust_norm = scaler.transform(X_train_robust)
    X_test_robust_norm = scaler.transform(X_test_robust)

    # `degree` only affects the polynomial kernel, so it is not passed for 'rbf'.
    clf_svm = SVC(C=10, kernel='rbf')
    # NOTE(review): recent scikit-learn releases rename `base_estimator` to
    # `estimator`; switch the keyword if the environment is upgraded.
    clf_bagging = BaggingClassifier(base_estimator=clf_svm,  n_estimators=100)
    global_acc += classification(clf_bagging)
# Average over the wells actually evaluated instead of a hard-coded count.
print(global_acc/len(well_names))

With augm 0.5944206008583691
With augm 0.5222772277227723
With augm 0.37325349301397204
With augm 0.4939759036144578
With augm 0.5422993492407809
With augm 0.46436285097192226
With augm 0.7
With augm 0.4988610478359909
With augm 0.5605095541401274
With augm 0.4209354120267261
0.5170895439425118
