In [6]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn import preprocessing
import numpy as np
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import f1_score

In [7]:
import warnings
# Suppress every Python warning for the rest of the kernel session.
# NOTE(review): this is a blanket filter, so any warning raised by the model
# fitting code further down (e.g. convergence or deprecation notices) is hidden
# as well. Consider narrowing it to specific categories once the notebook is stable.
warnings.filterwarnings("ignore")

In [8]:
# Load the well-log table from the working directory (relative path).
# Later cells read these columns from it: 'Facies' (label), 'Well Name'
# (grouping key for leave-one-well-out evaluation) and the log features
# 'GR', 'ILD_log10', 'DeltaPHI', 'PHIND', 'PE', 'NM_M', 'RELPOS'.
data = pd.read_csv('facies_vectors.csv')

Naive Bayes algorithms:

In [13]:
from sklearn.naive_bayes import GaussianNB
from sklearn.naive_bayes import BernoulliNB

# Naive Bayes evaluation (GaussianNB and BernoulliNB), with and without PE.
# Two protocols are used: repeated random 70/30 splits, and leave-one-well-out.
# The printed "acc_*" figures are micro-averaged F1 scores.


def _nb_scores(train, test, feature_names, bern_params):
    """Fit both Naive Bayes models on `train` and score them on `test`.

    Parameters
    ----------
    train, test : pandas.DataFrame
        Frames holding the `feature_names` columns and a 'Facies' label column.
    feature_names : list of str
        Columns used as model inputs.
    bern_params : dict
        Keyword arguments for BernoulliNB ('alpha' and 'binarize').

    Returns
    -------
    tuple
        (GaussianNB score, BernoulliNB score), each a micro-averaged F1.
    """
    X_train = train[feature_names].values
    X_test = test[feature_names].values
    y_train = train['Facies'].values
    y_test = test['Facies'].values

    # The scaler is fitted on the training rows only and then applied to both sets.
    scaler = StandardScaler().fit(X_train)
    X_train_norm = scaler.transform(X_train)
    X_test_norm = scaler.transform(X_test)

    scores = []
    for clf in (GaussianNB(), BernoulliNB(**bern_params)):
        clf.fit(X_train_norm, y_train)
        scores.append(f1_score(y_test, clf.predict(X_test_norm), average='micro'))
    return scores[0], scores[1]


def _nb_random_split_means(frame, feature_names, n_runs=10, test_size=0.3):
    """Return mean (GaussianNB, BernoulliNB) scores over `n_runs` random splits."""
    gaus_total = 0.0
    bern_total = 0.0
    for run in range(n_runs):
        # Seeding with the run index makes a re-run reproduce the same splits and
        # gives the "with PE" and "without PE" experiments identical partitions.
        train, test = train_test_split(frame, test_size=test_size, random_state=run)
        gaus, bern = _nb_scores(train, test, feature_names, {'alpha': 1, 'binarize': 0.0})
        gaus_total += gaus
        bern_total += bern
    return gaus_total / n_runs, bern_total / n_runs


def _nb_leave_one_well_out_means(frame, feature_names, bern_params_by_well):
    """Return mean (GaussianNB, BernoulliNB) scores with each well held out once.

    `bern_params_by_well` maps a well name to the BernoulliNB keyword arguments
    used when that well is the test set; a missing well raises KeyError.
    """
    # unique() keeps first-appearance order, so the iteration order is stable
    # between runs (iterating a set of strings is not).
    wells = frame['Well Name'].unique()
    gaus_total = 0.0
    bern_total = 0.0
    for well in wells:
        held_out = frame['Well Name'] == well
        gaus, bern = _nb_scores(frame[~held_out], frame[held_out],
                                feature_names, bern_params_by_well[well])
        gaus_total += gaus
        bern_total += bern
    # Average over the wells actually present instead of a hard-coded 10.
    return gaus_total / len(wells), bern_total / len(wells)


# with mean value
feature_names = ['GR', 'ILD_log10', 'DeltaPHI', 'PHIND', 'PE', 'NM_M', 'RELPOS']
# Impute missing PE readings with the PE mean. The fill is restricted to the
# PE column so that a gap in any other column is never silently replaced by a
# PE value.
data = data.fillna({'PE': data['PE'].mean()})

## with split
acc_gaus, acc_bern = _nb_random_split_means(data, feature_names)
print('mean, 7:3, acc_gaus = ', acc_gaus)
print('mean, 7:3, acc_bern = ', acc_bern)

## with test well
# One BernoulliNB setting per held-out well.
# NOTE(review): presumably these come from a search that is not part of this
# notebook; if that search used the held-out well's labels, the per-well scores
# are optimistic. Confirm how they were chosen.
params_bern = dict()
params_bern['LUKE G U'] = {'alpha': 10, 'binarize': -1.0}
params_bern['KIMZEY A'] = {'alpha': 10, 'binarize': -1.0}
params_bern['CROSS H CATTLE'] = {'alpha': 5, 'binarize': 0.0}
params_bern['NEWBY'] = {'alpha': 10, 'binarize': -1.0}
params_bern['SHRIMPLIN'] = {'alpha': 1, 'binarize': 0.0}
params_bern['ALEXANDER D'] = {'alpha': 10, 'binarize': -1.0}
params_bern['SHANKLE'] = {'alpha': 10, 'binarize': 0.0}
params_bern['CHURCHMAN BIBLE'] = {'alpha': 1, 'binarize': 1.0}
params_bern['Recruit F9'] = {'alpha': 5, 'binarize': -1.0}
params_bern['NOLAN'] = {'alpha': 1, 'binarize': -1.0}

acc_gaus, acc_bern = _nb_leave_one_well_out_means(data, feature_names, params_bern)
print('mean, well, acc_gaus = ', acc_gaus)
print('mean, well, acc_bern = ', acc_bern)

# without PE
feature_names = ['GR', 'ILD_log10', 'DeltaPHI', 'PHIND', 'NM_M', 'RELPOS']

## with split
acc_gaus, acc_bern = _nb_random_split_means(data, feature_names)
print('without PE, 7:3, acc_gaus = ', acc_gaus)
print('without PE, 7:3, acc_bern = ', acc_bern)

## with test well
acc_gaus, acc_bern = _nb_leave_one_well_out_means(data, feature_names, params_bern)
print('without PE, well, acc_gaus = ', acc_gaus)
print('without PE, well, acc_bern = ', acc_bern)

mean, 7:3, acc_gaus =  0.18048192771084337
mean, 7:3, acc_bern =  0.49670682730923693
mean, well, acc_gaus =  0.2372882516172778
mean, well, acc_bern =  0.3876463620579104
without PE, 7:3, acc_gaus =  0.16313253012048193
without PE, 7:3, acc_bern =  0.48313253012048196
without PE, well, acc_gaus =  0.21930275634616345
without PE, well, acc_bern =  0.38067969134840074


SVM

In [10]:
from sklearn.svm import SVC

# SVM evaluation, with and without PE.
# Two protocols are used: repeated random 70/30 splits, and leave-one-well-out.
# The printed "acc_svm" figures are micro-averaged F1 scores.


def _svm_score(train, test, feature_names, svc_params):
    """Fit an SVC on `train` and return its micro-averaged F1 on `test`.

    Parameters
    ----------
    train, test : pandas.DataFrame
        Frames holding the `feature_names` columns and a 'Facies' label column.
    feature_names : list of str
        Columns used as model inputs.
    svc_params : dict
        Keyword arguments for SVC ('C', 'degree', 'kernel').
    """
    X_train = train[feature_names].values
    X_test = test[feature_names].values
    y_train = train['Facies'].values
    y_test = test['Facies'].values

    # The scaler is fitted on the training rows only and then applied to both sets.
    scaler = StandardScaler().fit(X_train)
    X_train_norm = scaler.transform(X_train)
    X_test_norm = scaler.transform(X_test)

    clf_svm = SVC(**svc_params)
    clf_svm.fit(X_train_norm, y_train)
    return f1_score(y_test, clf_svm.predict(X_test_norm), average='micro')


def _svm_random_split_mean(frame, feature_names, n_runs=10, test_size=0.3):
    """Return the mean SVC score over `n_runs` seeded random splits."""
    total = 0.0
    for run in range(n_runs):
        # Seeding with the run index makes a re-run reproduce the same splits and
        # gives the "with PE" and "without PE" experiments identical partitions.
        train, test = train_test_split(frame, test_size=test_size, random_state=run)
        # NOTE(review): 'degree' is only consulted by the 'poly' kernel according
        # to the scikit-learn SVC documentation, so it has no effect here.
        total += _svm_score(train, test, feature_names,
                            {'C': 10, 'degree': 2, 'kernel': 'rbf'})
    return total / n_runs


def _svm_leave_one_well_out_mean(frame, feature_names, svc_params_by_well):
    """Return the mean SVC score with each well held out once as the test set.

    `svc_params_by_well` maps a well name to the SVC keyword arguments used
    when that well is the test set; a missing well raises KeyError.
    """
    # unique() keeps first-appearance order, so the iteration order is stable
    # between runs (iterating a set of strings is not).
    wells = frame['Well Name'].unique()
    total = 0.0
    for well in wells:
        held_out = frame['Well Name'] == well
        total += _svm_score(frame[~held_out], frame[held_out],
                            feature_names, svc_params_by_well[well])
    # Average over the wells actually present instead of a hard-coded 10.
    return total / len(wells)


# with mean value
feature_names = ['GR', 'ILD_log10', 'DeltaPHI', 'PHIND', 'PE', 'NM_M', 'RELPOS']
# Impute missing PE readings with the PE mean. The fill is restricted to the
# PE column so that a gap in any other column is never silently replaced by a
# PE value.
data = data.fillna({'PE': data['PE'].mean()})

## with split
acc_svm = _svm_random_split_mean(data, feature_names)
print('mean, 7:3, acc_svm = ', acc_svm)

## with test well
# One SVC setting per held-out well.
# NOTE(review): presumably these come from a search that is not part of this
# notebook; if that search used the held-out well's labels, the per-well scores
# are optimistic. Confirm how they were chosen.
params_svm = dict()
params_svm['LUKE G U'] = {'C': 1, 'degree': 3, 'kernel': 'poly'}
params_svm['KIMZEY A'] = {'C': 1, 'degree': 2, 'kernel': 'rbf'}
params_svm['CROSS H CATTLE'] = {'C': 1, 'degree': 2, 'kernel': 'rbf'}
params_svm['NEWBY'] = {'C': 1, 'degree': 2, 'kernel': 'rbf'}
params_svm['SHRIMPLIN'] = {'C': 10, 'degree': 2, 'kernel': 'linear'}
params_svm['ALEXANDER D'] = {'C': 1, 'degree': 2, 'kernel': 'rbf'}
params_svm['SHANKLE'] = {'C': 1, 'degree': 2, 'kernel': 'rbf'}
params_svm['CHURCHMAN BIBLE'] = {'C': 1, 'degree': 2, 'kernel': 'rbf'}
params_svm['Recruit F9'] = {'C': 1, 'degree': 2, 'kernel': 'rbf'}
params_svm['NOLAN'] = {'C': 1, 'degree': 2, 'kernel': 'rbf'}

acc_svm = _svm_leave_one_well_out_mean(data, feature_names, params_svm)
print('mean, well, acc_svm = ', acc_svm)

# without PE
feature_names = ['GR', 'ILD_log10', 'DeltaPHI', 'PHIND', 'NM_M', 'RELPOS']

## with split
acc_svm = _svm_random_split_mean(data, feature_names)
print('without PE, 7:3, acc_svm = ', acc_svm)

## with test well
acc_svm = _svm_leave_one_well_out_mean(data, feature_names, params_svm)
print('without PE, well, acc_svm = ', acc_svm)

mean, 7:3, acc_svm =  0.6652208835341364
mean, well, acc_svm =  0.5033048652200915
without PE, 7:3, acc_svm =  0.6326104417670682
without PE, well, acc_svm =  0.4973203528611485


Logistic regression

In [11]:
from sklearn.linear_model import LogisticRegression

# Logistic regression evaluation, with and without PE.
# Two protocols are used: repeated random 70/30 splits, and leave-one-well-out.
# The printed "acc_lr" figures are micro-averaged F1 scores.


def _lr_score(train, test, feature_names, lr_params):
    """Fit a LogisticRegression on `train` and return its micro-F1 on `test`.

    Parameters
    ----------
    train, test : pandas.DataFrame
        Frames holding the `feature_names` columns and a 'Facies' label column.
    feature_names : list of str
        Columns used as model inputs.
    lr_params : dict
        Keyword arguments for LogisticRegression ('C', 'class_weight',
        'fit_intercept', 'multi_class', 'solver').
    """
    X_train = train[feature_names].values
    X_test = test[feature_names].values
    y_train = train['Facies'].values
    y_test = test['Facies'].values

    # The scaler is fitted on the training rows only and then applied to both sets.
    scaler = StandardScaler().fit(X_train)
    X_train_norm = scaler.transform(X_train)
    X_test_norm = scaler.transform(X_test)

    # A fixed random_state keeps the stochastic 'sag'/'saga' solvers repeatable.
    # NOTE(review): recent scikit-learn releases deprecate the 'multi_class'
    # argument - confirm against the installed version before upgrading.
    clf_lr = LogisticRegression(random_state=0, **lr_params)
    clf_lr.fit(X_train_norm, y_train)
    return f1_score(y_test, clf_lr.predict(X_test_norm), average='micro')


def _lr_random_split_mean(frame, feature_names, n_runs=10, test_size=0.3):
    """Return the mean LogisticRegression score over `n_runs` seeded random splits."""
    split_params = {'C': 10, 'class_weight': None, 'fit_intercept': True,
                    'multi_class': 'ovr', 'solver': 'saga'}
    total = 0.0
    for run in range(n_runs):
        # Seeding with the run index makes a re-run reproduce the same splits and
        # gives the "with PE" and "without PE" experiments identical partitions.
        train, test = train_test_split(frame, test_size=test_size, random_state=run)
        total += _lr_score(train, test, feature_names, split_params)
    return total / n_runs


def _lr_leave_one_well_out_mean(frame, feature_names, lr_params_by_well):
    """Return the mean LogisticRegression score with each well held out once.

    `lr_params_by_well` maps a well name to the LogisticRegression keyword
    arguments used when that well is the test set; a missing well raises KeyError.
    """
    # unique() keeps first-appearance order, so the iteration order is stable
    # between runs (iterating a set of strings is not).
    wells = frame['Well Name'].unique()
    total = 0.0
    for well in wells:
        held_out = frame['Well Name'] == well
        total += _lr_score(frame[~held_out], frame[held_out],
                           feature_names, lr_params_by_well[well])
    # Average over the wells actually present instead of a hard-coded 10.
    return total / len(wells)


# with mean value
feature_names = ['GR', 'ILD_log10', 'DeltaPHI', 'PHIND', 'PE', 'NM_M', 'RELPOS']
# Impute missing PE readings with the PE mean. The fill is restricted to the
# PE column so that a gap in any other column is never silently replaced by a
# PE value.
data = data.fillna({'PE': data['PE'].mean()})

## with split
acc_lr = _lr_random_split_mean(data, feature_names)
print('mean, 7:3, acc_lr = ', acc_lr)

## with test well
# One LogisticRegression setting per held-out well.
# NOTE(review): presumably these come from a search that is not part of this
# notebook; if that search used the held-out well's labels, the per-well scores
# are optimistic. Confirm how they were chosen.
params_logr = dict()
params_logr['LUKE G U'] = {'C': 1, 'class_weight': None, 'fit_intercept': True, 'multi_class': 'ovr', 'solver': 'saga'}
params_logr['KIMZEY A'] = {'C': 1, 'class_weight': None, 'fit_intercept': True, 'multi_class': 'ovr', 'solver': 'lbfgs'}
params_logr['CROSS H CATTLE'] = {'C': 1, 'class_weight': None, 'fit_intercept': False, 'multi_class': 'ovr', 'solver': 'newton-cg'}
params_logr['NEWBY'] = {'C': 10, 'class_weight': None, 'fit_intercept': True, 'multi_class': 'ovr', 'solver': 'sag'}
params_logr['SHRIMPLIN'] = {'C': 5, 'class_weight': None, 'fit_intercept': False, 'multi_class': 'ovr', 'solver': 'newton-cg'}
params_logr['ALEXANDER D'] = {'C': 5, 'class_weight': None, 'fit_intercept': False, 'multi_class': 'ovr', 'solver': 'newton-cg'}
params_logr['SHANKLE'] = {'C': 1, 'class_weight': None, 'fit_intercept': True, 'multi_class': 'ovr', 'solver': 'newton-cg'}
params_logr['CHURCHMAN BIBLE'] = {'C': 1, 'class_weight': None, 'fit_intercept': False, 'multi_class': 'ovr', 'solver': 'newton-cg'}
params_logr['Recruit F9'] = {'C': 10, 'class_weight': None, 'fit_intercept': True, 'multi_class': 'ovr', 'solver': 'sag'}
params_logr['NOLAN'] = {'C': 1, 'class_weight': None, 'fit_intercept': True, 'multi_class': 'ovr', 'solver': 'newton-cg'}

acc_lr = _lr_leave_one_well_out_mean(data, feature_names, params_logr)
print('mean, well, acc_lr = ', acc_lr)

# without PE
feature_names = ['GR', 'ILD_log10', 'DeltaPHI', 'PHIND', 'NM_M', 'RELPOS']

## with split
acc_lr = _lr_random_split_mean(data, feature_names)
print('without PE, 7:3, acc_lr = ', acc_lr)

## with test well
acc_lr = _lr_leave_one_well_out_mean(data, feature_names, params_logr)
print('without PE, well, acc_lr = ', acc_lr)

mean, 7:3, acc_lr =  0.5606425702811246
mean, well, acc_lr =  0.4523152952415469
without PE, 7:3, acc_lr =  0.5329317269076305
without PE, well, acc_lr =  0.4339687309424393


Decision Tree

In [9]:
from sklearn.tree import DecisionTreeClassifier

# Decision tree evaluation, with and without PE.
# Two protocols are used: repeated random 70/30 splits, and leave-one-well-out.
# The printed "acc_dtr" figures are micro-averaged F1 scores.


def _dtr_score(train, test, feature_names, tree_params):
    """Fit a DecisionTreeClassifier on `train` and return its micro-F1 on `test`.

    Parameters
    ----------
    train, test : pandas.DataFrame
        Frames holding the `feature_names` columns and a 'Facies' label column.
    feature_names : list of str
        Columns used as model inputs.
    tree_params : dict
        Keyword arguments for DecisionTreeClassifier ('criterion', 'max_depth',
        'max_features', 'min_samples_split', 'splitter').
    """
    X_train = train[feature_names].values
    X_test = test[feature_names].values
    y_train = train['Facies'].values
    y_test = test['Facies'].values

    # The scaler is fitted on the training rows only and then applied to both sets.
    scaler = StandardScaler().fit(X_train)
    X_train_norm = scaler.transform(X_train)
    X_test_norm = scaler.transform(X_test)

    # A fixed random_state makes feature sub-sampling and tie-breaking repeatable.
    clf_dtr = DecisionTreeClassifier(random_state=0, **tree_params)
    clf_dtr.fit(X_train_norm, y_train)
    return f1_score(y_test, clf_dtr.predict(X_test_norm), average='micro')


def _dtr_random_split_mean(frame, feature_names, n_runs=10, test_size=0.3):
    """Return the mean decision-tree score over `n_runs` seeded random splits."""
    split_params = {'criterion': 'entropy', 'max_depth': None, 'max_features': None,
                    'min_samples_split': 3, 'splitter': 'best'}
    total = 0.0
    for run in range(n_runs):
        # Seeding with the run index makes a re-run reproduce the same splits and
        # gives the "with PE" and "without PE" experiments identical partitions.
        train, test = train_test_split(frame, test_size=test_size, random_state=run)
        total += _dtr_score(train, test, feature_names, split_params)
    return total / n_runs


def _dtr_leave_one_well_out_mean(frame, feature_names, tree_params_by_well):
    """Return the mean decision-tree score with each well held out once.

    `tree_params_by_well` maps a well name to the DecisionTreeClassifier keyword
    arguments used when that well is the test set; a missing well raises KeyError.
    """
    # unique() keeps first-appearance order, so the iteration order is stable
    # between runs (iterating a set of strings is not).
    wells = frame['Well Name'].unique()
    total = 0.0
    for well in wells:
        held_out = frame['Well Name'] == well
        total += _dtr_score(frame[~held_out], frame[held_out],
                            feature_names, tree_params_by_well[well])
    # Average over the wells actually present instead of a hard-coded 10.
    return total / len(wells)


# with mean value
feature_names = ['GR', 'ILD_log10', 'DeltaPHI', 'PHIND', 'PE', 'NM_M', 'RELPOS']
# Impute missing PE readings with the PE mean. The fill is restricted to the
# PE column so that a gap in any other column is never silently replaced by a
# PE value.
data = data.fillna({'PE': data['PE'].mean()})

## with split
acc_dtr = _dtr_random_split_mean(data, feature_names)
print('mean, 7:3, acc_dtr = ', acc_dtr)

## with test well
# One DecisionTreeClassifier setting per held-out well.
# NOTE(review): presumably these come from a search that is not part of this
# notebook; if that search used the held-out well's labels, the per-well scores
# are optimistic. Confirm how they were chosen.
params = dict()
params['LUKE G U'] = {'criterion': 'entropy', 'max_depth': 5, 'max_features': None, 'min_samples_split': 2, 'splitter': 'best'}
params['KIMZEY A'] = {'criterion': 'entropy', 'max_depth': 4, 'max_features': None, 'min_samples_split': 2, 'splitter': 'best'}
params['CROSS H CATTLE'] = {'criterion': 'entropy', 'max_depth': 5, 'max_features': None, 'min_samples_split': 2, 'splitter': 'best'}
# 'sqrt' replaces the former 'auto' value: for classifiers the two were
# equivalent, and 'auto' is no longer accepted by current scikit-learn releases.
params['NEWBY'] = {'criterion': 'gini', 'max_depth': 5, 'max_features': 'sqrt', 'min_samples_split': 4, 'splitter': 'best'}
params['SHRIMPLIN'] = {'criterion': 'entropy', 'max_depth': 5, 'max_features': None, 'min_samples_split': 2, 'splitter': 'best'}
params['ALEXANDER D'] = {'criterion': 'gini', 'max_depth': 5, 'max_features': None, 'min_samples_split': 2, 'splitter': 'best'}
params['SHANKLE'] = {'criterion': 'gini', 'max_depth': 5, 'max_features': None, 'min_samples_split': 4, 'splitter': 'best'}
params['CHURCHMAN BIBLE'] = {'criterion': 'gini', 'max_depth': 3, 'max_features': None, 'min_samples_split': 2, 'splitter': 'best'}
params['Recruit F9'] = {'criterion': 'entropy', 'max_depth': 5, 'max_features': None, 'min_samples_split': 2, 'splitter': 'best'}
params['NOLAN'] = {'criterion': 'gini', 'max_depth': 5, 'max_features': None, 'min_samples_split': 2, 'splitter': 'best'}

acc_dtr = _dtr_leave_one_well_out_mean(data, feature_names, params)
print('mean, well, acc_dtr = ', acc_dtr)

# without PE
feature_names = ['GR', 'ILD_log10', 'DeltaPHI', 'PHIND', 'NM_M', 'RELPOS']

## with split
acc_dtr = _dtr_random_split_mean(data, feature_names)
print('without PE, 7:3, acc_dtr = ', acc_dtr)

## with test well
acc_dtr = _dtr_leave_one_well_out_mean(data, feature_names, params)
print('without PE, well, acc_dtr = ', acc_dtr)

mean, 7:3, acc_dtr =  0.6132530120481927
mean, well, acc_dtr =  0.49514352865282885
without PE, 7:3, acc_dtr =  0.5963855421686747
without PE, well, acc_dtr =  0.4786621983193104
