In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn import preprocessing
import numpy as np
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import f1_score

In [2]:
data = pd.read_csv('facies_vectors.csv')
feature_names = ['GR', 'ILD_log10', 'DeltaPHI', 'PHIND', 'PE', 'NM_M', 'RELPOS']
data = data.fillna(data['PE'].mean())

GridSearch Decision Tree

In [12]:
from sklearn.tree import DecisionTreeClassifier
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import make_scorer


scorer = make_scorer(f1_score,average='micro')
parameters = {'criterion':['gini', 'entropy'], 'splitter':['random', 'best'], 'max_depth':[1, 2, 3, 4, 5, None], 
             'min_samples_split':[2, 3, 4, 5,], 'max_features':['auto', 'log2', None]}
             
dt = DecisionTreeClassifier()
clf = GridSearchCV(dt, parameters, scoring=scorer)
# Split Data
train, test = train_test_split(data, test_size=0.3)
X_train = train[feature_names].values
y_train = train['Facies'].values
X_test = test[feature_names].values
y_test = test['Facies'].values
    
scaler = StandardScaler().fit(X_train)
X_train_norm = scaler.transform(X_train)
X_test_norm = scaler.transform(X_test)

clf.fit(X_train_norm, y_train)
print('decisiontree, split', clf.best_params_)
#Test well
for well in set(data['Well Name']):
    train = data[data['Well Name'] != well]
    test = data[data['Well Name'] == well]
    X_train = train[feature_names].values
    y_train = train['Facies'].values
    X_test = test[feature_names].values
    y_test = test['Facies'].values
    
    scaler = StandardScaler().fit(X_train)
    X_train_norm = scaler.transform(X_train)
    X_test_norm = scaler.transform(X_test)

    clf.fit(X_train_norm, y_train)
    print('decisiontree, well', clf.best_params_, well )

decisiontree, split {'criterion': 'gini', 'max_depth': None, 'max_features': None, 'min_samples_split': 2, 'splitter': 'best'}
decisiontree, well {'criterion': 'entropy', 'max_depth': 5, 'max_features': None, 'min_samples_split': 2, 'splitter': 'best'} LUKE G U
decisiontree, well {'criterion': 'entropy', 'max_depth': 4, 'max_features': None, 'min_samples_split': 2, 'splitter': 'best'} KIMZEY A
decisiontree, well {'criterion': 'entropy', 'max_depth': 5, 'max_features': None, 'min_samples_split': 2, 'splitter': 'best'} CROSS H CATTLE
decisiontree, well {'criterion': 'gini', 'max_depth': 5, 'max_features': 'auto', 'min_samples_split': 4, 'splitter': 'best'} NEWBY
decisiontree, well {'criterion': 'entropy', 'max_depth': 5, 'max_features': None, 'min_samples_split': 2, 'splitter': 'best'} SHRIMPLIN
decisiontree, well {'criterion': 'gini', 'max_depth': 5, 'max_features': None, 'min_samples_split': 2, 'splitter': 'best'} ALEXANDER D
decisiontree, well {'criterion': 'gini', 'max_depth': 5, 'ma

GridSearch SVM

In [27]:
from sklearn.svm import SVC
from sklearn.metrics import make_scorer

scorer = make_scorer(f1_score,average='micro')
parameters = {'kernel':['linear', 'poly', 'rbf'], 'C':[1, 5, 10], 'degree':[2, 3, 4]}
svc = SVC(probability=True)
clf = GridSearchCV(svc, parameters, scoring=scorer)
#Split data
train, test = train_test_split(data, test_size=0.3)
X_train = train[feature_names].values
y_train = train['Facies'].values
X_test = test[feature_names].values
y_test = test['Facies'].values
    
scaler = StandardScaler().fit(X_train)
X_train_norm = scaler.transform(X_train)
X_test_norm = scaler.transform(X_test)

clf.fit(X_train_norm, y_train)
print('svm, split', clf.best_params_)
#Test well
for well in set(data['Well Name']):
    train = data[data['Well Name'] != well]
    test = data[data['Well Name'] == well]
    X_train = train[feature_names].values
    y_train = train['Facies'].values
    X_test = test[feature_names].values
    y_test = test['Facies'].values
    
    scaler = StandardScaler().fit(X_train)
    X_train_norm = scaler.transform(X_train)
    X_test_norm = scaler.transform(X_test)

    clf.fit(X_train_norm, y_train)
    print('svm, well', clf.best_params_, well )


svm, split {'C': 10, 'degree': 2, 'kernel': 'rbf'}
svm, well {'C': 1, 'degree': 3, 'kernel': 'poly'} LUKE G U
svm, well {'C': 1, 'degree': 2, 'kernel': 'rbf'} KIMZEY A
svm, well {'C': 1, 'degree': 2, 'kernel': 'rbf'} CROSS H CATTLE
svm, well {'C': 1, 'degree': 2, 'kernel': 'rbf'} NEWBY
svm, well {'C': 10, 'degree': 2, 'kernel': 'linear'} SHRIMPLIN
svm, well {'C': 1, 'degree': 2, 'kernel': 'rbf'} ALEXANDER D
svm, well {'C': 1, 'degree': 2, 'kernel': 'rbf'} SHANKLE
svm, well {'C': 1, 'degree': 2, 'kernel': 'rbf'} CHURCHMAN BIBLE
svm, well {'C': 1, 'degree': 2, 'kernel': 'rbf'} Recruit F9
svm, well {'C': 1, 'degree': 2, 'kernel': 'rbf'} NOLAN


Gridsearch Logistic Regression:

In [33]:
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import make_scorer

scorer = make_scorer(f1_score,average='micro')
parameters = {'C':[1, 5, 10],'fit_intercept':[True, False],'class_weight':['balanced', None],
              'solver': ['newton-cg', 'lbfgs','sag', 'saga'], 'multi_class' : ['ovr', 'multinomial']}
logr = LogisticRegression()
clf = GridSearchCV(logr, parameters, scoring=scorer)
#Split data
train, test = train_test_split(data, test_size=0.3)
X_train = train[feature_names].values
y_train = train['Facies'].values
X_test = test[feature_names].values
y_test = test['Facies'].values
    
scaler = StandardScaler().fit(X_train)
X_train_norm = scaler.transform(X_train)
X_test_norm = scaler.transform(X_test)

clf.fit(X_train_norm, y_train)
print('logistic regression, split', clf.best_params_)
#Test well
for well in set(data['Well Name']):
    train = data[data['Well Name'] != well]
    test = data[data['Well Name'] == well]
    X_train = train[feature_names].values
    y_train = train['Facies'].values
    X_test = test[feature_names].values
    y_test = test['Facies'].values
    
    scaler = StandardScaler().fit(X_train)
    X_train_norm = scaler.transform(X_train)
    X_test_norm = scaler.transform(X_test)

    clf.fit(X_train_norm, y_train)
    print('logistic regression, well', clf.best_params_, well )






logistic regression, split {'C': 10, 'class_weight': None, 'fit_intercept': True, 'multi_class': 'ovr', 'solver': 'saga'}






logistic regression, well {'C': 1, 'class_weight': None, 'fit_intercept': True, 'multi_class': 'ovr', 'solver': 'saga'} LUKE G U






logistic regression, well {'C': 1, 'class_weight': None, 'fit_intercept': True, 'multi_class': 'ovr', 'solver': 'lbfgs'} KIMZEY A






logistic regression, well {'C': 1, 'class_weight': None, 'fit_intercept': False, 'multi_class': 'ovr', 'solver': 'newton-cg'} CROSS H CATTLE






logistic regression, well {'C': 10, 'class_weight': None, 'fit_intercept': True, 'multi_class': 'ovr', 'solver': 'sag'} NEWBY






logistic regression, well {'C': 5, 'class_weight': None, 'fit_intercept': False, 'multi_class': 'ovr', 'solver': 'newton-cg'} SHRIMPLIN






logistic regression, well {'C': 5, 'class_weight': None, 'fit_intercept': False, 'multi_class': 'ovr', 'solver': 'newton-cg'} ALEXANDER D






logistic regression, well {'C': 1, 'class_weight': None, 'fit_intercept': True, 'multi_class': 'ovr', 'solver': 'newton-cg'} SHANKLE






logistic regression, well {'C': 1, 'class_weight': None, 'fit_intercept': False, 'multi_class': 'ovr', 'solver': 'newton-cg'} CHURCHMAN BIBLE






logistic regression, well {'C': 10, 'class_weight': None, 'fit_intercept': True, 'multi_class': 'ovr', 'solver': 'sag'} Recruit F9






logistic regression, well {'C': 1, 'class_weight': None, 'fit_intercept': True, 'multi_class': 'ovr', 'solver': 'newton-cg'} NOLAN


Grid Search Bernoulli NaiveBayes

In [35]:
from sklearn.naive_bayes import BernoulliNB
from sklearn.metrics import make_scorer

scorer = make_scorer(f1_score,average='micro')
parameters_bnb = {'alpha':[1, 5, 10],'binarize':[-1.0, 0.0, 1.0]}
bnb = BernoulliNB()
clf_bnb = GridSearchCV(bnb, parameters_bnb, scoring=scorer)
#Split data
train, test = train_test_split(data, test_size=0.3)
X_train = train[feature_names].values
y_train = train['Facies'].values
X_test = test[feature_names].values
y_test = test['Facies'].values
    
scaler = StandardScaler().fit(X_train)
X_train_norm = scaler.transform(X_train)
X_test_norm = scaler.transform(X_test)

clf_bnb.fit(X_train_norm, y_train)
print('Bernouili Naive Bayes, split', clf_bnb.best_params_)
#Test well
for well in set(data['Well Name']):
    train = data[data['Well Name'] != well]
    test = data[data['Well Name'] == well]
    X_train = train[feature_names].values
    y_train = train['Facies'].values
    X_test = test[feature_names].values
    y_test = test['Facies'].values
    
    scaler = StandardScaler().fit(X_train)
    X_train_norm = scaler.transform(X_train)
    X_test_norm = scaler.transform(X_test)

    
    clf_bnb.fit(X_train_norm, y_train)
    print('Bernouili Naive Bayes, well', clf_bnb.best_params_, well)


Bernouili Naive Bayes, split {'alpha': 1, 'binarize': 0.0}
Bernouili Naive Bayes, well {'alpha': 10, 'binarize': -1.0} LUKE G U
Bernouili Naive Bayes, well {'alpha': 10, 'binarize': -1.0} KIMZEY A
Bernouili Naive Bayes, well {'alpha': 5, 'binarize': 0.0} CROSS H CATTLE
Bernouili Naive Bayes, well {'alpha': 10, 'binarize': -1.0} NEWBY
Bernouili Naive Bayes, well {'alpha': 1, 'binarize': 0.0} SHRIMPLIN
Bernouili Naive Bayes, well {'alpha': 10, 'binarize': -1.0} ALEXANDER D
Bernouili Naive Bayes, well {'alpha': 10, 'binarize': 0.0} SHANKLE
Bernouili Naive Bayes, well {'alpha': 1, 'binarize': 1.0} CHURCHMAN BIBLE
Bernouili Naive Bayes, well {'alpha': 5, 'binarize': -1.0} Recruit F9
Bernouili Naive Bayes, well {'alpha': 1, 'binarize': -1.0} NOLAN
