In [1]:
import pandas as pd
# Load the labelled well-log dataset.
data = pd.read_csv('training_data.csv')

# Hold out the SHANKLE well as a blind test set and keep every other well for
# training. `.copy()` makes the held-out frame independent of `data`, so
# adding a column to it later does not raise pandas' SettingWithCopyWarning.
is_blind_well = data['Well Name'] == 'SHANKLE'
test_well = data[is_blind_well].copy()
data = data[~is_blind_well]

In [2]:
# Predictor columns fed to the classifier; this is also the column order the
# scaler is fitted on.
features = [
    'GR',
    'ILD_log10',
    'DeltaPHI',
    'PHIND',
    'PE',
    'NM_M',
    'RELPOS',
]

# Model inputs and the target column, taken from the training frame.
feature_vectors = data.loc[:, features]
facies_labels = data.loc[:, 'Facies']

# Summary statistics of the target, shown as the cell output.
facies_labels.describe()

count    2783.000000
mean        4.558390
std         2.515249
min         1.000000
25%         2.000000
50%         4.000000
75%         7.000000
max         9.000000
Name: Facies, dtype: float64

In [3]:
from sklearn.preprocessing import StandardScaler
# Fit the standardiser on the training features, then apply it. The fitted
# `scaler` object is kept so the same transform can be reapplied elsewhere.
# NOTE(review): the scaler is fitted on all of `feature_vectors` before the
# train/validation split, so validation rows contribute to the scaling
# statistics - confirm this is acceptable.
scaler = StandardScaler()
scaler.fit(feature_vectors)
scaled_features = scaler.transform(feature_vectors)  # result is an ndarray, not a DataFrame

In [4]:
# `sklearn.cross_validation` was deprecated in scikit-learn 0.18 and removed in
# 0.20; `train_test_split` lives in `sklearn.model_selection`.
from sklearn.model_selection import train_test_split

# Hold back 5% of the rows as a validation set; the fixed seed keeps the
# split reproducible across runs.
X_train, X_cv, y_train, y_cv = train_test_split(
    scaled_features,
    facies_labels,
    test_size=0.05,
    random_state=42,
)
X_train



array([[-0.87705417,  0.33637903, -0.18928865, ...,  1.44783881,
         0.95329909, -1.63528358],
       [-0.19279901,  0.32811041, -0.25469982, ...,  0.61894043,
         0.95329909,  1.12000618],
       [ 0.64782298, -1.20352041,  0.12785645, ..., -0.88896106,
        -1.04898873, -1.26304115],
       ...,
       [ 0.2221964 ,  0.04857211,  0.98018391, ..., -0.74689616,
        -1.04898873,  0.94954002],
       [ 0.53236719, -0.39031519, -0.20911021, ..., -0.96726455,
        -1.04898873,  0.72341145],
       [ 0.34268302, -0.51115421,  1.13875646, ..., -0.78940377,
        -1.04898873, -1.33261918]])

In [5]:
from sklearn.metrics import classification_report
# Display names for the facies classes, listed in the order the report rows
# are printed.
target_names = [
    'SS', 'CSiS', 'FSiS',
    'SiSh', 'MS', 'WS',
    'D', 'PS', 'BS',
]

In [6]:
from sklearn.neural_network import MLPClassifier
# Multi-layer perceptron with one hidden layer of 15 units, trained with the
# L-BFGS solver; `random_state` is fixed so the fit is repeatable.
clf = MLPClassifier(
    hidden_layer_sizes=(15,),
    solver='lbfgs',
    alpha=1e-1,
    random_state=1,
)

# Train on the training split; the returned estimator is the cell output.
clf.fit(X_train, y_train)

MLPClassifier(activation='relu', alpha=0.1, batch_size='auto', beta_1=0.9,
       beta_2=0.999, early_stopping=False, epsilon=1e-08,
       hidden_layer_sizes=(15,), learning_rate='constant',
       learning_rate_init=0.001, max_iter=200, momentum=0.9,
       nesterovs_momentum=True, power_t=0.5, random_state=1, shuffle=True,
       solver='lbfgs', tol=0.0001, validation_fraction=0.1, verbose=False,
       warm_start=False)

In [7]:
# Score the model on the held-out validation split.
y_cv_pred = clf.predict(X_cv)

# Pass `labels` explicitly so each entry of `target_names` is tied to a class
# the model was trained on. Without it the report infers the label set from
# whichever classes happen to appear in y_cv / y_cv_pred, and the names can
# misalign (or the call can fail) when a class is absent from both.
print(classification_report(y_cv, y_cv_pred,
                            labels=clf.classes_,
                            target_names=target_names))

             precision    recall  f1-score   support

         SS       0.91      0.62      0.74        16
       CSiS       0.67      0.69      0.68        29
       FSiS       0.44      0.57      0.50        14
       SiSh       0.45      0.71      0.56         7
         MS       0.64      0.39      0.48        18
         WS       0.65      0.57      0.60        23
          D       0.33      0.33      0.33         3
         PS       0.64      0.70      0.67        23
         BS       0.55      0.86      0.67         7

avg / total       0.64      0.61      0.61       140



In [8]:
# True facies labels of the held-out well.
y_test = test_well.loc[:, 'Facies']

In [9]:
# Select the model inputs by name, in the same column order the scaler was
# fitted on. Dropping the non-feature columns instead breaks as soon as the
# frame gains another column (e.g. 'Prediction' on a second run of this cell)
# and silently depends on the CSV's column order.
well_features = test_well[features]
X_test = scaler.transform(well_features)
y_pred = clf.predict(X_test)

# Attach the predictions with assign(), which builds a new frame rather than
# writing into a filtered slice (avoids SettingWithCopyWarning) and makes the
# cell safe to re-run.
test_well = test_well.assign(Prediction=y_pred)

In [10]:

# Per-facies scores on the held-out well. `labels` pins the report rows to the
# classes the model was trained on, so `target_names` stays aligned even when
# the well lacks some facies; a class with no true samples is reported with
# support 0 and scikit-learn warns that its recall is undefined.
print(classification_report(y_test, y_pred,
                            labels=clf.classes_,
                            target_names=target_names))

             precision    recall  f1-score   support

         SS       0.82      0.30      0.44        89
       CSiS       0.35      0.72      0.47        89
       FSiS       0.74      0.50      0.60       117
       SiSh       0.06      0.14      0.08         7
         MS       0.57      0.21      0.31        19
         WS       0.71      0.65      0.68        71
          D       0.88      0.82      0.85        17
         PS       0.58      0.65      0.61        40
         BS       0.00      0.00      0.00         0

avg / total       0.65      0.54      0.54       449



  'recall', 'true', average, warn_for)
