In [1]:
import openpyxl
from openpyxl import load_workbook
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D
%matplotlib notebook

In [2]:
df = pd.read_excel("data.xlsx", sheet_name=None, header=0)

In [3]:
all_df = {}
for i, d in enumerate(df.items()):
    name, data = d
    all_df[name] = data

In [4]:
data = all_df['1K-01']

In [5]:
data = data[data['DEPTH'] == data["DEPTH"]]   # drop NaN value row

In [6]:
data = data.drop(["Core_Facies"], axis=1)

In [7]:
data['Log_Facies'].apply(lambda x: str(x)).describe()

count     1025
unique       3
top        1.0
freq       530
Name: Log_Facies, dtype: object

In [8]:
comb = [(1, 1), (1, 2), (1, 3), (2, 1), (2, 2), (2, 3)]
def label_facies(row):
    f = row['Fluid']
    lf = row['Log_Facies']
    return comb.index((f, lf))
data['MixLabel'] = data.apply(label_facies, axis=1)

## L R

In [9]:
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC

In [10]:
X = data[["DEPTH", "CALI", "DT", "GR", "LLD", "LLS", "MSFL", "NPHI", "RHOB"]]
ss = StandardScaler()
X = ss.fit_transform(X)

In [11]:
Y = data['Log_Facies']

In [12]:
clf = SVC()

In [13]:
clf.fit(X, Y)

SVC(C=1.0, cache_size=200, class_weight=None, coef0=0.0,
  decision_function_shape='ovr', degree=3, gamma='auto_deprecated',
  kernel='rbf', max_iter=-1, probability=False, random_state=None,
  shrinking=True, tol=0.001, verbose=False)

In [14]:
Y_pred = clf.predict(X)

In [15]:
from sklearn.metrics import confusion_matrix 

In [16]:
results = confusion_matrix(Y, Y_pred)
results

array([[508,  22,   0],
       [ 80, 343,   8],
       [  1,   4,  59]])

In [17]:
from sklearn.metrics import classification_report
report = classification_report(Y, Y_pred)
print(report)

              precision    recall  f1-score   support

         1.0       0.86      0.96      0.91       530
         2.0       0.93      0.80      0.86       431
         3.0       0.88      0.92      0.90        64

   micro avg       0.89      0.89      0.89      1025
   macro avg       0.89      0.89      0.89      1025
weighted avg       0.89      0.89      0.89      1025



In [18]:
data_test = all_df['1X-02']
data_test = data_test[data_test['DEPTH'] == data_test["DEPTH"]]  
data_test = data_test.drop(["Core_Facies"], axis=1)

data_test['MixLabel'] = data_test.apply(label_facies, axis=1)
X_test = data_test[["DEPTH", "CALI", "DT", "GR", "LLD", "LLS", "MSFL", "NPHI", "RHOB"]]
X_test = ss.fit_transform(X_test)
Y_test = data_test['Log_Facies']
Y_test_pred = clf.predict(X_test)

In [19]:
results = confusion_matrix(Y_test, Y_test_pred)
results

array([[534, 188,   2],
       [ 48, 248,  29],
       [  9,   7,  29]])

In [20]:
report = classification_report(Y_test, Y_test_pred)
print(report)

              precision    recall  f1-score   support

         1.0       0.90      0.74      0.81       724
         2.0       0.56      0.76      0.65       325
         3.0       0.48      0.64      0.55        45

   micro avg       0.74      0.74      0.74      1094
   macro avg       0.65      0.72      0.67      1094
weighted avg       0.78      0.74      0.75      1094



In [21]:
Y_t = Y_test[Y_test_pred==Y_test]

In [22]:
Y_t.shape[0]/Y_test.shape[0]

0.7413162705667276

In [23]:
Y_t.shape[0]

811