In [1]:
import pandas as pd
import numpy as np 
import matplotlib.pyplot as plt
import seaborn as sns

In [2]:
from sklearn.preprocessing import StandardScaler, MinMaxScaler
from sklearn.model_selection import train_test_split
import timeit

In [3]:
from sklearn.svm import SVC
from sklearn.model_selection import RandomizedSearchCV
from sklearn.model_selection import GridSearchCV
from sklearn.model_selection import KFold
from sklearn.metrics import accuracy_score

In [4]:
df = pd.read_csv('data.csv')

In [5]:
df.set_index('Depth', inplace=True)

In [6]:
df['log_RT'] = np.log(df['RT'])

In [7]:
df = df[df['GR']<250]

In [8]:
data = df[['GR','RHOB','NPHI','Facies']]

In [9]:
size = 60

In [10]:
train_scores = []
test_scores = []
time =[]

In [11]:
#1st run

In [12]:
s1 = data[data.Facies == 'SH'].sample(n=int(size/3))
s2 = data[data.Facies == 'SS'].sample(n=int(size/3))
s3 = data[data.Facies == 'CB'].sample(n=int(size/3))
sample = pd.concat([s1,s2])
sample.sort_index(inplace=True)

In [13]:
features = sample.iloc[:,:-1]
target = sample.iloc[:,-1]

In [14]:
f_f = MinMaxScaler().fit_transform(features)

In [15]:
X_train, X_test, y_train, y_test = train_test_split(f_f, target, test_size = 0.3)

In [16]:
cv = KFold(n_splits=3, shuffle = True)

In [17]:
model = SVC(kernel='linear', C=1, gamma=1)

param_grid = {'C':[0.1,1,10,100],'kernel':['linear','poly','sigmoid','rbf'],
              'gamma':['scale','auto',0.001,0.01,0.1,1]}

grid = RandomizedSearchCV(model, param_grid, cv=cv, scoring='accuracy', n_iter=50, n_jobs=-1)
grid.fit(X_train, y_train)
y_pred_test = grid.predict(X_test)
y_pred_train = grid.predict(X_train)
train_scores.append(accuracy_score(y_train, y_pred_train))
test_scores.append(accuracy_score(y_test, y_pred_test))
print(grid.best_params_)
print('Mean Accuracy score for this prediction is {}'.format(grid.best_score_))

{'kernel': 'rbf', 'gamma': 'auto', 'C': 1}
Mean Accuracy score for this prediction is 1.0


In [18]:
# 2nd run

In [19]:
s1 = data[data.Facies == 'SH'].sample(n=int(size/3))
s2 = data[data.Facies == 'SS'].sample(n=int(size/3))
s3 = data[data.Facies == 'CB'].sample(n=int(size/3))
sample = pd.concat([s1,s2])
sample.sort_index(inplace=True)
features = sample.iloc[:,:-1]
target = sample.iloc[:,-1]
f_f = MinMaxScaler().fit_transform(features)
X_train, X_test, y_train, y_test = train_test_split(f_f, target, test_size = 0.3)
cv = KFold(n_splits=3, shuffle = True)
model = SVC(kernel='linear', C=1, gamma=1)

param_grid = {'C':[0.1,1,10,100],'kernel':['linear','poly','sigmoid','rbf'],
              'gamma':['scale','auto',0.001,0.01,0.1,1]}

grid = RandomizedSearchCV(model, param_grid, cv=cv, scoring='accuracy', n_iter=50, n_jobs=-1)
grid.fit(X_train, y_train)
y_pred_test = grid.predict(X_test)
y_pred_train = grid.predict(X_train)
train_scores.append(accuracy_score(y_train, y_pred_train))
test_scores.append(accuracy_score(y_test, y_pred_test))
print(grid.best_params_)
print('Mean Accuracy score for this prediction is {}'.format(grid.best_score_))

{'kernel': 'poly', 'gamma': 1, 'C': 10}
Mean Accuracy score for this prediction is 0.9296296296296296


In [20]:
# 3rd run

In [21]:
s1 = data[data.Facies == 'SH'].sample(n=int(size/3))
s2 = data[data.Facies == 'SS'].sample(n=int(size/3))
s3 = data[data.Facies == 'CB'].sample(n=int(size/3))
sample = pd.concat([s1,s2])
sample.sort_index(inplace=True)
features = sample.iloc[:,:-1]
target = sample.iloc[:,-1]
f_f = MinMaxScaler().fit_transform(features)
X_train, X_test, y_train, y_test = train_test_split(f_f, target, test_size = 0.3)
cv = KFold(n_splits=3, shuffle = True)
model = SVC(kernel='linear', C=1, gamma=1)

param_grid = {'C':[0.1,1,10,100],'kernel':['linear','poly','sigmoid','rbf'],
              'gamma':['scale','auto',0.001,0.01,0.1,1]}

grid = RandomizedSearchCV(model, param_grid, cv=cv, scoring='accuracy', n_iter=50, n_jobs=-1)
grid.fit(X_train, y_train)
y_pred_test = grid.predict(X_test)
y_pred_train = grid.predict(X_train)
train_scores.append(accuracy_score(y_train, y_pred_train))
test_scores.append(accuracy_score(y_test, y_pred_test))
print(grid.best_params_)
print('Mean Accuracy score for this prediction is {}'.format(grid.best_score_))

{'kernel': 'linear', 'gamma': 0.01, 'C': 100}
Mean Accuracy score for this prediction is 0.9296296296296296


In [22]:
# 4th run

In [23]:
s1 = data[data.Facies == 'SH'].sample(n=int(size/3))
s2 = data[data.Facies == 'SS'].sample(n=int(size/3))
s3 = data[data.Facies == 'CB'].sample(n=int(size/3))
sample = pd.concat([s1,s2])
sample.sort_index(inplace=True)
features = sample.iloc[:,:-1]
target = sample.iloc[:,-1]
f_f = MinMaxScaler().fit_transform(features)
X_train, X_test, y_train, y_test = train_test_split(f_f, target, test_size = 0.3)
cv = KFold(n_splits=3, shuffle = True)
model = SVC(kernel='linear', C=1, gamma=1)

param_grid = {'C':[0.1,1,10,100],'kernel':['linear','poly','sigmoid','rbf'],
              'gamma':['scale','auto',0.001,0.01,0.1,1]}

grid = RandomizedSearchCV(model, param_grid, cv=cv, scoring='accuracy', n_iter=50, n_jobs=-1)
grid.fit(X_train, y_train)
y_pred_test = grid.predict(X_test)
y_pred_train = grid.predict(X_train)
train_scores.append(accuracy_score(y_train, y_pred_train))
test_scores.append(accuracy_score(y_test, y_pred_test))
print(grid.best_params_)
print('Mean Accuracy score for this prediction is {}'.format(grid.best_score_))

{'kernel': 'sigmoid', 'gamma': 1, 'C': 100}
Mean Accuracy score for this prediction is 1.0


In [24]:
#5th run

In [25]:
s1 = data[data.Facies == 'SH'].sample(n=int(size/3))
s2 = data[data.Facies == 'SS'].sample(n=int(size/3))
s3 = data[data.Facies == 'CB'].sample(n=int(size/3))
sample = pd.concat([s1,s2])
sample.sort_index(inplace=True)
features = sample.iloc[:,:-1]
target = sample.iloc[:,-1]
f_f = MinMaxScaler().fit_transform(features)
X_train, X_test, y_train, y_test = train_test_split(f_f, target, test_size = 0.3)
cv = KFold(n_splits=3, shuffle = True)
model = SVC(kernel='linear', C=1, gamma=1)

param_grid = {'C':[0.1,1,10,100],'kernel':['linear','poly','sigmoid','rbf'],
              'gamma':['scale','auto',0.001,0.01,0.1,1]}

grid = RandomizedSearchCV(model, param_grid, cv=cv, scoring='accuracy', n_iter=50, n_jobs=-1)
grid.fit(X_train, y_train)
y_pred_test = grid.predict(X_test)
y_pred_train = grid.predict(X_train)
train_scores.append(accuracy_score(y_train, y_pred_train))
test_scores.append(accuracy_score(y_test, y_pred_test))
print(grid.best_params_)
print('Mean Accuracy score for this prediction is {}'.format(grid.best_score_))

{'kernel': 'linear', 'gamma': 0.1, 'C': 100}
Mean Accuracy score for this prediction is 1.0


In [26]:
#6th run

In [27]:
s1 = data[data.Facies == 'SH'].sample(n=int(size/3))
s2 = data[data.Facies == 'SS'].sample(n=int(size/3))
s3 = data[data.Facies == 'CB'].sample(n=int(size/3))
sample = pd.concat([s1,s2])
sample.sort_index(inplace=True)
features = sample.iloc[:,:-1]
target = sample.iloc[:,-1]
f_f = MinMaxScaler().fit_transform(features)
X_train, X_test, y_train, y_test = train_test_split(f_f, target, test_size = 0.3)
cv = KFold(n_splits=3, shuffle = True)
model = SVC(kernel='linear', C=1, gamma=1)

param_grid = {'C':[0.1,1,10,100],'kernel':['linear','poly','sigmoid','rbf'],
              'gamma':['scale','auto',0.001,0.01,0.1,1]}

grid = RandomizedSearchCV(model, param_grid, cv=cv, scoring='accuracy', n_iter=50, n_jobs=-1)
grid.fit(X_train, y_train)
y_pred_test = grid.predict(X_test)
y_pred_train = grid.predict(X_train)
train_scores.append(accuracy_score(y_train, y_pred_train))
test_scores.append(accuracy_score(y_test, y_pred_test))
print(grid.best_params_)
print('Mean Accuracy score for this prediction is {}'.format(grid.best_score_))

{'kernel': 'linear', 'gamma': 'auto', 'C': 10}
Mean Accuracy score for this prediction is 0.9296296296296296


In [28]:
# 7th run

In [29]:
s1 = data[data.Facies == 'SH'].sample(n=int(size/3))
s2 = data[data.Facies == 'SS'].sample(n=int(size/3))
s3 = data[data.Facies == 'CB'].sample(n=int(size/3))
sample = pd.concat([s1,s2])
sample.sort_index(inplace=True)
features = sample.iloc[:,:-1]
target = sample.iloc[:,-1]
f_f = MinMaxScaler().fit_transform(features)
X_train, X_test, y_train, y_test = train_test_split(f_f, target, test_size = 0.3)
cv = KFold(n_splits=3, shuffle = True)
model = SVC(kernel='linear', C=1, gamma=1)

param_grid = {'C':[0.1,1,10,100],'kernel':['linear','poly','sigmoid','rbf'],
              'gamma':['scale','auto',0.001,0.01,0.1,1]}

grid = RandomizedSearchCV(model, param_grid, cv=cv, scoring='accuracy', n_iter=50, n_jobs=-1)
grid.fit(X_train, y_train)
y_pred_test = grid.predict(X_test)
y_pred_train = grid.predict(X_train)
train_scores.append(accuracy_score(y_train, y_pred_train))
test_scores.append(accuracy_score(y_test, y_pred_test))
print(grid.best_params_)
print('Mean Accuracy score for this prediction is {}'.format(grid.best_score_))

{'kernel': 'sigmoid', 'gamma': 1, 'C': 10}
Mean Accuracy score for this prediction is 0.9333333333333332


In [30]:
# 8th run

In [31]:
s1 = data[data.Facies == 'SH'].sample(n=int(size/3))
s2 = data[data.Facies == 'SS'].sample(n=int(size/3))
s3 = data[data.Facies == 'CB'].sample(n=int(size/3))
sample = pd.concat([s1,s2])
sample.sort_index(inplace=True)
features = sample.iloc[:,:-1]
target = sample.iloc[:,-1]
f_f = MinMaxScaler().fit_transform(features)
X_train, X_test, y_train, y_test = train_test_split(f_f, target, test_size = 0.3)
cv = KFold(n_splits=3, shuffle = True)
model = SVC(kernel='linear', C=1, gamma=1)

param_grid = {'C':[0.1,1,10,100],'kernel':['linear','poly','sigmoid','rbf'],
              'gamma':['scale','auto',0.001,0.01,0.1,1]}

grid = RandomizedSearchCV(model, param_grid, cv=cv, scoring='accuracy', n_iter=50, n_jobs=-1)
grid.fit(X_train, y_train)
y_pred_test = grid.predict(X_test)
y_pred_train = grid.predict(X_train)
train_scores.append(accuracy_score(y_train, y_pred_train))
test_scores.append(accuracy_score(y_test, y_pred_test))
print(grid.best_params_)
print('Mean Accuracy score for this prediction is {}'.format(grid.best_score_))

{'kernel': 'poly', 'gamma': 1, 'C': 10}
Mean Accuracy score for this prediction is 1.0


In [32]:
# 9th run

In [33]:
s1 = data[data.Facies == 'SH'].sample(n=int(size/3))
s2 = data[data.Facies == 'SS'].sample(n=int(size/3))
s3 = data[data.Facies == 'CB'].sample(n=int(size/3))
sample = pd.concat([s1,s2])
sample.sort_index(inplace=True)
features = sample.iloc[:,:-1]
target = sample.iloc[:,-1]
f_f = MinMaxScaler().fit_transform(features)
X_train, X_test, y_train, y_test = train_test_split(f_f, target, test_size = 0.3)
cv = KFold(n_splits=3, shuffle = True)
model = SVC(kernel='linear', C=1, gamma=1)

param_grid = {'C':[0.1,1,10,100],'kernel':['linear','poly','sigmoid','rbf'],
              'gamma':['scale','auto',0.001,0.01,0.1,1]}

grid = RandomizedSearchCV(model, param_grid, cv=cv, scoring='accuracy', n_iter=50, n_jobs=-1)
grid.fit(X_train, y_train)
y_pred_test = grid.predict(X_test)
y_pred_train = grid.predict(X_train)
train_scores.append(accuracy_score(y_train, y_pred_train))
test_scores.append(accuracy_score(y_test, y_pred_test))
print(grid.best_params_)
print('Mean Accuracy score for this prediction is {}'.format(grid.best_score_))

{'kernel': 'sigmoid', 'gamma': 'auto', 'C': 100}
Mean Accuracy score for this prediction is 1.0


In [34]:
#10th run

In [35]:
s1 = data[data.Facies == 'SH'].sample(n=int(size/3))
s2 = data[data.Facies == 'SS'].sample(n=int(size/3))
s3 = data[data.Facies == 'CB'].sample(n=int(size/3))
sample = pd.concat([s1,s2])
sample.sort_index(inplace=True)
features = sample.iloc[:,:-1]
target = sample.iloc[:,-1]
f_f = MinMaxScaler().fit_transform(features)
X_train, X_test, y_train, y_test = train_test_split(f_f, target, test_size = 0.3)
start_time = timeit.default_timer()
cv = KFold(n_splits=3, shuffle = True)
model = SVC(kernel='linear', C=1, gamma=1)

param_grid = {'C':[0.1,1,10,100],'kernel':['linear','poly','sigmoid','rbf'],
              'gamma':['scale','auto',0.001,0.01,0.1,1]}

grid = RandomizedSearchCV(model, param_grid, cv=cv, scoring='accuracy', n_iter=50, n_jobs=-1)
grid.fit(X_train, y_train)
y_pred_test = grid.predict(X_test)
y_pred_train = grid.predict(X_train)
elapsed = timeit.default_timer() - start_time
train_scores.append(accuracy_score(y_train, y_pred_train))
test_scores.append(accuracy_score(y_test, y_pred_test))
time.append(elapsed)
print(grid.best_params_)
print('Mean Accuracy score for this prediction is {}'.format(grid.best_score_))

{'kernel': 'rbf', 'gamma': 1, 'C': 1}
Mean Accuracy score for this prediction is 0.8962962962962964


In [36]:
print(train_scores)
print(test_scores)
print(time)

[1.0, 0.9285714285714286, 1.0, 1.0, 1.0, 0.9642857142857143, 0.9285714285714286, 1.0, 1.0, 0.8928571428571429]
[0.9166666666666666, 1.0, 0.9166666666666666, 0.9166666666666666, 1.0, 1.0, 0.8333333333333334, 0.9166666666666666, 0.9166666666666666, 0.6666666666666666]
[0.1047901999999965]
