In [1]:
import glob
import pandas as pd
import numpy as np

# Collect the per-well CSV exports. glob returns paths in arbitrary,
# filesystem-dependent order, so sort them to keep the order of the frames in
# file_list (and the fold numbers printed by later cells) stable across runs.
file_name_list = sorted(glob.glob("../../AmirSaman/Final/clean/*.csv"))

# Columns kept from every file, in this order: the well identifier comes first,
# followed by the measured columns (NEUT included).
feature_set = ['wellName','DEPTH', 'CALI', 'DENB', 'DRHO','DTCOMP', 'GR', 'NEUT','RDEEP', 'RMICRO']

# One DataFrame per input file, restricted to (and ordered as) feature_set.
file_list = []

for file in file_name_list:
    # skiprows=[1] drops the second physical line of each file
    # (presumably a units row below the header -- TODO confirm).
    df = pd.read_csv(file, index_col=None, skiprows=[1])
    file_list.append(df[feature_set])

  return f(*args, **kwds)
  return f(*args, **kwds)


In [2]:
import matplotlib.pyplot as plt

def plt_this(y):
    """Plot ``y`` with pyplot and display the figure.

    ``y`` is handed unchanged to ``plt.plot``; ``plt.show()`` is then called
    so the figure is rendered immediately. Nothing is returned.
    """
    plt.plot(y)
    plt.show()


In [3]:
import os

def save_res(dept, pred_y, err, well, algo_name):
    """Write one well's predictions and score as CSV files under ``Results/<well>/``.

    Two files are produced (existing files with the same name are overwritten):

    * ``<algo_name>.csv`` with columns ``NEUT`` (the values of ``dept``) and
      ``Pred_NEUT`` (the values of ``pred_y``);
    * ``<algo_name>_score.csv`` with a single ``ERROR`` column holding ``err``.

    Parameters
    ----------
    dept : sequence
        Values written to the ``NEUT`` column.
    pred_y : sequence
        Values written to the ``Pred_NEUT`` column; must be the same length
        as ``dept`` for the DataFrame to be built.
    err : scalar
        Score stored in the ``ERROR`` column.
    well : str
        Name of the sub-directory created under ``Results``.
    algo_name : str
        Stem of the two output file names.
    """
    res_path = "Results/%s"  %well
    # exist_ok=True creates the directory only when missing, without the
    # separate isdir() check that could race with another writer.
    os.makedirs(res_path, exist_ok=True)

    # NOTE(review): the column is labelled 'NEUT' but it stores `dept`; the
    # callers in this notebook pass the first feature column (DEPTH) here, so
    # the header looks mislabelled. Left unchanged to keep the CSV schema
    # stable for existing consumers -- confirm and rename if appropriate.
    result = pd.DataFrame({'NEUT': dept,
                           'Pred_NEUT': pred_y})
    result.to_csv('%s/%s.csv' %(res_path, algo_name))

    score = pd.DataFrame({'ERROR' : [err]})
    score.to_csv('%s/%s_score.csv' %(res_path, algo_name))

In [4]:
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import explained_variance_score, r2_score

def cross_val(clf, algo_name, feature_set=['DEPTH', 'CALI', 'DENB', 'DRHO','DTCOMP', 'GR', 'RDEEP', 'RMICRO']):
    """Leave-one-well-out evaluation of a regressor that predicts NEUT.

    Each frame in the module-level ``file_list`` is held out in turn; the
    model is fitted on the concatenation of the remaining frames and scored
    on the held-out one with ``r2_score``. Per-well predictions and scores are
    written through ``save_res``; the per-well scores and their mean are
    printed. Nothing is returned.

    Parameters
    ----------
    clf : callable
        Zero-argument factory (typically an estimator class) returning an
        object with ``fit(X, y)`` and ``predict(X)``. A fresh model is built
        for every fold.
    algo_name : str
        Label forwarded to ``save_res`` for naming the output files.
    feature_set : list of str
        Predictor columns. The first entry is the one saved next to the
        predictions (DEPTH with the default list). The default list is never
        mutated here.
    """
    wells = []
    err_list = []

    for i, test_df in enumerate(file_list):
        # The well name is read from the first cell of the held-out frame
        # (the frames are built with the well identifier as first column).
        wells.append(test_df.iloc[0, 0])
        print('%s : %s' %(i, wells[i]))

        # Training set: every frame except the held-out one.
        train_df = pd.concat([frame for j, frame in enumerate(file_list) if j != i])

        test_x = test_df[feature_set].values
        test_y = test_df['NEUT'].values

        train_X = train_df[feature_set].values
        train_y = train_df['NEUT'].values

        # Feature scaling. StandardScaler returns new arrays and does not
        # modify its input, so the results must be captured; the statistics
        # come from the training wells only and are re-used for the test well.
        scaler = StandardScaler()
        train_X_scaled = scaler.fit_transform(train_X)
        test_x_scaled = scaler.transform(test_x)

        # training
        mdl = clf()
        mdl.fit(train_X_scaled, train_y)

        # testing
        pred_y = mdl.predict(test_x_scaled)

        # score (R^2; kept under the historical name `err`)
        err = r2_score(test_y, pred_y)
        err_list.append(err)

        # save results -- pass the raw (unscaled) first feature column so the
        # saved values stay in their original units
        save_res(test_x[:, 0], pred_y, err, wells[i], algo_name)

    print()

    avg_err = np.mean(err_list)

    for well, well_err in zip(wells, err_list):
        print('Test score on %s : %s' %(well, well_err))

    print()
    print('Average algorithm score: %s' %avg_err)


# Linear regression

In [5]:
from sklearn.linear_model import LinearRegression

# Predictor columns for the linear baseline (NEUT is the target inside
# cross_val and is therefore not listed).
# 0.372 -- NOTE(review): presumably a score from an earlier run; confirm.
feature_set = [
    'DEPTH', 'CALI', 'DENB', 'DRHO',
    'DTCOMP', 'GR', 'RDEEP', 'RMICRO',
]

cross_val(clf=LinearRegression, algo_name='Linear regression', feature_set=feature_set)

0 : WELL1COMPOSITE
1 : WELL4COMPOSITE
2 : WELL2COMPOSITE
3 : WELL5COMPOSITE
4 : WELL3COMPOSITE
5 : WELL6COMPOSITE

Test score on WELL1COMPOSITE : 0.9766166210510479
Test score on WELL4COMPOSITE : 0.9763924881993657
Test score on WELL2COMPOSITE : 0.9815734897638569
Test score on WELL5COMPOSITE : 0.751518802177578
Test score on WELL3COMPOSITE : 0.9981288565504328
Test score on WELL6COMPOSITE : 0.5679568772634083

Average algorithm score: 0.8753645225009482


# Random Forest

In [6]:
from sklearn.ensemble import RandomForestRegressor

# Leave-one-well-out evaluation with cross_val's default feature list.
cross_val(clf=RandomForestRegressor, algo_name='Random forrest')

0 : WELL1COMPOSITE




1 : WELL4COMPOSITE




2 : WELL2COMPOSITE




3 : WELL5COMPOSITE




4 : WELL3COMPOSITE




5 : WELL6COMPOSITE





Test score on WELL1COMPOSITE : 0.9993230191300995
Test score on WELL4COMPOSITE : 0.9774310937625653
Test score on WELL2COMPOSITE : 0.9873152577456992
Test score on WELL5COMPOSITE : 0.7513376130004867
Test score on WELL3COMPOSITE : 0.9754457676031499
Test score on WELL6COMPOSITE : 0.5762196013756198

Average algorithm score: 0.8778453921029367


# Gradient Boosting

In [7]:
from lightgbm import LGBMRegressor

# Leave-one-well-out evaluation with cross_val's default feature list.
cross_val(clf=LGBMRegressor, algo_name='Gradient boosting')

0 : WELL1COMPOSITE
1 : WELL4COMPOSITE
2 : WELL2COMPOSITE
3 : WELL5COMPOSITE
4 : WELL3COMPOSITE
5 : WELL6COMPOSITE

Test score on WELL1COMPOSITE : 0.9970597519181443
Test score on WELL4COMPOSITE : 0.977328328985528
Test score on WELL2COMPOSITE : 0.9882689998590557
Test score on WELL5COMPOSITE : 0.7325851773176175
Test score on WELL3COMPOSITE : 0.9854178754216131
Test score on WELL6COMPOSITE : -4.911524287061389

Average algorithm score: -0.03847735892657159


# SVR

In [None]:
from sklearn.svm import SVR

# Leave-one-well-out evaluation with cross_val's default feature list.
cross_val(clf=SVR, algo_name='Support vector machine')

0 : WELL1COMPOSITE




1 : WELL4COMPOSITE




2 : WELL2COMPOSITE




3 : WELL5COMPOSITE


# XGBoost

In [None]:
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import explained_variance_score, r2_score
from xgboost import plot_tree
import matplotlib.pyplot as plt


def cross_val(clf, algo_name, feature_set=['DEPTH', 'CALI', 'DENB', 'DRHO','DTCOMP', 'GR', 'RDEEP', 'RMICRO']):
    """Leave-one-well-out evaluation of a regressor, plotting a tree per fold.

    NOTE: this definition re-uses the name ``cross_val``; once this cell has
    run, any earlier ``cross_val`` in the kernel is replaced by this one.
    Every fold calls xgboost's ``plot_tree`` on the fitted model, so ``clf``
    must produce a model that ``plot_tree`` accepts.

    Each frame in the module-level ``file_list`` is held out in turn; the
    model is fitted on the concatenation of the remaining frames and scored
    on the held-out one with ``r2_score``. Per-well predictions and scores are
    written through ``save_res``; the per-well scores and their mean are
    printed. Nothing is returned.

    Parameters
    ----------
    clf : callable
        Zero-argument factory (typically an estimator class) returning an
        object with ``fit(X, y)`` and ``predict(X)``. A fresh model is built
        for every fold.
    algo_name : str
        Label forwarded to ``save_res`` for naming the output files.
    feature_set : list of str
        Predictor columns. The first entry is the one saved next to the
        predictions (DEPTH with the default list). The default list is never
        mutated here.
    """
    wells = []
    err_list = []

    for i, test_df in enumerate(file_list):
        # The well name is read from the first cell of the held-out frame
        # (the frames are built with the well identifier as first column).
        wells.append(test_df.iloc[0, 0])
        print('%s : %s' %(i, wells[i]))

        # Training set: every frame except the held-out one.
        train_df = pd.concat([frame for j, frame in enumerate(file_list) if j != i])

        test_x = test_df[feature_set].values
        test_y = test_df['NEUT'].values

        train_X = train_df[feature_set].values
        train_y = train_df['NEUT'].values

        # Feature scaling. StandardScaler returns new arrays and does not
        # modify its input, so the results must be captured; the statistics
        # come from the training wells only and are re-used for the test well.
        scaler = StandardScaler()
        train_X_scaled = scaler.fit_transform(train_X)
        test_x_scaled = scaler.transform(test_x)

        # training
        mdl = clf()
        mdl.fit(train_X_scaled, train_y)

        # Draw the fitted model's tree left-to-right on a very large canvas.
        # The model was fitted on standardized features, so the split
        # thresholds shown in the plot are in standardized units.
        plot_tree(mdl, rankdir='LR')
        fig = plt.gcf()
        fig.set_size_inches(150, 100)
        fig.show()

        # testing
        pred_y = mdl.predict(test_x_scaled)

        # score (R^2; kept under the historical name `err`)
        err = r2_score(test_y, pred_y)
        err_list.append(err)

        # save results -- pass the raw (unscaled) first feature column so the
        # saved values stay in their original units
        save_res(test_x[:, 0], pred_y, err, wells[i], algo_name)

    print()

    avg_err = np.mean(err_list)

    for well, well_err in zip(wells, err_list):
        print('Test score on %s : %s' %(well, well_err))

    print()
    print('Average algorithm score: %s' %avg_err)


In [None]:
from xgboost import XGBRegressor

# Leave-one-well-out evaluation with cross_val's default feature list.
cross_val(clf=XGBRegressor, algo_name='xgbr')