# Deployment test: hold wells out of the training data and score the modelling strategy on them

#### Prerequisites

In [1]:
%%capture
!pip install lasio
!pip install catboost

In [2]:
import numpy as np 
import pandas as pd 

import os
import csv
import matplotlib.pyplot as plt
import shutil

import collections
import lasio 
from tqdm.notebook import tqdm
from random import randint
import seaborn as sns
from sklearn.model_selection import train_test_split
import xgboost as xg
from sklearn import preprocessing
from sklearn.preprocessing import StandardScaler
from catboost import CatBoostRegressor
from sklearn.model_selection import GridSearchCV
from sklearn.svm import SVR
from sklearn.neural_network import MLPRegressor

from sklearn.metrics import mean_squared_error as MSE
from scipy import stats

import time
import warnings
from google.colab import drive
warnings.filterwarnings('ignore')

In [25]:
# Downloading LAS Files of each region
# gdown fetches the archive by its Google Drive file id (it is saved as /content/wellT.zip),
# unzip -qq extracts it quietly into a folder named 'wellT' under the current working
# directory, and the zip archive is deleted afterwards.
!gdown --id 1UMIQ_am-1kLIZvmJ-dgTkpHwvmgbhyuA
!unzip -qq /content/wellT.zip -d 'wellT'
os.remove("/content/wellT.zip")

# The same three steps for the wellF archive, currently disabled.
# NOTE(review): the file-listing cell further down walks /content/wellF; with these lines
# commented out that folder is not created by this cell on a fresh run.
# !gdown --id 1NjpECBJcVdSxhY8EQ83KgCb-f2WheX5V
# !unzip -qq /content/wellF.zip -d 'wellF'
# os.remove("/content/wellF.zip")

Downloading...
From: https://drive.google.com/uc?id=1UMIQ_am-1kLIZvmJ-dgTkpHwvmgbhyuA
To: /content/wellT.zip
117MB [00:00, 190MB/s] 


#### Run Dictionary

In [16]:
# New dict

# False dict (alias tables used with the wellF LAS files)
#
# alias_dict_F maps a group key ('li_<GROUP>') to the list of LAS curve mnemonics that
# are treated as the same measurement. The order inside each list matters: when a table
# is applied with wellydf, the first mnemonic that exists in a file is the one used.
# Every mnemonic is listed once per group (repeated entries could never be selected).
# NOTE(review): the group prefixes (DEC, USF, GAPI, IN, LBF, MV, BE) presumably name the
# curve's unit -- confirm with the data owner.
alias_dict_F = {
    # porosity-type curves
    'li_DEC_D': ['DPHZ', 'DPHI_LS', 'DPHI', 'DPOR_LS', 'DPO_LS', 'DPHZ_LS'],  # from density
    'li_DEC_N': ['NPOR_LS', 'ENPH_LS', 'NPHI_LS', 'APLC_LS', 'NPHI_LS_1', 'NPHI', 'SNP'],  # from Neutron
    'li_DEC_T': ['TNPH_LS_1', 'TNPH_LS', 'TPHI_LS', 'TNPH', 'CNPOR_LS', 'TNPH_LIM'],  # thermal
    'li_DEC_S': ['SPHI', 'SPHI_LS'],  # sonic
    'li_DEC_NOR': ['PORS_LS', 'PXND', 'PORZ_LS'],  # normal

    # slowness curves
    'li_USF_S': ['DTS', 'DT4ST', 'DT4SR', 'DT1', 'DT4S', 'DTSH', 'DT1T', 'DTTS', 'DTRS',
                 'DT1R', 'DT2R', 'DTST', 'DT2'],  # shear
    'li_USF_P': ['DTCO', 'DT4PR', 'DT4PT', 'DTCO_1', 'DT4P', 'DTTP'],  # compressional
    'li_USF_LO': ['DTC', 'DT', 'DTLN', 'DTLF', 'DTL'],  # long spacing

    # gamma-ray curves
    'li_GAPI_D': ['GRD1', 'CGRD', 'GRD', 'HCGRD', 'HSGRD'],  # density
    'li_GAPI_R': ['HCGRR', 'CGRR', 'GRR', 'GRR_R', 'GRR_R2', 'GRR_R1'],  # Resistivity
    'li_GAPI_S': ['GRS1', 'CGRS', 'ECGRS', 'HSGRS', 'HCGRS'],  # Sonic
    'li_GAPI_N': ['HGRT', 'GRC', 'GR_EDTC', 'GR', 'ECGREDTC', 'GRT'],  # Normal
    'li_GAPI_SPEC': ['SGRR', 'HSGR', 'SGRD', 'SGRDD', 'SGR', 'SGRS'],  # spectroscopy

    # caliper-type curves
    'li_IN_D': ['LCALD', 'CALD', 'CALI', 'LCAL', 'LCAL_1'],  # density
    'li_IN_R': ['CALSR', 'CAL1R', 'CAL1R_R', 'CALSR_R', 'CALR'],  # resistivity
    'li_IN_HILT': ['HCALS', 'HCAL', 'HCAL_2', 'HCALD', 'HCALR', 'HCAL_1'],  # hilt
    'li_IN_HD': ['HD2', 'HD1', 'HD', 'HD1_PPC1', 'HDS', 'HD2_PPC2', 'HD2_PPC1'],  # hd
    'li_IN_N': ['CALX', 'C1', 'C2', 'DCAL'],  # normal

    # resistivity-type curves
    'li_SH': ['AE10', 'AF10', 'AHT10', 'AT10', 'HLLS', 'HMIN', 'HMNO', 'LLS', 'LLS_R',
              'LLS_R1', 'LLS_R2', 'MSFL', 'MSFL_R', 'RXO8', 'SFLU'],
    'li_DP': ['AHT90', 'AF90', 'ILD', 'AT90', 'HRID', 'AE90', 'LLD_R1', 'LLD_R2',
              'RT_HRLT', 'LLD', 'HLLD', 'LLD_R'],
    'li_MD': ['AE20', 'AE30', 'AF20', 'AHT20', 'AHT30', 'AT20', 'AT30', 'AO30', 'ILM',
              'TBIT20', 'ILM_1', 'IMBC', 'IMPH', 'RXOZ', 'RXOZ_R', 'RXO_HRLT'],
    'li_RLA': ['RLA1', 'RLA2', 'RLA3', 'RLA4', 'RLA5'],

    'li_BE': ['PEFL', 'PEFZ', 'PE', 'PEF'],

    'li_MV': ['SPR'],

    # tension-type curves
    'li_LBF_D': ['TEND_1', 'TEND1', 'TEND'],  # density
    'li_LBF_R': ['TENR_R', 'TENR_R2', 'TENR', 'TENR_R1'],  # resistivity
    'li_LBF_S': ['TENS1', 'LTEN', 'TENS'],  # sonic
    'li_LBF_N': ['TENT', 'TEN'],  # normal
}

# Groups exposed through alias_F, in the column order the resulting frame will have.
# NOTE(review): 'li_MD' and 'li_IN_HD' are defined above but are not exposed here;
# confirm whether leaving them out is intentional before adding them.
_ALIAS_F_GROUPS = (
    "DEC_D", "DEC_N", "DEC_T", "DEC_S", "DEC_NOR",
    "USF_S", "USF_P", "USF_LO",
    "GAPI_D", "GAPI_R", "GAPI_S", "GAPI_N", "GAPI_SPEC",
    "IN_D", "IN_R", "IN_HILT", "IN_N",
    "MV",
    "BE",
    "LBF_D", "LBF_R", "LBF_S", "LBF_N",
    "SH", "DP",
)

# alias_F maps the output column name to its alias list; the lists are shared with
# alias_dict_F (same objects), and "DTSM" is the only column matched by its own name.
alias_F = {"DTSM": ['DTSM']}
alias_F.update((group, alias_dict_F['li_' + group]) for group in _ALIAS_F_GROUPS)
alias_F["RL"] = alias_dict_F['li_RLA']  # the RLA group is published under the key "RL"
#########################################################

# True dict (alias tables for the second set of wells; same layout as the "False" tables)
#
# alias_dict_T maps a group key ('li_<GROUP>') to the LAS curve mnemonics treated as the
# same measurement; the first mnemonic of a list that exists in a file is the one used
# when the table is applied with wellydf. Every mnemonic is listed once per group.
# NOTE(review): 'DTC' and 'DT' appear in both the shear and the compressional list, so a
# file carrying only one of them yields the same curve for both groups -- confirm intent.
alias_dict_T = {
    'li_SH': ['AT10', 'AE10', 'AHT10', 'LLS', 'SFL', 'AST10'],  # shallow
    'li_MD': ['AE20', 'IMBC', 'AF20', 'AE30', 'ILM_1', 'AST30', 'IMPH', 'AHT30', 'ILM',
              'AHT20', 'TBIT20', 'TBIT30'],  # Medium
    'li_DP': ['ILD1', 'ILD_1', 'AT90', 'AE90', 'AST90', 'LLD', 'ILD', 'AHT90', 'IDPH'],  # deep
    'li_SFL': ['SFLA', 'SFLU', 'SFLU_1', 'RILD'],
    'li_RLA': ['RLA3', 'RLA1', 'RLA5'],

    'li_DEC_NP': ['NPHI', 'NPOR', 'NPHI_LS', 'NPOR_LS', 'NPHS', 'NPHI1', 'CNC'],  # Neutron
    'li_DEC_DP': ['DPHZ2', 'DPHZ_LS', 'DPHI_LS', 'DPHI', 'DPOR', 'DPHI_SLDT', 'DPHZ',
                  'PORZ', 'DPO_LS', 'DPO'],  # Density
    'li_DEC_ST': ['SPHI_SS', 'SPHI', 'TNPH_LS', 'SPHI_LS'],  # Sonic

    'li_USF_S': ['DT1', 'DTSM_SLOW', 'DTSM_FAST', 'DT4S', 'DTRS', 'DTC', 'DTTS', 'DTS',
                 'DTMD', 'DTST', 'DT', 'DTOT', 'DTM'],  # Shear
    'li_USF_P': ['DT4P', 'DTRP', 'DTTP', 'DTC', 'DTCO_1', 'DTCO', 'DT'],  # Compressional

    'li_GAPI_D': ['GRD', 'SGRD', 'ECGRD', 'SGR'],  # from Density
    'li_GAPI_R': ['ECGRR', 'GRR'],  # from resistivity
    'li_GAPI_S': ['GRN', 'ECGRS'],  # from sonic
    'li_GAPI_N': ['GR_EDTC', 'GR_STGC', 'GR'],  # normal Gamma API

    'li_IN_D': ['C1', 'LCAL', 'CALI', 'CALD', 'LCALD'],  # from density
    'li_IN_R': ['CALI_SPCS', 'CALR'],  # from resistivity
    'li_IN_HILT': ['HCAL2R', 'HCAL_1', 'HCAL_2', 'HCALD', 'HCALR', 'HCALS', 'HCAL'],  # from HILT

    'li_LB_D': ['TEND'],  # from density
    'li_LB_R': ['TENR'],  # from Resistivity
    'li_LB_S': ['TENS'],  # from Sonic

    'li_BE': ['PEF_SLDT', 'PE', 'PEFZ', 'PEF', 'PEFS', 'PEFL'],

    'li_MV': ['SPR'],
}

# alias_T maps the output column name to its alias list (shared with alias_dict_T).
# Key order is the column order of the frame built from it.
# NOTE(review): 'li_MD' and 'li_RLA' are defined above but not exposed here; confirm
# whether that is intentional before adding them.
alias_T = {
    "DTSM": ['DTSM'],

    "DEC_NP": alias_dict_T['li_DEC_NP'],
    "DEC_DP": alias_dict_T['li_DEC_DP'],
    "DEC_ST": alias_dict_T['li_DEC_ST'],

    "USF_S": alias_dict_T['li_USF_S'],
    "USF_P": alias_dict_T['li_USF_P'],

    "GAPI_D": alias_dict_T['li_GAPI_D'],
    "GAPI_R": alias_dict_T['li_GAPI_R'],
    "GAPI_S": alias_dict_T['li_GAPI_S'],
    "GAPI_N": alias_dict_T['li_GAPI_N'],

    "IN_D": alias_dict_T['li_IN_D'],
    "IN_R": alias_dict_T['li_IN_R'],
    "IN_HILT": alias_dict_T['li_IN_HILT'],

    "MV": alias_dict_T['li_MV'],

    "BE": alias_dict_T['li_BE'],

    "SH": alias_dict_T['li_SH'],
    "DP": alias_dict_T['li_DP'],
    "SFL": alias_dict_T['li_SFL'],

    "LB_D": alias_dict_T['li_LB_D'],
    "LB_R": alias_dict_T['li_LB_R'],
    "LB_S": alias_dict_T['li_LB_S'],
}

########################################################




#### DEPLOYMENT STARTS 

In [32]:
# Build two index-aligned lists for every LAS file found below the wellF folder:
# list_path_F holds the full path, list_name_F the bare file name.
from fnmatch import fnmatch

root = '/content/wellF'
root1 = '/content/'
pattern = "*.las"

# One (full path, file name) pair per matching file, in os.walk order
las_hits = [
    (os.path.join(folder, entry), entry)
    for folder, _subfolders, entries in os.walk(root)
    for entry in entries
    if fnmatch(entry, pattern)
]
list_path_F = [full_path for full_path, _ in las_hits]
list_name_F = [entry for _, entry in las_hits]

print(len(list_name_F))
print(len(list_path_F))
print(root + '/*.las')

142
142
/content/wellF/*.las


In [34]:
# This function can deploy a file name or range of files
def DeployTest(filename = ''):

      def wellydf( df, alias_dict ):

          df2 = pd.DataFrame()
          for alias, columns in alias_dict.items(): 
            for column in columns:
              if column in df.columns.values:
                df2[alias] = df[column]
                break
          return df2

      def Deploy(filepath, filename):
          
          X = lasio.read(filepath)
          X.write(filename)
          os.remove(filepath)
         
          
          # making wellF data
          folder_name = '/content/wellF'

          x = pd.DataFrame()
          files = os.listdir(folder_name)


          for y in tqdm(files):
            temp = lasio.read( folder_name + '/' + y ).df()
            z = wellydf( temp, alias_F)
            z = z.reset_index(drop=True)
            z['DEPT'] = temp.index
            x = pd.concat([x,z], axis=0, ignore_index=True)

          x = x[x['DTSM'].notna()]

          print(' ')
          print(filename)
          las = lasio.read(root1 + filename)
          dep = las.df()
          print("DEP DATA INFO")
          dep.info()
          dep_data = wellydf(dep, alias_F)
          dep_data = dep_data[dep_data['DTSM'].notna()]

          print("NEW DEP DATA INFO")
          dep_data.info()
          dep_data = dep_data.dropna(axis = 1, how = 'all')

          list_columns = []
          for i in dep_data.columns:
            list_columns.append(i)

          dep_data_columns1 = list_columns.copy()
          dep_data_columns1.remove('DTSM')

          d = pd.DataFrame(x[list_columns], index = None)

          if len(list_columns) == 3: thresh = ((len(list_columns)-1)//2) + 1
          elif len(list_columns) < 3: print("Only 1 column"); thresh = 1;
          else : thresh = (len(dep_data_columns1)//2) + 1;
          print("THRESH: {}" .format(thresh))

          d = d.dropna(thresh = thresh)
          X = pd.DataFrame(d[dep_data_columns1], index = None)
          Y = pd.DataFrame(d['DTSM'], index = None)
          print("FINAL TRAINING DATA INFO")
          X.info()
          
          print("Training starts...")
          #catboost
          train_x, test_x, train_y, test_y = train_test_split(X,Y,test_size=0.2)
          cat = CatBoostRegressor(verbose = 0,learning_rate  = 0.01,
                                      task_type='GPU',
                                      iterations = 1500, depth = 8)
          cat.fit(train_x,train_y)
          pred = cat.predict(test_x)
          rmse = np.sqrt(MSE(test_y,pred))
          print(rmse)  
          dep_X = pd.DataFrame(dep_data[dep_data_columns1])
          target = pd.DataFrame(dep_data['DTSM'])
          dep_pred_cat = cat.predict(dep_X)

          #xgb
          train_x, test_x, train_y, test_y = train_test_split(X,Y,test_size=0.2)
          xgb_r = xg.XGBRegressor(objective='reg:squarederror', learning_rate= 0.1,
                                      tree_method = 'gpu_hist',
                                    n_estimators =1000,
                                    max_depth = 8)
          xgb_r.fit(train_x,train_y)
          pred = xgb_r.predict(test_x)
          rmse = np.sqrt(MSE(test_y,pred))
          print(rmse)

          dep_X = pd.DataFrame(dep_data[dep_data_columns1])
          target = pd.DataFrame(dep_data['DTSM'])
          dep_pred_xgb = xgb_r.predict(dep_X)
          print("Target: {}" .format(target.values[10:20]))
          print("CAR: {}" .format(dep_pred_cat[10:20]))
          print("XGB: {}" .format(dep_pred_xgb[10:20]))
          print('CAT:',np.sqrt(MSE(target,dep_pred_cat)))
          print('XGB:',np.sqrt(MSE(target,dep_pred_xgb)))
          print('ensemble:', np.sqrt(MSE(target,(4*dep_pred_cat + dep_pred_xgb)/5 )) )

          !rm -rf wellF


      root = '/content/wellF/'
      root1 = '/content/'

      if len(filename) == 0:
          for i in range(20):
            print(i)
            !gdown --id 1NjpECBJcVdSxhY8EQ83KgCb-f2WheX5V
            !unzip -qq /content/wellF.zip -d 'wellF'
            os.remove("/content/wellF.zip")

            # !gdown --id 1UMIQ_am-1kLIZvmJ-dgTkpHwvmgbhyuA
            # !unzip -qq /content/wellT.zip -d 'wellT'
            # os.remove("/content/wellT.zip")


            value = randint(0,len(list_name_F)-1)
            print(value)
            filename = list_name_F[value]
            filepath = list_path_F[value]
            Deploy(filepath, filename)
      else:
          !gdown --id 1NjpECBJcVdSxhY8EQ83KgCb-f2WheX5V
          !unzip -qq /content/wellF.zip -d 'wellF'
          os.remove("/content/wellF.zip")

          # !gdown --id 1UMIQ_am-1kLIZvmJ-dgTkpHwvmgbhyuA
          # !unzip -qq /content/wellT.zip -d 'wellT'
          # os.remove("/content/wellT.zip")

          Deploy(root + filename, filename)

In [None]:
# Run the function here
!rm -rf wellF
DeployTest(filename = '')