In [1]:
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import seaborn as sns
%matplotlib inline

In [2]:
!pip install lasio
import lasio

Collecting lasio
  Downloading lasio-0.29-py2.py3-none-any.whl (38 kB)
Installing collected packages: lasio
Successfully installed lasio-0.29


# Load and Display the Well-log Datasets

In [3]:
# Read the training well-log table into a DataFrame. The path is relative, so
# the CSV has to be present in the notebook's working directory.
df = pd.read_csv("well_train.csv")

In [4]:
# Preview the first five rows (the default for head()) to see what the data set looks like.
df.head()

Unnamed: 0,WELL,DEPTH,NPHI,RHOB,GR,RT,PEF,CALI,DT,DTS
0,15_9-F-11A,2600.0,0.371,2.356,82.748,1.323,7.126,8.648,104.605,261.584
1,15_9-F-11A,2600.1,0.341,2.338,79.399,1.196,6.654,8.578,103.827,262.161
2,15_9-F-11A,2600.2,0.308,2.315,74.248,1.171,6.105,8.578,102.74,262.73
3,15_9-F-11A,2600.3,0.283,2.291,68.542,1.142,5.613,8.547,100.943,263.018
4,15_9-F-11A,2600.4,0.272,2.269,60.314,1.107,5.281,8.523,98.473,263.037


In [5]:
# List the data type of each column. The Series is left as the cell's last
# expression (instead of being wrapped in print()) so it is shown the same way
# as the other inspection cells in this notebook.
df.dtypes

WELL      object
DEPTH    float64
NPHI     float64
RHOB     float64
GR       float64
RT       float64
PEF      float64
CALI     float64
DT       float64
DTS      float64
dtype: object


In [6]:
# Pairwise Pearson correlation between the numeric logs.
# The frame also holds the string column WELL; recent pandas versions no longer
# drop non-numeric columns silently in corr() and raise instead, so the numeric
# columns are selected explicitly before computing the matrix.
df.select_dtypes(include="number").corr()

Unnamed: 0,DEPTH,NPHI,RHOB,GR,RT,PEF,CALI,DT,DTS
DEPTH,1.0,-0.010254,0.22288,0.306059,0.046852,-0.008834,0.565062,-0.053425,-0.321054
NPHI,-0.010254,1.0,-0.698239,0.752758,0.009782,-0.416166,-0.126144,0.93287,0.81507
RHOB,0.22288,-0.698239,1.0,-0.297507,0.020202,0.746807,0.326243,-0.792726,-0.653448
GR,0.306059,0.752758,-0.297507,1.0,0.072527,-0.114553,0.094757,0.649474,0.428327
RT,0.046852,0.009782,0.020202,0.072527,1.0,0.040962,0.036128,0.005392,-0.004833
PEF,-0.008834,-0.416166,0.746807,-0.114553,0.040962,1.0,0.271273,-0.549841,-0.436486
CALI,0.565062,-0.126144,0.326243,0.094757,0.036128,0.271273,1.0,-0.17098,-0.291712
DT,-0.053425,0.93287,-0.792726,0.649474,0.005392,-0.549841,-0.17098,1.0,0.849776
DTS,-0.321054,0.81507,-0.653448,0.428327,-0.004833,-0.436486,-0.291712,0.849776,1.0


In [7]:
# Summary statistics (count, mean, std, min, quartiles, max) of the numeric columns.
# In the recorded output DTS has a lower count (23628) than the other columns
# (24403), i.e. some DTS values are missing.
df.describe()

Unnamed: 0,DEPTH,NPHI,RHOB,GR,RT,PEF,CALI,DT,DTS
count,24403.0,24403.0,24403.0,24403.0,24403.0,24403.0,24403.0,24403.0,23628.0
mean,3158.52723,0.169979,2.473637,43.695794,53.018156,7.023951,8.627437,77.964946,141.97968
std,294.169613,0.099186,0.146564,59.959823,1630.307353,1.171725,0.046212,14.491084,33.930572
min,2600.0,0.01,1.9806,0.852,0.103,4.2978,8.3604,53.165,83.574
25%,2915.0,0.0998,2.38025,10.2225,1.5511,6.13325,8.5781,67.52915,121.29315
50%,3180.0,0.1427,2.5274,29.925,2.834,7.348,8.625,73.6987,132.18575
75%,3383.35,0.217,2.5783,48.70775,4.512,7.94695,8.6718,86.4485,146.572225
max,3720.0,0.5932,3.0517,1124.403,62290.77,13.841,8.875,126.827,343.944


In [8]:
# Show the label (header) of each column.
df.columns

Index(['WELL', 'DEPTH', 'NPHI', 'RHOB', 'GR', 'RT', 'PEF', 'CALI', 'DT',
       'DTS'],
      dtype='object')

In [10]:
# Rescale DEPTH by dividing every value by 10**4 -- the next power of ten above
# the column maximum (3720) -- so that all depths fall below 1.
# NOTE: the column is overwritten in place, so running this cell a second time
# divides the already-scaled values again.
df['DEPTH'] = df['DEPTH'].div(10 ** 4)

In [11]:
# Preview the first rows again to confirm that DEPTH now holds the rescaled values.
df.head()

Unnamed: 0,WELL,DEPTH,NPHI,RHOB,GR,RT,PEF,CALI,DT,DTS
0,15_9-F-11A,0.26,0.371,2.356,82.748,1.323,7.126,8.648,104.605,261.584
1,15_9-F-11A,0.26001,0.341,2.338,79.399,1.196,6.654,8.578,103.827,262.161
2,15_9-F-11A,0.26002,0.308,2.315,74.248,1.171,6.105,8.578,102.74,262.73
3,15_9-F-11A,0.26003,0.283,2.291,68.542,1.142,5.613,8.547,100.943,263.018
4,15_9-F-11A,0.26004,0.272,2.269,60.314,1.107,5.281,8.523,98.473,263.037


In [12]:
# Min-max normalization of RHOB: each value x becomes
# (x - minimum) / (maximum - minimum), which maps the column onto [0, 1].
df['RHOB'] = df['RHOB'].pipe(lambda col: (col - col.min()) / (col.max() - col.min()))

In [13]:
# Preview the first rows to check RHOB after the min-max normalization.
df.head()

Unnamed: 0,WELL,DEPTH,NPHI,RHOB,GR,RT,PEF,CALI,DT,DTS
0,15_9-F-11A,0.26,0.371,0.350481,82.748,1.323,7.126,8.648,104.605,261.584
1,15_9-F-11A,0.26001,0.341,0.333676,79.399,1.196,6.654,8.578,103.827,262.161
2,15_9-F-11A,0.26002,0.308,0.312202,74.248,1.171,6.105,8.578,102.74,262.73
3,15_9-F-11A,0.26003,0.283,0.289796,68.542,1.142,5.613,8.547,100.943,263.018
4,15_9-F-11A,0.26004,0.272,0.269256,60.314,1.107,5.281,8.523,98.473,263.037


In [16]:
# Standard deviation of GR, inspected before the column is standardized
# (pandas computes the sample standard deviation, ddof=1, by default).
df['GR'].std()

59.95982325161584

In [17]:
# Standardize GR (z-score): each value x becomes (x - mean) / standard deviation.
df['GR'] = df['GR'].pipe(lambda col: (col - col.mean()) / col.std())

In [18]:
# Preview the first rows to check GR after standardization.
df.head()

Unnamed: 0,WELL,DEPTH,NPHI,RHOB,GR,RT,PEF,CALI,DT,DTS
0,15_9-F-11A,0.26,0.371,0.350481,0.651306,1.323,7.126,8.648,104.605,261.584
1,15_9-F-11A,0.26001,0.341,0.333676,0.595452,1.196,6.654,8.578,103.827,262.161
2,15_9-F-11A,0.26002,0.308,0.312202,0.509545,1.171,6.105,8.578,102.74,262.73
3,15_9-F-11A,0.26003,0.283,0.289796,0.414381,1.142,5.613,8.547,100.943,263.018
4,15_9-F-11A,0.26004,0.272,0.269256,0.277156,1.107,5.281,8.523,98.473,263.037
