In [16]:
import os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

In [17]:
# Location of the well data set. The original absolute path stays as the
# default; setting the WELLS_CSV environment variable points the notebook at
# a copy of the file elsewhere, so it is no longer tied to one machine.
data_path = os.environ.get(
    "WELLS_CSV",
    "/Users/daniel421/Desktop/STAT_724/ds_724/2D_MV_200wells.csv",
)
df = pd.read_csv(data_path)
df.head()  # preview the first five samples

Unnamed: 0,X,Y,facies_threshold_0.3,porosity,permeability,acoustic_impedance
0,565,1485,1,0.1184,6.17,2.009
1,2585,1185,1,0.1566,6.275,2.864
2,2065,2865,2,0.192,92.297,3.524
3,3575,2655,1,0.1621,9.048,2.157
4,1835,35,1,0.1766,7.123,3.979


In [18]:
# Summary statistics with P10 / P90 added; transposed so each feature is a row.
df.describe(percentiles=[0.1, 0.9]).T

Unnamed: 0,count,mean,std,min,10%,50%,90%,max
X,200.0,2053.4,1113.524641,25.0,414.0,2160.0,3510.0,3955.0
Y,200.0,1876.15,1137.58016,35.0,364.0,1855.0,3475.0,3995.0
facies_threshold_0.3,200.0,1.33,0.471393,1.0,1.0,1.0,2.0,2.0
porosity,200.0,0.1493,0.032948,0.05,0.1061,0.15015,0.19014,0.2232
permeability,200.0,25.287462,64.470135,0.01582,0.26229,4.8255,56.5344,463.641
acoustic_impedance,200.0,3.000435,0.592201,2.009,2.1915,2.9645,3.8336,3.984


### Rename Features
#### Let's rename the facies, permeability and acoustic impedance for convenience

In [19]:
# Mapping of verbose column labels (keys) to the short aliases (values)
# used in the rest of the notebook.
feature_names = {
    "facies_threshold_0.3": "facies",
    "permeability": "perm",
    "acoustic_impedance": "ai",
}
df = df.rename(columns=feature_names)  # returns a renamed frame; rebind df to it
df.head()

Unnamed: 0,X,Y,facies,porosity,perm,ai
0,565,1485,1,0.1184,6.17,2.009
1,2585,1185,1,0.1566,6.275,2.864
2,2065,2865,2,0.192,92.297,3.524
3,3575,2655,1,0.1621,9.048,2.157
4,1835,35,1,0.1766,7.123,3.979


In [20]:
# Position-based selection: the first five rows and the columns from
# position 2 up to (not including) position 7.
df_subset = df.iloc[:5, 2:7]
df_subset.head(12)  # asks for 12 rows; only the five selected rows exist

Unnamed: 0,facies,porosity,perm,ai
0,1,0.1184,6.17,2.009
1,1,0.1566,6.275,2.864
2,2,0.192,92.297,3.524
3,1,0.1621,9.048,2.157
4,1,0.1766,7.123,3.979


In [21]:
# Label-based selection: .loc slices include the end label, so ":4" keeps
# index labels 0 through 4, restricted to the four named columns.
df_subset2 = df.loc[
    :4,
    ["X", "facies", "porosity", "perm"],
]
df_subset2.head(10)

Unnamed: 0,X,facies,porosity,perm
0,565,1,0.1184,6.17
1,2585,1,0.1566,6.275
2,2065,2,0.192,92.297
3,3575,1,0.1621,9.048
4,1835,1,0.1766,7.123


### DEEP COPY EXAMPLE
#### Let's demonstrate a deep copy w/ the DataFrame member function, [my_DataFrame].copy()

Note: selecting with the [my_DataFrame].loc[] indexer, as in the previous cell, also yields a deep copy.

In [22]:
# Make an independent (deep) copy, edit one value in the copy, then display
# the original to check whether the edit reached it.
df_deep_copy = df.copy(deep=True)
df_deep_copy.loc[4, "ai"] = 4.0  # write to the copy only
df.head()  # original frame

Unnamed: 0,X,Y,facies,porosity,perm,ai
0,565,1485,1,0.1184,6.17,2.009
1,2585,1185,1,0.1566,6.275,2.864
2,2065,2865,2,0.192,92.297,3.524
3,3575,2655,1,0.1621,9.048,2.157
4,1835,35,1,0.1766,7.123,3.979


### SHALLOW COPY EXAMPLE
#### Let's demonstrate a shallow copy w/ the DataFrame member function, [my_DataFrame].copy()

In [23]:
# Make a shallow copy (deep=False), edit one value through it, then display
# the original to check whether the edit reached it.
# NOTE(review): the output recorded below shows the edit reaching `df`. With
# pandas Copy-on-Write enabled (the default from pandas 3.0) a write through
# a shallow copy is not expected to reach the original - confirm the pandas
# version before relying on this demonstration.
df_shallow_copy = df.copy(deep=False)  # shallow copy of the DataFrame
df_shallow_copy.loc[3, "ai"] = 4.0  # write through the shallow copy
df.head()  # original frame

Unnamed: 0,X,Y,facies,porosity,perm,ai
0,565,1485,1,0.1184,6.17,2.009
1,2585,1185,1,0.1566,6.275,2.864
2,2065,2865,2,0.192,92.297,3.524
3,3575,2655,1,0.1621,9.048,4.0
4,1835,35,1,0.1766,7.123,3.979


In [24]:
# Build a 1-D array of zeros with one entry per row and attach it to the
# table as a new feature named 'zeros'.
zeros12 = np.zeros(df.shape[0])
df["zeros"] = zeros12
df.head()

Unnamed: 0,X,Y,facies,porosity,perm,ai,zeros
0,565,1485,1,0.1184,6.17,2.009,0.0
1,2585,1185,1,0.1566,6.275,2.864,0.0
2,2065,2865,2,0.192,92.297,3.524,0.0
3,3575,2655,1,0.1621,9.048,4.0,0.0
4,1835,35,1,0.1766,7.123,3.979,0.0


In [25]:
# Remove the 'zeros' feature again; drop() returns a new frame, so rebind df.
df = df.drop(columns="zeros")
df.head()

Unnamed: 0,X,Y,facies,porosity,perm,ai
0,565,1485,1,0.1184,6.17,2.009
1,2585,1185,1,0.1566,6.275,2.864
2,2065,2865,2,0.192,92.297,3.524
3,3575,2655,1,0.1621,9.048,4.0
4,1835,35,1,0.1766,7.123,3.979


In [26]:
# Two derived features computed element-wise from existing columns:
#   porosity100 - porosity multiplied by 100
#   permpor     - permeability divided by porosity
df["porosity100"] = df["porosity"].mul(100)
df["permpor"] = df["perm"].div(df["porosity"])
df.head()

Unnamed: 0,X,Y,facies,porosity,perm,ai,porosity100,permpor
0,565,1485,1,0.1184,6.17,2.009,11.84,52.111486
1,2585,1185,1,0.1566,6.275,2.864,15.66,40.070243
2,2065,2865,2,0.192,92.297,3.524,19.2,480.713542
3,3575,2655,1,0.1621,9.048,4.0,16.21,55.817397
4,1835,35,1,0.1766,7.123,3.979,17.66,40.334088


In [27]:
# New categorical feature: label a sample 'high' when porosity >= 0.12,
# otherwise 'low'.
df["tporosity"] = np.where(df["porosity"].ge(0.12), "high", "low")
df.head()

Unnamed: 0,X,Y,facies,porosity,perm,ai,porosity100,permpor,tporosity
0,565,1485,1,0.1184,6.17,2.009,11.84,52.111486,low
1,2585,1185,1,0.1566,6.275,2.864,15.66,40.070243,high
2,2065,2865,2,0.192,92.297,3.524,19.2,480.713542,high
3,3575,2655,1,0.1621,9.048,4.0,16.21,55.817397,high
4,1835,35,1,0.1766,7.123,3.979,17.66,40.334088,high


In [28]:
# New feature with conditional truncation: keep the permeability where
# porosity >= 0.12, otherwise substitute the constant 0.0001.
df["perm_cutoff"] = np.where(
    df["porosity"].ge(0.12),
    df["perm"],
    0.0001,
)
df.head()

Unnamed: 0,X,Y,facies,porosity,perm,ai,porosity100,permpor,tporosity,perm_cutoff
0,565,1485,1,0.1184,6.17,2.009,11.84,52.111486,low,0.0001
1,2585,1185,1,0.1566,6.275,2.864,15.66,40.070243,high,6.275
2,2065,2865,2,0.192,92.297,3.524,19.2,480.713542,high,92.297
3,3575,2655,1,0.1621,9.048,4.0,16.21,55.817397,high,9.048
4,1835,35,1,0.1766,7.123,3.979,17.66,40.334088,high,7.123


## Finding Missing Data
### What about missing or invalid values?

- Let's assign a single porosity value to NaN, "not a number", indicating a missing or erroneous value
- We will then check for the number of NaN values in our DataFrame
- Then we can search for and display the sample w/ the NaN porosity value

In [29]:
# Overwrite one porosity value with NaN to simulate a missing sample.
# np.nan is used because the np.NaN alias was removed in NumPy 2.0.
# Only this one cell is changed: columns derived earlier from porosity are
# not recomputed, so they keep their previous values for row 1.
df.loc[1, 'porosity'] = np.nan
# isnull() flags each missing entry; the first sum() counts per column and
# the second totals across columns.
print("Number of null values in our DataFrame = ", str(df.isnull().sum().sum())) # count missing values
nan_rows = df[df['porosity'].isnull()] # keep only the rows whose porosity is missing
print(nan_rows)

Number of null values in our DataFrame =  1
      X     Y  facies  porosity   perm     ai  porosity100    permpor  \
1  2585  1185       1       NaN  6.275  2.864        15.66  40.070243   

  tporosity  perm_cutoff  
1      high        6.275  


## CONDITIONAL SLICING
### One could extract samples into a new DataFrame w/ multiple criteria
- We make a new DataFrame w/ all good porosity and good permeability

In [30]:
# Extract the samples that satisfy both conditions (porosity > 0.12 AND
# perm > 10.0) into a new table. A NaN porosity compares as False, so such
# rows are excluded.
df_extract = df.loc[df["porosity"].gt(0.12) & df["perm"].gt(10.0)]
df_extract.head()

Unnamed: 0,X,Y,facies,porosity,perm,ai,porosity100,permpor,tporosity,perm_cutoff
2,2065,2865,2,0.192,92.297,3.524,19.2,480.713542,high,92.297
6,2295,1325,1,0.179,31.933,3.491,17.9,178.396648,high,31.933
7,3715,3045,2,0.1914,116.781,2.187,19.14,610.141066,high,116.781
13,545,3765,1,0.1817,14.311,3.045,18.17,78.761695,high,14.311
15,1385,2415,2,0.1774,22.578,2.711,17.74,127.271702,high,22.578
