In [2]:
import os
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'
import tensorflow as tf
import matplotlib.pyplot as plt
import numpy as np
from keras.models import save_model, load_model
from tensorflow.keras import layers
from tensorflow.keras.layers import Input, Conv2D, Conv2DTranspose, MaxPooling2D, UpSampling2D
from keras.callbacks import TensorBoard
from tensorflow.keras.models import Model
import time
from gmm import custom_autoencoder

In [3]:
import lasio
import pandas as pd

In [6]:
from typing import List


def _nafy_duplicates(ar, chunk_size = 3):
    # If `chunk_size` consecutive values appear before OR after, make into NaN.
    or_up = [ar[i:-i] - ar[2*i:] == 0 for i in range(1, chunk_size+1)]
    or_down = [ar[i:-i] - ar[2*i:] == 0 for i in range(1, chunk_size+1)]
    fix_len_up = [np.concatenate([[True]*i, or_up[i-1], [True]*i]) for i in range(1, chunk_size+1)]
    fix_len_down = [np.concatenate([[True]*i, or_down[i-1], [True]*i]) for i in range(1, chunk_size+1)]
    all_up = np.all(np.stack(fix_len_up, axis=1), axis=1)
    all_down = np.all(np.stack(fix_len_down, axis=1), axis=1)
    ar[all_up | all_down] = np.nan

def clean_data(las: lasio.LASFile, chunk_size: int = 3, ignore_contains: List = None) -> None:
    """If `chunk_size` consecutive values are recorded, make into NaN.

    Args:
        las (lasio.LASFile): The LAS file.
        chunk_size (int, optional): Number of consecutive values. Defaults to 3.
        ignore_contains (List, optional): If there are words in ABBR to ignore. Defaults to ["LITHO"].
    """
    ignore_contains = ["LITHO"] if ignore_contains is None else ignore_contains
    for key, track in las.curvesdict.items():
        if any(s in key for s in ignore_contains):
            continue
        _nafy_duplicates(track.data, chunk_size)

In [5]:
folder = '/mnt/Non-seismic/03 FORCE/FORCE 2020 Wells'
filenames = os.listdir(folder)

df_list = []
for filename in filenames:
    if 'las' in filename:
        data = lasio.read(os.path.join(folder, filename))
        # Move to clean_data.
        clean_data(data)
        rename_map = {x.mnemonic: x.descr.split(' ')[1] for x in data.curves}
          
        df = data.df().copy()
        df = df.rename(columns = rename_map)
        df['filename'] = filename
        df_list.append(df)
df_master = pd.concat(df_list, axis = 0)


In [6]:
df_master.to_csv('/home/geouser05/geo/data/02_preprocessed/well_logs.csv')

In [7]:
df_master.head()

Unnamed: 0_level_0,BS,CALI,DEPTH_MD,DRHO,DTC,DTS,FORCE_2020_LITHOFACIES_CONFIDENCE,FORCE_2020_LITHOFACIES_LITHOLOGY,GR,NPHI,...,z_loc,filename,RSHA,RXO,SP,RMIC,SGR,ROPA,DCAL,MUDWEIGHT
DEPT,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
,,,,,,,,,,,...,,35_11-12_logs.las,,,,,,,,
362.4024,,,,,,,,,,,...,,35_11-12_logs.las,,,,,,,,
362.5548,,,,,,,,,,,...,,35_11-12_logs.las,,,,,,,,
362.7072,,,,,,,,,,,...,,35_11-12_logs.las,,,,,,,,
362.8596,,,,,,,,,,,...,,35_11-12_logs.las,,,,,,,,


In [4]:
folder = '/mnt/Non-seismic/01 Poseidon/Spliced well logs provided by occam technology'
filenames = os.listdir(folder)

df_list = []
for filename in filenames:
    if 'LAS' in filename:
        data = lasio.read(os.path.join(folder, filename))
        # Move to clean_data.
        #clean_data(data)
        #rename_map = {x.mnemonic: x.descr.split(' ')[1] for x in data.curves}
          
        df = data.df().copy()
        #df = df.rename(columns = rename_map)
        df['filename'] = filename
        df['field'] = 'Pesidon'
        df_list.append(df)
df_master2 = pd.concat(df_list, axis = 0)


In [5]:
features_base = [
            # 'CALI', 
            # 'DRHO', 
            'DTC', 
            # 'FORCE_2020_LITHOFACIES_CONFIDENCE', 
            'FORCE_2020_LITHOFACIES_LITHOLOGY',
            'GR', 
            'NPHI', 
            # 'PEF', 
            'RDEP', 
            'RHOB', 
            'RMED', 
            # 'ROP',
            # 'RSHA', 
            # 'RXO', 
            # 'SP', 
            # 'RMIC', 
            # 'SGR', 
            # 'ROPA', 
            # 'DCAL',
            # 'MUDWEIGHT'
            ]

In [6]:
for f in features_base:
    if f not in df_master2.columns:
        print(f)

FORCE_2020_LITHOFACIES_LITHOLOGY
RDEP
RMED


In [7]:
df_master2.to_csv('/home/geouser05/geo/data/02_preprocessed/well_logs2.csv')

In [14]:
for x in df_master2.columns.sort_values():
    print(x)

ATRT
ATRX
BATC
CAL1
DCAV
DTC
DTCO
DTS
DTSM
ECGR
GR
GRARC
GRD
HDAR
HROM
HTNP
NPHI
P16H
P34H
RD
RHOB
RHOZ
RS
TNP
TNPH
filename


In [1]:
folder = '/mnt/Non-seismic/01 Poseidon/Spliced well logs provided by occam technology'
os.listdir(folder)

['PoseidonNorth1Decim.LAS',
 'Torosa1Decim.LAS',
 'Pharos1Decim.LAS',
 'Poseidon2Decim.LAS',
 'README FIRST.docx',
 'Kronos1Decim.LAS',
 'Checkshots',
 'Poseidon1Decim.LAS',
 'Proteus1Decim.LAS',
 'Boreas1Decim.LAS']

In [8]:
for f in df_master.columns:
    print(f)

BS
CALI
DEPTH_MD
DRHO
DTC
DTS
FORCE_2020_LITHOFACIES_CONFIDENCE
FORCE_2020_LITHOFACIES_LITHOLOGY
GR
NPHI
PEF
RDEP
RHOB
RMED
ROP
x_loc
y_loc
z_loc
filename
RSHA
RXO
SP
RMIC
SGR
ROPA
DCAL
MUDWEIGHT


In [9]:
def make_generator_model():
    model = tf.keras.Sequential()
    model.add(layers.Dense(16*16*8*8, use_bias=False, input_shape=(100,)))
    model.add(layers.BatchNormalization())
    model.add(layers.LeakyReLU())

    model.add(layers.Reshape((32, 32, 16)))
    assert model.output_shape == (None,32, 32, 16)  # Note: None is the batch size

    model.add(layers.Conv2D(64, (5, 5), strides=(1, 1), padding='same', activation='selu'))
    print(model.output_shape)
    assert model.output_shape == (None, 32, 32, 64)
    #model.add(layers.BatchNormalization())
    #model.add(layers.LeakyReLU())

    model.add(layers.Conv2D(64, (5, 5), strides=(2, 2), padding='same', activation='selu'))
    print(model.output_shape)
    assert model.output_shape == (None, 16, 16, 64)
    #model.add(layers.BatchNormalization())
    #model.add(layers.LeakyReLU())

    model.add(layers.Conv2D(8, (5, 5), strides=(1, 1), padding='same', use_bias=False, activation='tanh'))
    assert model.output_shape == (None, 16, 16, 8)

    return model

def make_discriminator_model():
    model = tf.keras.Sequential()
    model.add(layers.Conv2D(64, (5, 5), strides=(2, 2), padding='same',
                                     input_shape=[16, 16, 8]))
    model.add(layers.LeakyReLU())
    model.add(layers.Dropout(0.3))

    model.add(layers.Conv2D(128, (5, 5), strides=(2, 2), padding='same'))
    model.add(layers.LeakyReLU())
    model.add(layers.Dropout(0.3))

    model.add(layers.Flatten())
    model.add(layers.Dense(1))

    return model


def discriminator_loss(real_output, fake_output):
    real_loss = cross_entropy(tf.ones_like(real_output), real_output)
    fake_loss = cross_entropy(tf.zeros_like(fake_output), fake_output)
    total_loss = real_loss + fake_loss
    return total_loss

def generator_loss(fake_output):
    return cross_entropy(tf.ones_like(fake_output), fake_output)

# Notice the use of `tf.function`
# This annotation causes the function to be "compiled".
@tf.function
def train_step(images):
    noise = tf.random.normal([BATCH_SIZE, noise_dim])

    with tf.GradientTape() as gen_tape, tf.GradientTape() as disc_tape:
      generated_images = generator(noise, training=True)

      real_output = discriminator(images, training=True)
      fake_output = discriminator(generated_images, training=True)

      gen_loss = generator_loss(fake_output)
      disc_loss = discriminator_loss(real_output, fake_output)

    gradients_of_generator = gen_tape.gradient(gen_loss, generator.trainable_variables)
    gradients_of_discriminator = disc_tape.gradient(disc_loss, discriminator.trainable_variables)

    generator_optimizer.apply_gradients(zip(gradients_of_generator, generator.trainable_variables))
    discriminator_optimizer.apply_gradients(zip(gradients_of_discriminator, discriminator.trainable_variables))


In [10]:
def train(dataset, epochs, decoder = None):
  for epoch in range(epochs):
    start = time.time()

    for image_batch in dataset:
      train_step(image_batch)

    # Produce images for the GIF as you go
    display.clear_output(wait=True)
    generate_and_save_images(generator,
                             epoch + 1,
                             seed,
                             decoder = decoder)

    # Save the model every 15 epochs
    if (epoch + 1) % 15 == 0:
      checkpoint.save(file_prefix = checkpoint_prefix)

    print ('Time for epoch {} is {} sec'.format(epoch + 1, time.time()-start))

  # Generate after the final epoch
  display.clear_output(wait=True)
  generate_and_save_images(generator,
                           epochs,
                           seed, decoder = decoder)