In [None]:
import enum
import pathlib
import random

In [None]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns

In [None]:
from orchid import (project_loader as opl)

In [None]:
import toolz.curried as toolz

In [None]:
# noinspection PyUnresolvedReferences
from System import DateTime, DateTimeOffset, DBNull, TimeSpan
# noinspection PyUnresolvedReferences
import UnitsNet

In [None]:
# Short project key -> file name of the `.ifrac` project file. The commented
# pair is an alternative project that is currently switched off.
project_filenames = dict([
    # ('permian-a', 'Project-frankNstein_Permian_UTM13FT_0412_PjtDataFrame.ifrac'),
    ('gng', 'GnG_DemoProject_wDataFrames.ifrac'),
])

In [None]:
# Directory against which every file name in `project_filenames` is resolved.
# NOTE(review): this is an absolute, machine-specific path.
test_data_path = pathlib.Path('c:/src/Orchid.IntegrationTestData/')
project_path_names = {
    project_key: test_data_path.joinpath(file_name)
    for project_key, file_name in project_filenames.items()
}
project_path_names

In [None]:
# Load every project file and keep, per project key, the object returned by
# the loader's `native_project()`.
projects = {
    project_key: opl.ProjectLoader(str(path_name)).native_project()
    for project_key, path_name in project_path_names.items()
}
projects

In [None]:
def make_project_data_frames_by_id(project):
    """Index the data frames of `project` by their `ObjectId`.

    Args:
        project: Object exposing `DataFrames.Items`, an iterable of data frames.

    Returns:
        A `dict` mapping each data frame's `ObjectId` to that data frame. If
        two frames share an `ObjectId`, the later one wins.
    """
    frames_by_id = {}
    for net_data_frame in project.DataFrames.Items:
        frames_by_id[net_data_frame.ObjectId] = net_data_frame
    return frames_by_id

# Per project key: that project's data frames indexed by object ID.
project_net_data_frames_by_id = toolz.valmap(make_project_data_frames_by_id, projects)
project_net_data_frames_by_id

In [None]:
def make_project_data_frames_by_name(project):
    """Index the data frames of `project` by their `Name`.

    Args:
        project: Object exposing `DataFrames.Items`, an iterable of data frames.

    Returns:
        A `dict` mapping each data frame's `Name` to that data frame. If two
        frames share a name, the later one wins.
    """
    frames_by_name = {}
    for net_data_frame in project.DataFrames.Items:
        frames_by_name[net_data_frame.Name] = net_data_frame
    return frames_by_name

# Per project key: that project's data frames indexed by name.
project_net_data_frames_by_name = toolz.valmap(make_project_data_frames_by_name, projects)
project_net_data_frames_by_name

In [None]:
# len(toolz.get('permian-a', project_net_data_frames_by_name))
# Number of data frames indexed by name for the 'gng' project.
len(toolz.get('gng', project_net_data_frames_by_name))

In [None]:
# Display the per-project mapping of data frame name to data frame.
project_net_data_frames_by_name

In [None]:
class PermianFrames(enum.Enum):
    """Data frame names for the Permian project.

    NOTE(review): only referenced from commented-out code in this notebook;
    the values are presumably the frame names stored in the project file --
    confirm before re-enabling that code.
    """
    PROJECT = 'Project Data Frame 01'
    FDI = 'FDI Observations'
    MICROSEISMIC = 'C3-Microseismic Data Frame 01'

class GnGFrames(enum.Enum):
    """Data frame names for the GnG project.

    The member values are compared against data frame names when selecting a
    frame, so they must match those names character for character.
    """
    PROJECT = 'Project Data Frame 01'
    FAULT = 'Fault Trace Set Data Frame 01'
    STAGE = 'Stage Data Frame 01'
    WELL_LOG = 'Well Log Set Data Frame'
    # NOTE(review): no space before "01", unlike the other names -- presumably
    # this mirrors the name stored in the project file; confirm.
    HORIZON = 'Horizon Marker Set Data Frame01'

In [None]:
# Within every project, keep only the entry whose data frame name equals the
# GnG project frame name.
project_only = {
    project_key: toolz.keyfilter(lambda name: name == GnGFrames.PROJECT.value,
                                 frames_by_name)
    for project_key, frames_by_name in project_net_data_frames_by_name.items()
}
project_only
# horizon_only = toolz.valmap(
#     toolz.keyfilter(lambda n: n == 'Horizon Marker Set Data Frame01'),
#                     project_net_data_frames_by_name)
# horizon_only
# fdi_only = toolz.valmap(
#     toolz.keyfilter(lambda n: n == PermianFrames.FDI.value),
#     project_net_data_frames_by_name)
# fdi_only
# microseismic_only = toolz.valmap(
#     toolz.keyfilter(lambda n: n == PermianFrames.MICROSEISMIC.value),
#     project_net_data_frames_by_name)
# microseismic_only


In [None]:
# Narrow the by-name mapping to the selected frame; the cells below read
# `project_net_data_frames_by_name` and therefore see only this selection.
# NOTE(review): because the name is rebound here, the `*_only` filters (which
# read this same name) will operate on the already-narrowed mapping if they are
# re-run afterwards. To switch to another frame, re-run from the cell that
# builds the by-name mapping.
project_net_data_frames_by_name = project_only
# project_net_data_frames_by_name = horizon_only
# project_net_data_frames_by_name = project_only
# project_net_data_frames_by_name = fdi_only
# project_net_data_frames_by_name = microseismic_only

project_net_data_frames_by_name

In [None]:
# Adapted from code at
# https://docs.microsoft.com/en-us/dotnet/framework/data/adonet/dataset-datatable-dataview/creating-a-datareader
# retrieved on 18-Apr-2021.

def table_row_to_dict(reader):
    """Convert the row that `reader` is currently positioned on into a `dict`.

    The result starts with a `'Sample'` key set to `None`, followed by one
    entry per reader column keyed by column name. A column that is itself
    named `'Sample'` replaces the placeholder value.

    Values are translated as follows:

    - `DBNull.Value` becomes `None`.
    - A value in a `DateTimeOffset` column becomes the text returned by its
      `ToString('o')`.
    - Any other value is passed through unchanged.

    Args:
        reader: Data reader exposing `FieldCount`, `GetName(i)`,
            `GetFieldType(i)` and indexing by column name.

    Returns:
        A `dict` mapping `'Sample'` and every column name to its converted
        value.

    Raises:
        TypeError: If a non-null value is found in a `DateTime` column. The
            message carries the value text and the column type.
    """
    # Resolve the .NET type objects once per row instead of once per value.
    date_time_offset_type = DateTimeOffset.UtcNow.GetType()
    date_time_type = DateTime.UtcNow.GetType()

    def map_value(rv, col_type):
        if rv == DBNull.Value:
            return None

        if col_type == date_time_offset_type:
            return rv.ToString('o')

        if col_type == date_time_type:
            # Everything worth knowing goes into the exception itself, so no
            # separate diagnostic output is written before raising.
            rv_text = rv.ToString('o')
            raise TypeError(f'Unexpected `DateTime` {rv_text} (column type {col_type})')

        return rv

    names = [reader.GetName(i) for i in range(reader.FieldCount)]
    row = {'Sample': None}
    for i, name in enumerate(names):
        row[name] = map_value(reader[name], reader.GetFieldType(i))
    return row

def read_data_table(data_table):
    """Yield every row of `data_table` as a `dict`.

    A data reader is created from `data_table` and walked result set by
    result set. Each row is converted with `table_row_to_dict`. An empty
    result set contributes no rows and does not stop the iteration, so any
    later result sets are still read.

    Args:
        data_table: Object exposing `CreateDataReader()`.

    Yields:
        One `dict` per row, in reader order.

    The reader is disposed when the generator is exhausted, closed, or fails.
    """
    reader = data_table.CreateDataReader()
    try:
        while True:
            # `Read()` returns a false value straight away for an empty result
            # set, so no separate `HasRows` test is needed.
            while reader.Read():
                yield table_row_to_dict(reader)
            if not reader.NextResult():
                break
    finally:
        reader.Dispose()

def table_to_data_frame(data_table):
    """Build a pandas `DataFrame` from all rows of `data_table`.

    Args:
        data_table: Table passed on to `read_data_table`.

    Returns:
        A `pandas.DataFrame` with one row per `dict` produced by
        `read_data_table`.
    """
    rows = list(read_data_table(data_table))
    return pd.DataFrame(data=rows)

In [None]:
def net_data_frame_to_pandas_data_frame(net_df):
    """Convert one data frame to pandas via its `DataTable`."""
    return table_to_data_frame(net_df.DataTable)


# Same nesting as the input (project key -> frame name), with every data
# frame replaced by its pandas counterpart.
project_pandas_data_frames_by_name = {
    project_key: toolz.valmap(net_data_frame_to_pandas_data_frame, frames_by_name)
    for project_key, frames_by_name in project_net_data_frames_by_name.items()
}

In [None]:
# Column names of the GnG project pandas data frame, in column order.
gng_project_frame_columns = toolz.pipe(
    project_pandas_data_frames_by_name,
    toolz.get_in(['gng', GnGFrames.PROJECT.value]),
    lambda frame: list(frame.columns.values),
)
print(gng_project_frame_columns)
# permian_project_frame_columns = list(
#     toolz.get_in(['permian-a', PermianFrames.PROJECT.value],
#                  project_pandas_data_frames_by_name).columns.values
# )
# print(permian_project_frame_columns)
# permian_fdi_frame_columns = list(
#     toolz.get_in(['permian-a', PermianFrames.FDI.value],
#                  project_pandas_data_frames_by_name).columns.values
# )
# print(permian_fdi_frame_columns)
# permian_microseismic_frame_columns = list(
#     toolz.get_in(['permian-a', PermianFrames.MICROSEISMIC.value],
#                  project_pandas_data_frames_by_name).columns.values
# )
# print(permian_microseismic_frame_columns)

In [None]:
# Columns of the GnG project data frame to include in the sample. The
# spelling, including doubled and trailing spaces, is kept exactly as written
# because these strings are used as column labels.
gng_project_sampled_columns = [
    'Surface  Hole Easting ',
    'Bottom Hole Northing ',
    'Bottom Hole TDV ',
    'StageNumber',
    'StageLength',
    'Pnet',
]
gng_project_sampled_columns
# permian_project_sampled_columns = [
#     'Bottom Hole Easting ', 'MDBottom', 'PartEndTime',
#     'StagePartPumpedVolume', 'Pnet', 'PumpTime'
# ]
# permian_project_sampled_columns
# permian_fdi_sampled_columns = [
#     'ObservationSetName', 'TreatmentStagePartNumber', 'Timestamp',
#     'DeltaT', 'DeltaP', 'VolumeToPick'
# ]
# permian_fdi_sampled_columns
# permian_microseismic_sampled_columns = [
#     'Timestamp', 'Northing', 'DepthTvdSs',
#     'Distance3d', 'PlanarDistanceAzimuth', 'VerticalDistance',
# ]
# permian_microseismic_sampled_columns

In [None]:
# Position of every sampled column within the GnG project frame's columns;
# raises `ValueError` if a sampled column name is not present.
list(map(gng_project_frame_columns.index, gng_project_sampled_columns))
# [permian_project_frame_columns.index(cn) for cn in permian_project_sampled_columns]
# [permian_fdi_frame_columns.index(cn) for cn in permian_fdi_sampled_columns]
# [permian_microseismic_frame_columns.index(cn) for cn in permian_microseismic_sampled_columns]

In [None]:
def all_indices(sized):
    """Return `[0, 1, ..., len(sized) - 1]` as a list."""
    return list(range(len(sized)))

@toolz.curry
def sample_data_frame_column_indices(data_frame):
    """Return the column labels to sample: `'Sample'` followed by the chosen columns.

    Despite the name, the result holds column labels, not integer positions.
    The selection is switched by editing the active assignment below.

    Args:
        data_frame: Not used; present so the function can be mapped over data
            frames.

    Returns:
        A new `list` of column labels. A list (not a lazy iterator) is
        returned so the result can be displayed and used for selection any
        number of times.
    """
    # Gng Project
    result = ['Sample', *gng_project_sampled_columns]
    # # Permian project
    # result = ['Sample', *permian_project_sampled_columns]
    # # Permian FDI
    # result = ['Sample', *permian_fdi_sampled_columns]
    # # Permian Microseismic
    # result = ['Sample', *permian_microseismic_sampled_columns]

    return result

@toolz.curry
def sample_data_frame_row_indices(data_frame):
    """Return the fixed row positions to sample.

    The selection is switched by editing the active `return` below.

    Args:
        data_frame: Not used; present so the function can be mapped over data
            frames.

    Returns:
        A new `list` of integer row positions.
    """
    # Permian project: [0, 31, 39, 51, 52, 58, 62, 87]
    # Added sample 26 to FDI observations because of MaxValue issues
    # Permian fdi: [0, 1, 20, 26, 28, 45, 52, 53, 83]
    # Permian microseismic: [0, 12, 79, 96, 99, 330, 366, 479]

    # GnG project
    return [0, 23, 37, 58, 65, 89, 170, 210]

In [None]:
@toolz.curry
def sample_data_frame_indices(indices_func, data_frame):
    """Apply `indices_func` to `data_frame` and return whatever it returns.

    Curried, so `sample_data_frame_indices(indices_func)` yields a
    one-argument function awaiting the data frame.
    """
    indices = indices_func(data_frame)
    return indices

In [None]:
# Same nesting as the pandas frames (project key -> frame name), holding the
# sampled column labels for every frame.
sampled_column_indices_by_name = {
    project_key: toolz.valmap(sample_data_frame_column_indices, frames_by_name)
    for project_key, frames_by_name in project_pandas_data_frames_by_name.items()
}
sampled_column_indices_by_name

In [None]:
# Same nesting as the pandas frames (project key -> frame name), holding the
# sampled row positions for every frame.
sampled_row_indices_by_name = {
    project_key: toolz.valmap(sample_data_frame_row_indices, frames_by_name)
    for project_key, frames_by_name in project_pandas_data_frames_by_name.items()
}
sampled_row_indices_by_name

In [None]:
def add_sample_numbers(items):
    """Write each sampled row's position into that row's `'Sample'` cell.

    Args:
        items: Sequence of exactly three elements:
            `(data_frame, row_indices, column_indices)`.

    Returns:
        The tuple `(data_frame, row_indices, column_indices)` holding the same
        objects that were passed in. `data_frame` is modified in place.

    Raises:
        ValueError: If `data_frame` has no `'Sample'` column.
    """
    data_frame, row_indices, column_indices = items
    sample_position = list(data_frame.columns).index('Sample')
    for row_position in row_indices:
        # A one-element column list keeps the assignment targeted at the
        # single 'Sample' cell of this row.
        data_frame.iloc[row_position, [sample_position]] = row_position
    return data_frame, row_indices, column_indices

# Combine the three nested mappings key by key. The outer `merge_with` groups
# the per-project dictionaries, and the inner (curried) `merge_with` groups, per
# frame name, the data frame with its row indices and column indices, then
# hands that group to `add_sample_numbers`. The argument order below fixes the
# order in which `add_sample_numbers` unpacks the group.
data_frames_to_sample_by_name = toolz.merge_with(toolz.merge_with(add_sample_numbers),
                                                 project_pandas_data_frames_by_name,
                                                 sampled_row_indices_by_name,
                                                 sampled_column_indices_by_name)
data_frames_to_sample_by_name

In [None]:
def sample_data_frame(data_frame, row_indices, column_indices):
    """Select the sampled rows and columns of `data_frame`.

    Args:
        data_frame: The pandas data frame to sample.
        row_indices: Integer row positions, applied with `.iloc`.
        column_indices: Column labels, applied with `.loc`.

    Returns:
        A data frame with the chosen rows and, of those, the chosen columns.
    """
    sampled_rows = data_frame.iloc[row_indices, :]
    return sampled_rows.loc[:, column_indices]

In [None]:
# GnG: apply `sample_data_frame` to every (data frame, rows, columns) group.
sampled_project_data_frames_by_name = {
    project_key: {
        frame_name: sample_data_frame(*to_sample)
        for frame_name, to_sample in frames_to_sample.items()
    }
    for project_key, frames_to_sample in data_frames_to_sample_by_name.items()
}
sampled_project_data_frames_by_name

# Permian
# sampled_project_data_frames_by_name = toolz.valmap(
#     toolz.valmap(lambda e: sample_data_frame(*e)), data_frames_to_sample_by_name
# )
# sampled_project_data_frames_by_name
# sampled_fdi_data_frames_by_name = toolz.valmap(
#     toolz.valmap(lambda e: sample_data_frame(*e)), data_frames_to_sample_by_name
# )
# sampled_fdi_data_frames_by_name
# sampled_microseismic_data_frames_by_name = toolz.valmap(
#     toolz.valmap(lambda e: sample_data_frame(*e)), data_frames_to_sample_by_name
# )
# sampled_microseismic_data_frames_by_name

In [None]:
# GnG: display the sampled project data frame of the 'gng' project.
toolz.get_in(['gng', GnGFrames.PROJECT.value],
              sampled_project_data_frames_by_name)

# Permian
# toolz.get_in(['permian-a', PermianFrames.PROJECT.value],
#               sampled_project_data_frames_by_name)
# toolz.get_in(['permian-a', PermianFrames.FDI.value],
#               sampled_fdi_data_frames_by_name)
# toolz.get_in(['permian-a', PermianFrames.MICROSEISMIC.value],
#               sampled_microseismic_data_frames_by_name)