In [1]:
import pathlib
import random

In [2]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns

In [3]:
from orchid import (project_loader as opl)

In [4]:
import option
import toolz.curried as toolz

In [5]:
# noinspection PyUnresolvedReferences
from System import DateTime, DateTimeOffset, DBNull, TimeSpan
# noinspection PyUnresolvedReferences
import UnitsNet

In [6]:
project_filenames = {
    'permian-a': 'Project-frankNstein_Permian_UTM13FT_0412_PjtDataFrame.ifrac',
    # 'gng': 'GnG_DemoProject_wDataFrames.ifrac',
}

In [7]:
test_data_path = pathlib.Path('c:/src/Orchid.IntegrationTestData/')
project_path_names = toolz.valmap(
    lambda fn: test_data_path.joinpath(fn), project_filenames
)
project_path_names

{'permian-a': WindowsPath('c:/src/Orchid.IntegrationTestData/Project-frankNstein_Permian_UTM13FT_0412_PjtDataFrame.ifrac')}

In [8]:
projects = toolz.valmap(
    lambda pn: opl.ProjectLoader(str(pn)).native_project(),
    project_path_names)
projects

{'permian-a': <Orchid.FractureDiagnostics.Factories.Implementations.Project object at 0x000001B149245100>}

In [9]:
def make_project_data_frames_by_id(project):
    return {df.ObjectId: df for df in project.DataFrames.Items}

project_net_data_frames_by_id = toolz.pipe(
    projects,
    toolz.valmap(make_project_data_frames_by_id),
)
project_net_data_frames_by_id

{'permian-a': {<System.Guid object at 0x000001B14922B910>: <Orchid.FractureDiagnostics.Factories.DataFrames.ProjectDataFrame object at 0x000001B14922BFA0>,
  <System.Guid object at 0x000001B149245CD0>: <Orchid.FractureDiagnostics.Factories.DataFrames.ObservationSetDataFrame object at 0x000001B149245CA0>,
  <System.Guid object at 0x000001B149245F40>: <Orchid.FractureDiagnostics.Factories.DataFrames.MicroseismicDataFrame object at 0x000001B149245F10>}}

In [10]:
def make_project_data_frames_by_name(project):
    return {df.Name: df for df in project.DataFrames.Items}

project_net_data_frames_by_name = toolz.pipe(
    projects,
    toolz.valmap(make_project_data_frames_by_name),
)
project_net_data_frames_by_name

{'permian-a': {'Project Data Frame 01': <Orchid.FractureDiagnostics.Factories.DataFrames.ProjectDataFrame object at 0x000001B12D7A0880>,
  'FDI Observations': <Orchid.FractureDiagnostics.Factories.DataFrames.ObservationSetDataFrame object at 0x000001B149239790>,
  'C3-Microseismic Data Frame 01': <Orchid.FractureDiagnostics.Factories.DataFrames.MicroseismicDataFrame object at 0x000001B149239D00>}}

In [11]:
len(list(toolz.get('permian-a', project_net_data_frames_by_name)))
# len(list(toolz.get('gng', project_net_data_frames_by_name)))

3

In [12]:
project_net_data_frames_by_name

{'permian-a': {'Project Data Frame 01': <Orchid.FractureDiagnostics.Factories.DataFrames.ProjectDataFrame object at 0x000001B12D7A0880>,
  'FDI Observations': <Orchid.FractureDiagnostics.Factories.DataFrames.ObservationSetDataFrame object at 0x000001B149239790>,
  'C3-Microseismic Data Frame 01': <Orchid.FractureDiagnostics.Factories.DataFrames.MicroseismicDataFrame object at 0x000001B149239D00>}}

In [13]:
# horizon_only = toolz.valmap(
#     toolz.keyfilter(lambda n: n == 'Horizon Marker Set Data Frame01'),
#                     project_net_data_frames_by_name)
# horizon_only
# microseismic_only = toolz.valmap(
#     toolz.keyfilter(lambda n: n == 'C3-Microseismic Data Frame 01'),
#     project_net_data_frames_by_name)
# microseismic_only
# project_only = toolz.valmap(
#     toolz.keyfilter(lambda n: n == 'Project Data Frame 01'),
#     project_net_data_frames_by_name)
# project_only
fdi_only = toolz.valmap(
    toolz.keyfilter(lambda n: n == 'FDI Observations'),
    project_net_data_frames_by_name)
fdi_only


{'permian-a': {'FDI Observations': <Orchid.FractureDiagnostics.Factories.DataFrames.ObservationSetDataFrame object at 0x000001B149239790>}}

In [14]:
# project_net_data_frames_by_name = horizon_only
# project_net_data_frames_by_name = microseismic_only
# project_net_data_frames_by_name = project_only
project_net_data_frames_by_name = fdi_only

project_net_data_frames_by_name

{'permian-a': {'FDI Observations': <Orchid.FractureDiagnostics.Factories.DataFrames.ObservationSetDataFrame object at 0x000001B149239790>}}

In [15]:
# Adapted from code at
# https://docs.microsoft.com/en-us/dotnet/framework/data/adonet/dataset-datatable-dataview/creating-a-datareader
# retrieved on 18-Apr-2021.

def table_row_to_dict(reader):
    def map_value(rv, col_type):
        if rv == DBNull.Value:
            return None
        
        if col_type == DateTimeOffset.UtcNow.GetType():
            return rv.ToString('o')
            
        if col_type == DateTime.UtcNow.GetType():
            rv_text = rv.ToString('o')
            print(f'"{rv_text}" {col_type}')
            raise TypeError(f'Unexpected `DateTime` {rv_text}')

        return rv
    
    seed = {'Sample': None}
    indices = range(reader.FieldCount)
    names = [reader.GetName(i) for i in indices]
    col_types = [reader.GetFieldType(i) for i in indices]
    raw_values = [reader[name] for name in names]
    values = toolz.map(map_value, raw_values, col_types)
    table_result = dict(zip(names, values))
    result = toolz.merge(seed, table_result)
    return result

def read_data_table(data_table):
    reader = data_table.CreateDataReader()
    try:
        while True:
            if reader.HasRows:
                has_row = reader.Read()
                while has_row:
                    yield table_row_to_dict(reader)
                    has_row = reader.Read()
            else:
                return
            if not reader.NextResult():
                break
    finally:
        reader.Dispose()

def table_to_data_frame(data_table):
    return pd.DataFrame(data=[r for r in read_data_table(data_table)])

In [16]:
net_data_frame_to_pandas_data_frame = toolz.compose(table_to_data_frame,
                                                    lambda net_df: net_df.DataTable)

project_pandas_data_frames_by_name = toolz.pipe(
    project_net_data_frames_by_name,
    toolz.valmap(toolz.valmap(net_data_frame_to_pandas_data_frame)),
)

In [17]:
# gng_project_frame_columns = list(
#     toolz.get_in(['gng', 'Project Data Frame 01'],
#                  project_pandas_data_frames_by_name).columns.values
# )
# print(gng_project_frame_columns)
# permian_project_frame_columns = list(
#     toolz.get_in(['permian-a', 'Project Data Frame 01'],
#                  project_pandas_data_frames_by_name).columns.values
# )
# print(permian_project_frame_columns)
permian_fdi_frame_columns = list(
    toolz.get_in(['permian-a', 'FDI Observations'],
                 project_pandas_data_frames_by_name).columns.values
)
print(permian_fdi_frame_columns)
# permian_microseismic_frame_columns = list(
#     toolz.get_in(['permian-a', 'C3-Microseismic Data Frame 01'],
#                  project_pandas_data_frames_by_name).columns.values
# )
# print(permian_microseismic_frame_columns)

['Sample', 'ProjectName', 'ObservationSetName', 'MonitorName', 'MonitorWellName', 'MonitorStageNumber', 'TreatmentWellName', 'TreatmentStagePartNumber', 'SignalQuality', 'Timestamp', 'DeltaT', 'DeltaP', 'VolumeToPick', 'ProppantMass', 'Energy', 'Notes', 'Shortest distance', 'Distance along the Azimuth']


In [18]:
# gng_project_sampled_columns = [
#     'Surface  Hole Easting ', 'Bottom Hole Northing ', 'Bottom Hole TDV ',
#     'StageNumber', 'StageLength', 'Pnet'
# ]
# gng_project_sampled_columns
# permian_project_sampled_columns = [
#     'Bottom Hole Easting ', 'MDBottom', 'PartEndTime',
#     'StagePartPumpedVolume', 'Pnet', 'PumpTime'
# ]
# permian_project_sampled_columns
permian_fdi_sampled_columns = [
    'ObservationSetName', 'TreatmentStagePartNumber', 'Timestamp',
    'DeltaT', 'DeltaP', 'VolumeToPick'
]
permian_fdi_sampled_columns
# permian_microseismic_sampled_columns = [
#     'Timestamp', 'Northing', 'DepthTvdSs',
#     'Distance3d', 'PlanarDistanceAzimuth', 'VerticalDistance',
# ]
# permian_microseismic_sampled_columns

['ObservationSetName',
 'TreatmentStagePartNumber',
 'Timestamp',
 'DeltaT',
 'DeltaP',
 'VolumeToPick']

In [19]:
# [gng_project_frame_columns.index(cn) for cn in permian_project_sampled_columns]
# [permian_project_frame_columns.index(cn) for cn in permian_project_sampled_columns]
[permian_fdi_frame_columns.index(cn) for cn in permian_fdi_sampled_columns]

[2, 7, 9, 10, 11, 12]

In [20]:
all_indices = toolz.compose(list, range, len)

@toolz.curry
def sample_data_frame_column_indices(data_frame):
    # Ignore the seeded `Sample` column and the `ProjectName` column
    candidate_column_indices = all_indices(data_frame.columns)[2:]
    random.shuffle(candidate_column_indices)
    # Randomly sample six columns in addition to the 'Sample' column
    # result = [0] + sorted(candidate_column_indices[:6])
    # # Permian project
    # result = toolz.concatv(['Sample'], permian_project_sampled_columns)
    # Permian FDI
    result = toolz.concatv(['Sample'], permian_fdi_sampled_columns)
    return result

@toolz.curry
def sample_data_frame_row_indices(data_frame):
    candidate_row_indices = all_indices(data_frame)[1:-1]
    random.shuffle(candidate_row_indices)
    # result = [0, 23, 37, 58, 65, 89, 170, 210]  # GnG project
    # result = [0, 31, 39, 51, 52, 58, 62, 87]  # Permian project
    # Added sample 26 to FDI observations because of MaxValue issues
    result = [0, 1, 20, 26, 28, 45, 52, 53, 83]  # Permian fdi
    # result = [0, 12, 79, 96, 99, 330, 366, 479]  # Permian microseismic
    return result

In [21]:
@toolz.curry
def sample_data_frame_indices(indices_func, data_frame):
    return indices_func(data_frame)

In [22]:
sampled_column_indices_by_name = toolz.valmap(
    toolz.valmap(sample_data_frame_column_indices), project_pandas_data_frames_by_name
)
sampled_column_indices_by_name

{'permian-a': {'FDI Observations': <itertools.chain at 0x1b146c66d60>}}

In [23]:
sampled_row_indices_by_name = toolz.valmap(toolz.valmap(sample_data_frame_row_indices),
                                           project_pandas_data_frames_by_name)
sampled_row_indices_by_name

{'permian-a': {'FDI Observations': [0, 1, 20, 26, 28, 45, 52, 53, 83]}}

In [24]:
def add_sample_numbers(items):
    data_frame, row_indices, column_indices = items
    sample_index = (list(data_frame.columns.values)).index('Sample')
    for row_index in row_indices:
        data_frame.iloc[row_index, [sample_index]] = row_index
    return data_frame, row_indices, column_indices

data_frames_to_sample_by_name = toolz.merge_with(toolz.merge_with(add_sample_numbers),
                                                 project_pandas_data_frames_by_name,
                                                 sampled_row_indices_by_name,
                                                 sampled_column_indices_by_name)
data_frames_to_sample_by_name

{'permian-a': {'FDI Observations': (   Sample                         ProjectName ObservationSetName  \
   0       0  frankNstein_Permian_UTM13FT_Phase2   FDI Observations   
   1       1  frankNstein_Permian_UTM13FT_Phase2   FDI Observations   
   2    None  frankNstein_Permian_UTM13FT_Phase2   FDI Observations   
   3    None  frankNstein_Permian_UTM13FT_Phase2   FDI Observations   
   4    None  frankNstein_Permian_UTM13FT_Phase2   FDI Observations   
   ..    ...                                 ...                ...   
   79   None  frankNstein_Permian_UTM13FT_Phase2   FDI Observations   
   80   None  frankNstein_Permian_UTM13FT_Phase2   FDI Observations   
   81   None  frankNstein_Permian_UTM13FT_Phase2   FDI Observations   
   82   None  frankNstein_Permian_UTM13FT_Phase2   FDI Observations   
   83     83  frankNstein_Permian_UTM13FT_Phase2   FDI Observations   
   
                    MonitorName MonitorWellName MonitorStageNumber  \
   0   P1 - 12555 - MonitorWell          

In [25]:
def sample_data_frame(data_frame, row_indices, column_indices):
    result = data_frame.iloc[row_indices, :].loc[:, column_indices]
    return result

In [26]:
# sampled_project_data_frames_by_name = toolz.valmap(
#     toolz.valmap(lambda e: sample_data_frame(*e)), data_frames_to_sample_by_name
# )
# sampled_project_data_frames_by_name
sampled_fdi_data_frames_by_name = toolz.valmap(
    toolz.valmap(lambda e: sample_data_frame(*e)), data_frames_to_sample_by_name
)
sampled_fdi_data_frames_by_name
# sampled_microseismic_data_frames_by_name = toolz.valmap(
#     toolz.valmap(lambda e: sample_data_frame(*e)), data_frames_to_sample_by_name
# )
# sampled_microseismic_data_frames_by_name

{'permian-a': {'FDI Observations':    Sample ObservationSetName TreatmentStagePartNumber  \
  0       0   FDI Observations                 Stage-01   
  1       1   FDI Observations                 Stage-02   
  20     20   FDI Observations                 Stage-19   
  26     26   FDI Observations                 Stage-25   
  28     28   FDI Observations                 Stage-02   
  45     45   FDI Observations                 Stage-20   
  52     52   FDI Observations                 Stage-29   
  53     53   FDI Observations                 Stage-01   
  83     83   FDI Observations                 Stage-31   
  
                              Timestamp                    DeltaT      DeltaP  \
  0   2018-11-13T21:45:11.9873152+00:00          01:22:18.9873152    0.362115   
  1   2018-11-14T23:46:24.4282880+00:00          01:49:01.4282880    0.595053   
  20  2018-11-27T16:14:52.3412096+00:00          02:42:24.3412096   39.059964   
  26  9999-12-31T23:59:59.9999999+00:00  2915032.1

In [27]:
# toolz.get_in(['gng', 'Project Data Frame 01'],
#              sampled_project_data_frames_by_name)
# toolz.get_in(['permian-a', 'Project Data Frame 01'],
#              sampled_project_data_frames_by_name)
toolz.get_in(['permian-a', 'FDI Observations'],
             sampled_fdi_data_frames_by_name)

Unnamed: 0,Sample,ObservationSetName,TreatmentStagePartNumber,Timestamp,DeltaT,DeltaP,VolumeToPick
0,0,FDI Observations,Stage-01,2018-11-13T21:45:11.9873152+00:00,01:22:18.9873152,0.362115,4299.649417
1,1,FDI Observations,Stage-02,2018-11-14T23:46:24.4282880+00:00,01:49:01.4282880,0.595053,5009.373675
20,20,FDI Observations,Stage-19,2018-11-27T16:14:52.3412096+00:00,02:42:24.3412096,39.059964,8056.843667
26,26,FDI Observations,Stage-25,9999-12-31T23:59:59.9999999+00:00,2915032.19:26:39.9999999,,8532.66185
28,28,FDI Observations,Stage-02,2018-11-13T04:20:10.0000000+00:00,02:20:11,6.170513,9859.784375
45,45,FDI Observations,Stage-20,2018-11-24T16:39:09.0000000+00:00,02:55:26,98.717645,10014.638758
52,52,FDI Observations,Stage-29,2018-11-27T19:13:34.1109888+00:00,02:01:22.1109888,94.454784,5920.732675
53,53,FDI Observations,Stage-01,2018-11-12T17:07:02.3021312+00:00,02:52:32.3021312,214.641797,11681.990733
83,83,FDI Observations,Stage-31,2018-11-29T04:02:02.8648576+00:00,01:55:11.8648576,31.818509,6803.360483
