In [None]:
import enum
import os
import pathlib
import random

In [None]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns

In [None]:
import option
import toolz.curried as toolz

In [None]:
from orchid import (project_loader as opl)

In [None]:
# noinspection PyUnresolvedReferences
from System import DBNull, DateTime, DateTimeOffset, TimeSpan
# noinspection PyUnresolvedReferences
import UnitsNet

In [None]:
# Short project keys mapped to the `.ifrac` file names that are loaded below.
project_filenames = {
    'permian-u': 'Project-frankNstein_Permian_UTM13FT_DF_PR2298_vs263.ifrac',
    'permian-c': 'Project-frankNstein_Permian_UTM13FT_0412_PjtDataFrame.ifrac',
    'gng': 'GnG_DemoProject_wDataFrames.ifrac',
}

In [None]:
# Directory holding the `.ifrac` files. The historical default can be overridden, without editing
# the notebook, by setting the ORCHID_INTEGRATION_TEST_DATA environment variable.
test_data_path = pathlib.Path(os.environ.get('ORCHID_INTEGRATION_TEST_DATA',
                                             'c:/src/Orchid.IntegrationTestData/'))
# Full path of every project file, keyed by the short project key.
project_path_names = toolz.valmap(lambda fn: test_data_path.joinpath(fn), project_filenames)
project_path_names

In [None]:
# Load the native project behind every path name, keyed by the short project key.
projects = {
    project_key: opl.ProjectLoader(str(path_name)).native_project()
    for project_key, path_name in project_path_names.items()
}
projects

In [None]:
def make_project_data_frames_by_id(project):
    """Index the items of `project.DataFrames` by their `ObjectId`."""
    data_frames_by_id = {}
    for net_data_frame in project.DataFrames.Items:
        data_frames_by_id[net_data_frame.ObjectId] = net_data_frame
    return data_frames_by_id

# For each project, its data frames indexed by object ID.
project_net_data_frames_by_id = toolz.valmap(make_project_data_frames_by_id, projects)
project_net_data_frames_by_id

In [None]:
def make_project_data_frames_by_name(project):
    """Index the items of `project.DataFrames` by their `Name`.

    If several data frames share a name, the last one encountered is kept.
    """
    data_frames_by_name = {}
    for net_data_frame in project.DataFrames.Items:
        data_frames_by_name[net_data_frame.Name] = net_data_frame
    return data_frames_by_name

# For each project, its data frames indexed by name.
project_net_data_frames_by_name = toolz.valmap(make_project_data_frames_by_name, projects)
project_net_data_frames_by_name

In [None]:
# Number of distinct data frame names in the 'permian-u' project.
len(project_net_data_frames_by_name['permian-u'])

In [None]:
# Number of distinct data frame names in the 'permian-c' project.
len(project_net_data_frames_by_name['permian-c'])

In [None]:
# Number of distinct data frame names in the 'gng' project.
len(project_net_data_frames_by_name['gng'])

In [None]:
# Suffix appended to some of the data frame names used below.
POTENTIALLY_CORRUPTED = ' (Potentially Corrupted)'


class PermianFrames(enum.Enum):
    """Names of the data frames looked up in the 'permian-u' project."""
    PROJECT = 'Project Data Frame 01'
    FDI = 'FDI Observations'
    MICROSEISMIC = f'Microseismic Data Frame 01{POTENTIALLY_CORRUPTED}'
    STAGE = 'Stage Data Frame 01'
    WELL_LOG = 'Well Log Set Data Frame 01'
    C2_STAGE_12 = 'C2-stg12_Xft_Permian_Edited_19-Nov-2018'
    FAULT_TRACE = 'Fault Trace Set Data Frame 01'
    FAULT_SET = 'Fault Set Data Frame 01'

class PermianCorruptedFrames(enum.Enum):
    """Names of the data frames looked up in the 'permian-c' project."""
    PROJECT = f'Project Data Frame 01{POTENTIALLY_CORRUPTED}'
    FDI = f'FDI Observations{POTENTIALLY_CORRUPTED}'
    MICROSEISMIC = f'C3-Microseismic Data Frame 01{POTENTIALLY_CORRUPTED}'

class GnGFrames(enum.Enum):
    """Names of the data frames looked up in the 'gng' project."""
    PROJECT = 'Project Data Frame 01'
    FAULT_TRACE = 'Fault Trace Set Data Frame 01'
    STAGE = 'Stage Data Frame 01'
    WELL_LOG = 'Well Log Set Data Frame 01'
    # NOTE(review): unlike the other names there is no space before "01" - presumably this matches
    # the name stored in the project file; confirm before "fixing" it.
    HORIZON = 'Horizon Marker Set Data Frame01'

In [None]:
# Inspect the 'permian-u' fault trace data frame: print its column names and its row count.
permian_fault_trace_net_data_frame = toolz.get_in(
    ['permian-u', PermianFrames.FAULT_TRACE.value], project_net_data_frames_by_name)
print([c.ColumnName for c in permian_fault_trace_net_data_frame.DataTable.Columns])
print(len(permian_fault_trace_net_data_frame.DataTable.Rows))

In [None]:
# Inspect the 'permian-u' fault set data frame: print its column names and its row count.
permian_fault_set_net_data_frame = toolz.get_in(
    ['permian-u', PermianFrames.FAULT_SET.value], project_net_data_frames_by_name)
print([c.ColumnName for c in permian_fault_set_net_data_frame.DataTable.Columns])
print(len(permian_fault_set_net_data_frame.DataTable.Rows))

In [None]:
def extract_net_data_frame_identities(df):
    """Summarize how a data frame is identified.

    Returns a dictionary with the object ID (converted to `str`), the name and the display name
    of `df`, in that order.
    """
    object_id_text = str(df.ObjectId)
    return dict(object_id=object_id_text, name=df.Name, display_name=df.DisplayName)

@toolz.curry
def build_net_data_frame_identities(net_dfs):
    """Lazily map every data frame in `net_dfs` to its identity dictionary."""
    return map(extract_net_data_frame_identities, net_dfs)
    
# For each project, the list of identity dictionaries of its data frames.
net_data_frame_identities = {
    project_key: list(build_net_data_frame_identities(net_dfs_by_name.values()))
    for project_key, net_dfs_by_name in project_net_data_frames_by_name.items()
}
net_data_frame_identities

In [None]:
def make_data_frame_identities_summary(item):
    """Tag every identity summary of a project with the name of that project.

    Args:
        item: A `(project_name, identity_summaries)` pair in which `identity_summaries` is an
            iterable of dictionaries.

    Returns:
        A `(project_name, summaries)` pair in which `summaries` is a list holding a copy of each
        identity summary extended with a 'project' entry.
    """
    project_name, ndf_id_sums = item
    # Build a list rather than a lazy `map`: a `map` object can be consumed only once, so it
    # displays as `<map object ...>` and yields nothing when a later cell is run a second time.
    summaries = [{**ndf_id_sum, 'project': project_name} for ndf_id_sum in ndf_id_sums]
    return project_name, summaries

# For each project, the identity summaries of its data frames tagged with the project key.
data_frame_identities_summary = dict(
    map(make_data_frame_identities_summary, net_data_frame_identities.items()))
data_frame_identities_summary

In [None]:
# Tabulate the identity summaries of the 'permian-u' data frames.
pd.DataFrame(data=data_frame_identities_summary['permian-u'],
             columns=['project', 'object_id', 'name', 'display_name'])

In [None]:
# Tabulate the identity summaries of the 'permian-c' data frames.
pd.DataFrame(data=data_frame_identities_summary['permian-c'],
             columns=['project', 'object_id', 'name', 'display_name'])

In [None]:
# Tabulate the identity summaries of the 'gng' data frames.
pd.DataFrame(data=data_frame_identities_summary['gng'], 
             columns=['project', 'object_id', 'name', 'display_name'])

In [None]:
# Look up the project data frame of the 'permian-u' and 'permian-c' projects by name.
# `toolz.get_in` is called without `no_default=True`, so a missing name yields `None`.
permian_project_01 = toolz.get_in(['permian-u', PermianFrames.PROJECT.value],
                                  project_net_data_frames_by_name)
permian_corrupted_project_01 = toolz.get_in(['permian-c', PermianCorruptedFrames.PROJECT.value],
                                            project_net_data_frames_by_name)

In [None]:
# Show the display name of the 'permian-c' project data frame.
# NOTE(review): 'foo' looks like a placeholder default - presumably it is returned when
# `DisplayName` is `None`; confirm against the `option` package.
option.maybe(permian_corrupted_project_01.DisplayName).unwrap_or('foo')

In [None]:
# Adapted from code at
# https://docs.microsoft.com/en-us/dotnet/framework/data/adonet/dataset-datatable-dataview/creating-a-datareader
# retrieved on 18-Apr-2021.

def table_row_to_dict(reader, row_no):
    """Convert the row at which `reader` is positioned to a dictionary.

    The result starts with a 'Sample' entry holding `row_no`, followed by one entry for each field
    of the reader keyed by field name. A field that is itself named 'Sample' replaces the seeded
    value.

    Raises:
        TypeError: If a non-null value of a `DateTime` field is encountered.
    """
    def to_python_value(raw_value, column_type):
        # Database nulls become `None`
        if raw_value == DBNull.Value:
            return None

        # `DateTimeOffset` values become text in round-trip ('o') format
        if column_type == DateTimeOffset.UtcNow.GetType():
            return raw_value.ToString('o')

        # `DateTime` values are unexpected: report the value and fail
        if column_type == DateTime.UtcNow.GetType():
            raw_value_text = raw_value.ToString('o')
            print(f'"{raw_value_text}" {column_type}')
            raise TypeError(f'Unexpected `DateTime` {raw_value_text}')

        return raw_value

    field_indices = range(reader.FieldCount)
    field_names = [reader.GetName(field_index) for field_index in field_indices]
    field_types = [reader.GetFieldType(field_index) for field_index in field_indices]
    field_values = [reader[field_name] for field_name in field_names]

    row = {'Sample': row_no}
    for field_name, field_value, field_type in zip(field_names, field_values, field_types):
        row[field_name] = to_python_value(field_value, field_type)
    return row

def read_data_table(data_table):
    """Generate one dictionary for every row of `data_table`.

    Args:
        data_table: An object whose `CreateDataReader()` returns a reader offering `Read()`,
            `NextResult()` and `Dispose()`.

    Yields:
        The result of `table_row_to_dict` for each row read, numbered consecutively from zero.
    """
    reader = data_table.CreateDataReader()
    try:
        # Count the rows as they are read. Drawing row numbers from an iterator sized by
        # `len(data_table.Rows)` would raise `StopIteration` inside this generator (reported as
        # `RuntimeError`) if the reader ever produced more rows than that.
        row_no = 0
        while True:
            # `Read()` returns a false value as soon as the current result set has no more rows,
            # so an empty result set is simply skipped.
            while reader.Read():
                yield table_row_to_dict(reader, row_no)
                row_no += 1
            if not reader.NextResult():
                break
    finally:
        # Release the reader on exhaustion, on error, and when the generator is closed early.
        reader.Dispose()

def table_to_data_frame(data_table):
    """Build a pandas data frame from the row dictionaries read from `data_table`."""
    row_dicts = list(read_data_table(data_table))
    return pd.DataFrame(data=row_dicts)

In [None]:
def net_data_frame_to_pandas_data_frame(net_df):
    """Convert the `DataTable` of `net_df` to a pandas data frame."""
    return table_to_data_frame(net_df.DataTable)


# For each project, the pandas version of every data frame, indexed by data frame name.
project_pandas_data_frames_by_name = {
    project_key: toolz.valmap(net_data_frame_to_pandas_data_frame, net_dfs_by_name)
    for project_key, net_dfs_by_name in project_net_data_frames_by_name.items()
}

In [None]:
# Show the pandas version of the 'permian-u' fault trace data frame. Ending the cell with the
# frame itself uses the notebook's rich display instead of the plain text from `print`.
permian_fault_trace_data_frame = toolz.get_in(
    ['permian-u', PermianFrames.FAULT_TRACE.value], project_pandas_data_frames_by_name)
permian_fault_trace_data_frame

In [None]:
# Show the pandas version of the 'permian-u' fault set data frame. Ending the cell with the
# frame itself uses the notebook's rich display instead of the plain text from `print`.
permian_fault_set_data_frame = toolz.get_in(
    ['permian-u', PermianFrames.FAULT_SET.value], project_pandas_data_frames_by_name)
permian_fault_set_data_frame

In [None]:
# Build the list `[0, 1, ..., len(x) - 1]` for any sized argument `x`.
all_indices = toolz.compose(list, range, len)

@toolz.curry
def sample_data_frame_column_indices(df, sample_size=6, rng=None):
    """Choose the column positions used to show a sample of `df`.

    The first column (the seeded "Sample" column) and the last column bracket the result. Between
    them is a sorted random selection of at most `sample_size` of the remaining columns. A
    "ProjectName" column is never one of the randomly selected columns.

    Args:
        df: The pandas data frame whose columns are sampled.
        sample_size: The maximum number of randomly selected columns.
        rng: An optional `random.Random` instance allowing reproducible samples. By default,
            the shared generator of the `random` module is used.

    Returns:
        A list of column positions, or an empty list if no column remains for random selection.
    """
    column_names = list(df.columns.values)
    last_index = len(column_names) - 1

    # Do not sample the "Sample" column seeded at the beginning of the columns nor the last
    # column. Excluding by position value (not by slicing off the final candidate) keeps the
    # remaining candidates intact even when "ProjectName" happens to be the last column.
    excluded_indices = {0, last_index}

    # Do not sample the "ProjectName" column if it is present
    if 'ProjectName' in column_names:
        excluded_indices.add(column_names.index('ProjectName'))

    candidate_column_indices = [column_index for column_index in range(len(column_names))
                                if column_index not in excluded_indices]

    # Return empty sample indices if no candidates
    if not candidate_column_indices:
        return []

    # Randomly sample (at most) `sample_size` columns from the candidates
    generator = random if rng is None else rng
    sampled_indices = generator.sample(candidate_column_indices,
                                       min(sample_size, len(candidate_column_indices)))

    # Bracket the candidates with the indices of the "Sample" column and the last column
    return [0] + sorted(sampled_indices) + [last_index]

@toolz.curry
def sample_data_frame_row_indices(df, sample_size=6, rng=None):
    """Choose the row positions used to show a sample of `df`.

    Args:
        df: The pandas data frame whose rows are sampled.
        sample_size: The maximum number of randomly selected rows.
        rng: An optional `random.Random` instance allowing reproducible samples. By default,
            the shared generator of the `random` module is used.

    Returns:
        An empty list if `df` has no rows; `[0]` if it has a single row; otherwise the first row,
        a sorted random selection of at most `sample_size` interior rows, and the last row.
    """
    row_count = len(df)
    if row_count == 0:
        # No rows so return empty sample indices
        return []

    if row_count == 1:
        # One row so return that row
        return [0]

    # At least two rows so skip the first and last row. Sampling from a `range` avoids building
    # and shuffling a list of every row index merely to keep a handful of them.
    interior_row_indices = range(1, row_count - 1)
    generator = random if rng is None else rng
    sampled_indices = generator.sample(interior_row_indices,
                                       min(sample_size, len(interior_row_indices)))

    # first row, a sorted, random sample of rows, and the last row
    return [0] + sorted(sampled_indices) + [row_count - 1]

In [None]:
@toolz.curry
def sample_data_frame_indices(indices_func, df):
    """Apply `indices_func` to `df` and return whatever it selects."""
    selected_indices = indices_func(df)
    return selected_indices

In [None]:
# For each project and data frame name, the sampled column indices of that data frame.
sampled_column_indices_by_name = {
    project_key: toolz.valmap(sample_data_frame_column_indices, pandas_dfs_by_name)
    for project_key, pandas_dfs_by_name in project_pandas_data_frames_by_name.items()
}
sampled_column_indices_by_name

In [None]:
# For each project and data frame name, the sampled row indices of that data frame.
sampled_row_indices_by_name = {
    project_key: toolz.valmap(sample_data_frame_row_indices, pandas_dfs_by_name)
    for project_key, pandas_dfs_by_name in project_pandas_data_frames_by_name.items()
}
sampled_row_indices_by_name

In [None]:
# Report the column names, emptiness and row count of every pandas data frame.
for field_name, pandas_dfs_by_name in project_pandas_data_frames_by_name.items():
    for data_frame_name, data_frame in pandas_dfs_by_name.items():
        print(f'{field_name=}, {data_frame_name=}, {len(data_frame.columns.values)=}')
        print(data_frame.columns.values)
        print(f'Empty? {data_frame.empty}')
        print(f'{len(data_frame)=}')

In [None]:
def add_sample_numbers(items):
    """Stamp the sampled rows of a data frame with their row numbers.

    Args:
        items: A sequence `(df, row_indices, column_indices)`: a pandas data frame with a 'Sample'
            column, the positions of the sampled rows, and the positions of the sampled columns.

    Returns:
        A tuple `(data_frame, row_indices, column_indices)`. If `df` is empty, it is returned
        itself; otherwise `data_frame` is a copy of `df` whose 'Sample' column holds, in every
        sampled row, the position of that row.

    Raises:
        ValueError: If a non-empty `df` has no 'Sample' column.
    """
    df, row_indices, column_indices = items
    if df.empty:
        return df, row_indices, column_indices

    # Write to a copy. The frames passed in are shared with `project_pandas_data_frames_by_name`;
    # changing them in place would silently alter frames that other cells read and display.
    numbered = df.copy()
    sample_index = list(numbered.columns.values).index('Sample')
    for row_index in row_indices:
        numbered.iloc[row_index, sample_index] = row_index
    return numbered, row_indices, column_indices

# For every project and data frame name, gather the pandas data frame, its sampled row indices
# and its sampled column indices (in the order the dictionaries are passed) and hand the three to
# `add_sample_numbers`. The outer `merge_with` combines by project key; the inner, curried
# `merge_with` combines by data frame name.
data_frames_to_sample_by_name = toolz.merge_with(toolz.merge_with(add_sample_numbers),
                                                 project_pandas_data_frames_by_name,
                                                 sampled_row_indices_by_name,
                                                 sampled_column_indices_by_name)
# data_frames_to_sample_by_name

In [None]:
def sample_data_frame(df, row_indices, column_indices):
    """Select the rows and columns at the given positions of `df`.

    An empty `df` is returned unchanged.
    """
    return df if df.empty else df.iloc[row_indices, column_indices]

In [None]:
# For each project and data frame name, the sampled rows and columns of that data frame.
sampled_project_data_frames_by_name = {
    project_key: {
        data_frame_name: sample_data_frame(*to_sample)
        for data_frame_name, to_sample in to_sample_by_name.items()
    }
    for project_key, to_sample_by_name in data_frames_to_sample_by_name.items()
}
# sampled_project_data_frames_by_name

In [None]:
# Sample of the 'permian-u' project data frame.
sampled_project_data_frames_by_name['permian-u'][PermianFrames.PROJECT.value]

In [None]:
# Sample of the 'permian-u' FDI observations data frame.
sampled_project_data_frames_by_name['permian-u'][PermianFrames.FDI.value]

In [None]:
# Sample of the 'permian-u' microseismic data frame.
sampled_project_data_frames_by_name['permian-u'][PermianFrames.MICROSEISMIC.value]

In [None]:
# Sample of the 'permian-u' stage data frame.
sampled_project_data_frames_by_name['permian-u'][PermianFrames.STAGE.value]

In [None]:
# Sample of the 'permian-u' well log set data frame.
sampled_project_data_frames_by_name['permian-u'][PermianFrames.WELL_LOG.value]

In [None]:
# Sample of the 'permian-u' data frame named by `PermianFrames.C2_STAGE_12`.
sampled_project_data_frames_by_name['permian-u'][PermianFrames.C2_STAGE_12.value]

In [None]:
# Sample of the 'permian-u' fault trace set data frame.
sampled_project_data_frames_by_name['permian-u'][PermianFrames.FAULT_TRACE.value]

In [None]:
# Sample of the 'permian-u' fault set data frame.
sampled_project_data_frames_by_name['permian-u'][PermianFrames.FAULT_SET.value]

In [None]:
# Sample of the 'permian-c' project data frame.
sampled_project_data_frames_by_name['permian-c'][PermianCorruptedFrames.PROJECT.value]

In [None]:
# Sample of the 'permian-c' FDI observations data frame.
sampled_project_data_frames_by_name['permian-c'][PermianCorruptedFrames.FDI.value]

In [None]:
# Sample of the 'permian-c' microseismic data frame.
sampled_project_data_frames_by_name['permian-c'][PermianCorruptedFrames.MICROSEISMIC.value]

In [None]:
# Sample of the 'gng' project data frame.
sampled_project_data_frames_by_name['gng'][GnGFrames.PROJECT.value]

In [None]:
# Sample of the 'gng' fault trace set data frame.
sampled_project_data_frames_by_name['gng'][GnGFrames.FAULT_TRACE.value]

In [None]:
# Sample of the 'gng' stage data frame.
sampled_project_data_frames_by_name['gng'][GnGFrames.STAGE.value]

In [None]:
# Sample of the 'gng' well log set data frame.
sampled_project_data_frames_by_name['gng'][GnGFrames.WELL_LOG.value]

In [None]:
# Sample of the 'gng' horizon marker set data frame.
sampled_project_data_frames_by_name['gng'][GnGFrames.HORIZON.value]