In [None]:
import pathlib
import random

In [None]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns

In [None]:
from orchid import (project_loader as opl)

In [None]:
import option
import toolz.curried as toolz

In [None]:
# noinspection PyUnresolvedReferences
import UnitsNet

In [None]:
# Maps the short project key used throughout this notebook to the name of the
# `.ifrac` project file that is loaded for that key.
project_filenames = {
    'permian-a': 'Project-frankNstein_Permian_UTM13FT_0412_PjtDataFrame.ifrac',
    'gng': 'GnG_DemoProject_wDataFrames.ifrac',
}

In [None]:
# NOTE(review): absolute, machine-specific directory; anyone running this
# notebook elsewhere has to edit this line.
test_data_path = pathlib.Path('c:/src/Orchid.IntegrationTestData/')
# Resolve every project file name against the test data directory.
project_path_names = {
    project_key: test_data_path / filename
    for project_key, filename in project_filenames.items()
}
project_path_names

In [None]:
# Load every project file and keep the object returned by `native_project()`,
# keyed by the same short project key as `project_path_names`.
projects = {
    project_key: opl.ProjectLoader(str(path_name)).native_project()
    for project_key, path_name in project_path_names.items()
}
projects

In [None]:
def make_project_data_frames_by_id(project):
    """Index the data frames of `project` by their `ObjectId`.

    Walks `project.DataFrames.Items` and returns a dict mapping each item's
    `ObjectId` to the item itself. When two items share an `ObjectId`, the
    later one replaces the earlier one.
    """
    frames_by_id = {}
    for net_data_frame in project.DataFrames.Items:
        frames_by_id[net_data_frame.ObjectId] = net_data_frame
    return frames_by_id

# For every loaded project, build its `ObjectId -> data frame` lookup.
project_net_data_frames_by_id = toolz.valmap(make_project_data_frames_by_id, projects)
project_net_data_frames_by_id

In [None]:
def make_project_data_frames_by_name(project):
    """Index the data frames of `project` by their `Name`.

    Walks `project.DataFrames.Items` and returns a dict mapping each item's
    `Name` to the item itself. When two items share a `Name`, the later one
    replaces the earlier one.
    """
    frames_by_name = {}
    for net_data_frame in project.DataFrames.Items:
        frames_by_name[net_data_frame.Name] = net_data_frame
    return frames_by_name

# For every loaded project, build its `Name -> data frame` lookup.
project_net_data_frames_by_name = toolz.valmap(make_project_data_frames_by_name, projects)
project_net_data_frames_by_name

In [None]:
# Number of data frames found in the 'permian-a' project.
len(project_net_data_frames_by_name['permian-a'])

In [None]:
# Number of data frames found in the 'gng' project.
len(project_net_data_frames_by_name['gng'])

In [None]:
def extract_net_data_frame_identities(df):
    """Collect the identifying attributes of one data frame object.

    Returns a dict with the keys 'object_id' (the string form of
    `df.ObjectId`), 'name' (`df.Name`) and 'display_name' (`df.DisplayName`),
    in that order.
    """
    return dict(
        object_id=str(df.ObjectId),
        name=df.Name,
        display_name=df.DisplayName,
    )

@toolz.curry
def build_net_data_frame_identities(ndfs):
    """Lazily map `extract_net_data_frame_identities` over `ndfs`.

    Returns a one-shot `map` iterator; callers that need to traverse the
    result more than once must materialize it (for example with `list`).
    """
    return map(extract_net_data_frame_identities, ndfs)
    
# Per project: the list of identity dicts, one for each of its data frames.
net_data_frame_identities = {
    project_key: list(build_net_data_frame_identities(dfs_by_name.values()))
    for project_key, dfs_by_name in project_net_data_frames_by_name.items()
}
net_data_frame_identities

In [None]:
def make_data_frame_identities_summary(item):
    """Tag every identity summary of one project with that project's name.

    Args:
        item: A `(project_name, ndf_id_sums)` pair, where `ndf_id_sums` is an
            iterable of dicts (one identity summary per data frame).

    Returns:
        A `(project_name, summaries)` pair. `summaries` is a list of new
        dicts; each one holds the entries of the matching input dict plus a
        'project' entry set to `project_name`. The input dicts are not
        modified.
    """
    project_name, ndf_id_sums = item
    # Build a real list rather than a lazy `map`: a lazy iterator displays as
    # an opaque object and is exhausted by its first consumer, so re-running
    # a cell that tabulates these summaries would silently produce no rows.
    summaries = [
        {**ndf_id_sum, 'project': project_name}
        for ndf_id_sum in ndf_id_sums
    ]
    return project_name, summaries

# Rebuild the per-project mapping from the `(project_name, summaries)` pairs
# produced for each of its items.
data_frame_identities_summary = dict(
    make_data_frame_identities_summary(item)
    for item in net_data_frame_identities.items()
)
data_frame_identities_summary

In [None]:
# Tabulate the identity summaries of the 'permian-a' project.
pd.DataFrame(
    columns=['project', 'object_id', 'name', 'display_name'],
    data=data_frame_identities_summary['permian-a'],
)

In [None]:
# Tabulate the identity summaries of the 'gng' project.
pd.DataFrame(
    columns=['project', 'object_id', 'name', 'display_name'],
    data=data_frame_identities_summary['gng'],
)

In [None]:
# Pick one data frame of the 'permian-a' project by name for closer inspection.
# NOTE(review): per the toolz documentation `get_in` yields its default
# (`None`) when a key on the path is missing instead of raising; in that
# case the `.DisplayName` access in the next cell would fail. Confirm the
# name matches the project file.
permian_project_01 = toolz.get_in(['permian-a', 'Project Data Frame 01'], project_net_data_frames_by_name)

In [None]:
# Show the display name of the selected data frame, with 'Fooey' as the
# fallback value passed to `unwrap_or`.
# NOTE(review): presumably `option.maybe` treats a `None` display name as
# "no value" so that the fallback is used; confirm against the `option`
# package.
option.maybe(permian_project_01.DisplayName).unwrap_or('Fooey')

In [None]:
# Adapted from code at
# https://docs.microsoft.com/en-us/dotnet/framework/data/adonet/dataset-datatable-dataview/creating-a-datareader
# retrieved on 18-Apr-2021.

def table_row_to_dict(reader):
    """Convert the reader's current row into a `column name -> value` dict.

    Visits the field positions `0 .. reader.FieldCount - 1`, asks the reader
    for the name at each position and looks the value up by that name.
    """
    row = {}
    for field_index in range(reader.FieldCount):
        column_name = reader.GetName(field_index)
        row[column_name] = reader[column_name]
    return row

def read_data_table(data_table):
    """Yield one dict per row of `data_table`.

    Creates a reader with `data_table.CreateDataReader()` and walks it result
    by result: while the current result reports rows, each row is converted
    with `table_row_to_dict` and yielded. Iteration stops at the first result
    without rows or when `NextResult()` reports that nothing is left. The
    reader is disposed when the generator finishes or is closed early.
    """
    reader = data_table.CreateDataReader()
    try:
        # A result without rows ends the iteration altogether.
        while reader.HasRows:
            # `Read()` advances to the next row and reports whether one exists.
            while reader.Read():
                yield table_row_to_dict(reader)
            if not reader.NextResult():
                break
    finally:
        reader.Dispose()

def table_to_data_frame(data_table):
    """Build a pandas `DataFrame` from the row dicts of `data_table`.

    All rows produced by `read_data_table` are collected into a list first.
    A table that yields no rows therefore results in `pd.DataFrame(data=[])`.
    """
    rows = list(read_data_table(data_table))
    return pd.DataFrame(data=rows)

In [None]:
def net_data_frame_to_pandas_data_frame(net_df):
    """Convert one data frame object to pandas via its `DataTable` attribute."""
    return table_to_data_frame(net_df.DataTable)


# Same two-level layout as `project_net_data_frames_by_name`
# (project key -> data frame name), but holding pandas data frames.
project_pandas_data_frames_by_name = {
    project_key: {
        frame_name: net_data_frame_to_pandas_data_frame(net_df)
        for frame_name, net_df in net_dfs_by_name.items()
    }
    for project_key, net_dfs_by_name in project_net_data_frames_by_name.items()
}

In [None]:
# toolz.get_in(['permian-a', 'Project Data Frame 01'], project_pandas_data_frames_by_name)

In [None]:
def all_indices(sized):
    """Return the list `[0, 1, ..., len(sized) - 1]` of positions in `sized`."""
    return list(range(len(sized)))

@toolz.curry
def sample_data_frame_column_indices(data_frame):
    """Choose the column positions to sample from `data_frame`.

    Args:
        data_frame: A pandas data frame.

    Returns:
        A list of column positions: up to 7 randomly chosen interior
        positions (neither the first nor the last column) in shuffled order,
        followed by the position of the last column. A frame without any
        columns yields an empty list.

    Uses the module-level `random` state via `random.shuffle`.
    NOTE(review): no seed is set in the visible cells, so the chosen columns
    presumably differ from run to run.
    """
    column_count = len(data_frame.columns)
    if column_count == 0:
        # Without this guard the result would be `[-1]`, which is out of
        # bounds for positional indexing. A frame built from a table that
        # produced no rows (`pd.DataFrame(data=[])`) has no columns.
        return []
    # Ignore, `ProjectName`, the first column
    candidate_column_indices = all_indices(data_frame.columns)[1:-1]
    random.shuffle(candidate_column_indices)
    # The last column is always part of the sample.
    return candidate_column_indices[:7] + [column_count - 1]

@toolz.curry
def sample_data_frame_row_indices(data_frame):
    """Choose the row positions to sample from `data_frame`.

    Args:
        data_frame: A pandas data frame (anything supporting `len`).

    Returns:
        A list of row positions: the first row, up to 6 randomly chosen
        interior rows in ascending order, and the last row. A frame without
        rows yields an empty list and a single-row frame yields `[0]`.

    Uses the module-level `random` state via `random.shuffle`.
    NOTE(review): no seed is set in the visible cells, so the chosen rows
    presumably differ from run to run.
    """
    row_count = len(data_frame)
    if row_count == 0:
        # `[0, -1]` would be out of bounds for positional indexing.
        return []
    if row_count == 1:
        # First and last row coincide; do not list the row twice.
        return [0]
    candidate_row_indices = all_indices(data_frame)[1:-1]
    random.shuffle(candidate_row_indices)
    return [0] + sorted(candidate_row_indices[:6]) + [row_count - 1]

In [None]:
@toolz.curry
def sample_data_frame_indices(indices_func, data_frame):
    """Apply `indices_func` to `data_frame` and return its result.

    Curried, so it can be called with `indices_func` alone to obtain a
    function that still expects the data frame.
    """
    sampled_indices = indices_func(data_frame)
    return sampled_indices

In [None]:
# Sampled column positions for every data frame of every project.
sampled_column_indices_by_name = {
    project_key: {
        frame_name: sample_data_frame_column_indices(data_frame)
        for frame_name, data_frame in frames_by_name.items()
    }
    for project_key, frames_by_name in project_pandas_data_frames_by_name.items()
}
sampled_column_indices_by_name

In [None]:
# Sampled row positions for every data frame of every project.
sampled_row_indices_by_name = {
    project_key: {
        frame_name: sample_data_frame_row_indices(data_frame)
        for frame_name, data_frame in frames_by_name.items()
    }
    for project_key, frames_by_name in project_pandas_data_frames_by_name.items()
}
sampled_row_indices_by_name

In [None]:
# Combine, per project and per data frame name, the pandas data frame with
# its sampled row positions and sampled column positions into one
# `(data_frame, row_indices, column_indices)` tuple. Both index mappings
# were derived from `project_pandas_data_frames_by_name`, so all three
# share the same keys.
data_frames_to_sample_by_name = {
    project_key: {
        frame_name: (
            data_frame,
            sampled_row_indices_by_name[project_key][frame_name],
            sampled_column_indices_by_name[project_key][frame_name],
        )
        for frame_name, data_frame in frames_by_name.items()
    }
    for project_key, frames_by_name in project_pandas_data_frames_by_name.items()
}
data_frames_to_sample_by_name

In [None]:
def sample_data_frame(data_frame, row_indices, column_indices):
    """Select rows and columns of `data_frame` by position.

    Both `row_indices` and `column_indices` are positional indexers handed
    to `data_frame.iloc`; the selection keeps the order in which the
    positions are given.
    """
    return data_frame.iloc[row_indices, column_indices]

In [None]:
# Apply `sample_data_frame` to every `(data_frame, row_indices,
# column_indices)` tuple, keeping the project -> name layout.
sampled_project_data_frames_by_name = {
    project_key: {
        frame_name: sample_data_frame(*sample_arguments)
        for frame_name, sample_arguments in arguments_by_name.items()
    }
    for project_key, arguments_by_name in data_frames_to_sample_by_name.items()
}
sampled_project_data_frames_by_name

In [None]:
# Display the sampled rows and columns of one specific 'permian-a' data frame.
sampled_project_data_frames_by_name['permian-a']['Project Data Frame 01']