In [None]:
import json
import os
import pathlib
import zipfile

In [None]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns
import scipy

In [None]:
import pendulum as pdt
import toolz.curried as toolz

In [None]:
# File name of each project archive (.ifrac), keyed by a short project name.
project_filenames = dict(
    bakken='frankNstein_Bakken_UTM13_FEET.ifrac',
    montney='Project-frankNstein_Montney_UTM13_METERS.ifrac',
    permian='Project_frankNstein_Permian_UTM13_FEET.ifrac',
)

In [None]:
# Directory that holds the project archives. The original location remains the
# default; set the ORCHID_TEST_DATA_PATH environment variable to run the
# notebook on a machine where the data lives somewhere else.
test_data_path = pathlib.Path(
    os.environ.get('ORCHID_TEST_DATA_PATH', 'c:/src/Orchid.IntegrationTestData/')
)
# Full path of each project archive, keyed by project name.
project_path_names = toolz.valmap(test_data_path.joinpath, project_filenames)
project_path_names

In [None]:
def project_json(path):
    """Return the parsed contents of 'project.json' stored in a zip archive.

    Parameters
    ----------
    path
        Anything `zipfile.ZipFile` accepts as its first argument (for example
        a path to the archive or a file-like object).

    Returns
    -------
    The Python object produced by `json.loads` for the archive member named
    'project.json'.
    """
    with zipfile.ZipFile(path) as project_archive:
        raw_project = project_archive.read('project.json')
    # Parsing does not need the archive, so it happens after it is closed.
    return json.loads(raw_project)

In [None]:
# Parsed 'project.json' of the archive registered under the 'bakken' key.
bakken_project_json = project_json(project_path_names['bakken'])

In [None]:
# Index the well records found under Object -> Wells by their 'Name' field.
bakken_wells = dict(
    (well_record['Name'], well_record)
    for well_record in toolz.get_in(['Object', 'Wells'], bakken_project_json)
)

In [None]:
def string_to_date_time(i):
    """Convert the value of a stage time item to a pandas timestamp.

    Parameters
    ----------
    i : tuple
        A ``(column_name, value)`` pair.

    Returns
    -------
    tuple
        The pair unchanged unless ``column_name`` is ``'StartTime'`` or
        ``'StopTime'``. For those two columns the value is replaced by
        ``pd.NaT`` when it equals the text ``'0001-01-01T00:00:00.0000000'``
        and otherwise by ``pd.Timestamp(pdt.parse(value))``.
    """
    name, raw_value = i

    # Only the two time columns are converted; everything else passes through.
    if name not in ('StartTime', 'StopTime'):
        return name, raw_value

    # This exact text is mapped to "not a time" instead of being parsed.
    if raw_value == '0001-01-01T00:00:00.0000000':
        return name, pd.NaT

    return name, pd.Timestamp(pdt.parse(raw_value))
    
def stage_details(s):
    """Reduce one stage record to the fields used in this notebook.

    Keeps only the 'DisplayStageNumber', 'GlobalStageSequenceNumber',
    'StartTime' and 'StopTime' items of the mapping `s`, then passes every
    kept item through `string_to_date_time`.
    """
    wanted_keys = {'DisplayStageNumber', 'GlobalStageSequenceNumber', 'StartTime', 'StopTime'}
    selected_items = toolz.keyfilter(lambda key: key in wanted_keys, s)
    return toolz.itemmap(string_to_date_time, selected_items)

def stages_details(project, well, wells=None):
    """Return the stage detail records of one well, tagged with project and well.

    Parameters
    ----------
    project
        Value stored under the 'Project' key of every returned record.
    well
        Key of the well in `wells`; also stored under the 'Well' key of every
        returned record.
    wells : mapping, optional
        Mapping from well name to a well record that has a 'Stages' entry.
        Defaults to the module-level `bakken_wells`, which is what this
        function always used before the parameter existed. Pass the wells of
        another project explicitly so that `project` and the data agree.

    Returns
    -------
    list of dict
        One dict per stage: the output of `stage_details` merged over
        ``{'Project': project, 'Well': well}``.

    Raises
    ------
    KeyError
        If `well` is not in `wells` or the well record has no 'Stages' entry.
    """
    if wells is None:
        wells = bakken_wells
    result = toolz.pipe(
        # no_default=True makes a missing well fail here with a KeyError
        # instead of handing None to the mapping steps below.
        toolz.get_in([well, 'Stages'], wells, no_default=True),
        toolz.map(stage_details),
        toolz.map(lambda s: toolz.merge({'Project': project, 'Well': well}, s)),
        list,
    )
    return result

In [None]:
# Empty containers for the intermediate results; later cells store one entry
# per project name in each of them.
stages_seq = dict()
stages = dict()
stages_by_seq_no = dict()
previous_treatment_starts = dict()
stages_with_previous = dict()

In [None]:
# Uncomment one of the lines below to inspect the stage records of a single well.
# stages_details('bakken', 'Demo_1H')
# stages_details('bakken', 'Demo_2H')
# stages_details('bakken', 'Demo_3H')
# stages_details('bakken', 'Demo_4H')

In [None]:
# Collect the stage records of every Bakken well into one list.
# A list (rather than a lazy, one-shot iterator) is stored so that any cell
# reading stages_seq['bakken'] can be re-run and still see all records.
stages_seq['bakken'] = [
    stage_record
    for well_name in ('Demo_1H', 'Demo_2H', 'Demo_3H', 'Demo_4H')
    for stage_record in stages_details('bakken', well_name)
]

In [None]:
# Build a frame from the collected stage records.
# NOTE: if stages_seq['bakken'] is a one-shot iterator it is consumed here, so
# re-running only this cell would then produce an empty frame.
stages['bakken'] = pd.DataFrame(data=stages_seq['bakken'])

In [None]:
# Display the stage frame as built, before it is re-indexed.
stages['bakken']

In [None]:
# Re-index the stages by 'GlobalStageSequenceNumber' and order them by it.
stages_by_seq_no['bakken'] = (
    stages['bakken']
    .set_index('GlobalStageSequenceNumber')
    .sort_index()
)
stages_by_seq_no['bakken']

In [None]:
# Inspect the sorted 'GlobalStageSequenceNumber' index on its own.
stages_by_seq_no['bakken'].index

In [None]:
# Plot every global stage sequence number against its 1-based position in the
# sorted index.
fig, ax = plt.subplots()

# The x range is derived from the data instead of a fixed stage count, so the
# cell keeps working when the number of stages changes.
ax.plot(
    range(1, len(stages_by_seq_no['bakken'].index) + 1),
    stages_by_seq_no['bakken'].index,
)
ax.set_xlabel('Position in sorted index')
ax.set_ylabel('GlobalStageSequenceNumber')

plt.show()

In [None]:
# Take the 'StartTime' values for index labels 2 and above as a one-column
# frame and name that column 'PreviousTreatmentStart'.
# NOTE(review): the index is not shifted here, so every value stays on its own
# stage's label rather than moving to a neighbouring stage — confirm whether a
# shift (e.g. Series.shift) was intended before relying on the column name.
previous_treatment_starts['bakken'] = stages_by_seq_no['bakken'].loc[2:, 'StartTime'].to_frame()
previous_treatment_starts['bakken'].columns = ['PreviousTreatmentStart']
previous_treatment_starts['bakken']

In [None]:
# Column-wise concatenation aligns the two frames on their index; index labels
# missing from the second frame get a missing value in its column.
stages_with_previous['bakken'] = pd.concat(
    [stages_by_seq_no['bakken'], previous_treatment_starts['bakken']],
    axis=1,
)
stages_with_previous['bakken']

In [None]:
# Add a 'Changeover' column: 'StopTime' minus 'PreviousTreatmentStart', row by
# row. This mutates the frame stored in stages_with_previous['bakken'] in place.
# NOTE(review): if 'PreviousTreatmentStart' holds the same row's 'StartTime'
# (it is built from an unshifted slice of that column), this difference is the
# stage's own stop-minus-start time, not a gap between two stages — confirm.
stages_with_previous['bakken']['Changeover'] = (
    stages_with_previous['bakken']['StopTime'] - stages_with_previous['bakken']['PreviousTreatmentStart']
)
stages_with_previous['bakken']

In [None]:
# Express 'Changeover' in seconds.
# The value is recomputed from the two time columns instead of converting the
# existing 'Changeover' column, so re-running this cell gives the same result
# rather than failing on a column that already holds numbers.
stages_with_previous['bakken']['Changeover'] = pd.to_timedelta(
    stages_with_previous['bakken']['StopTime']
    - stages_with_previous['bakken']['PreviousTreatmentStart']
).dt.total_seconds()
stages_with_previous['bakken']

In [None]:
# Line plot of 'Changeover' against the frame's index.
stages_with_previous['bakken'].plot.line(y='Changeover')

In [None]:
# Histogram of the 'Changeover' values.
stages_with_previous['bakken'].hist(column='Changeover')