In [None]:
import pandas as pd
import sys
from data_reader import fes_reader, perf_reader
from tqdm import tqdm
import bisect
import json
import datetime

In [None]:
# конвертация путей файлов в зависимости от системы
def replace_slash(file_path):
    platform = sys.platform
    slash_map = {'win32': '\\',
                'cygwin': '\\',
                'darwin': '/',
                'linux2': '/'}
    if platform not in slash_map.keys(): platform = 'linux2'
    return file_path.replace('\\', slash_map[platform])

In [None]:
perf_path = replace_slash('input_data\\' + 'armitz/miner/ARX_PERF.xlsx')
fes_path = replace_slash('input_data\\armitz/miner/QINF.xlsx')

In [None]:
perf_df = perf_reader(perf_path)
fes_df = fes_reader(fes_path)
SOIL_CUT = 60

In [None]:
fes_df

In [None]:
perf_df

In [None]:
# проверка пласта на перфорированность
def is_perf(top, bot, ints):
    if ints is None:
        return False
    for int_perf in ints:
        if (top < int_perf['bot']) and (int_perf['top'] < bot):
            return True
    return False


def find_layers(perf_ints, fes_df, soil_cut):
    lost_layers = pd.DataFrame(columns=['well', 'top', 'bot', 'soil', 'is_perf'])
    for well in tqdm(fes_df['well'].unique()):
        well_df = fes_df[fes_df['well'] == well][['well', 'top', 'bot', 'soil']]
        well_df.dropna(inplace=True)
        if len(well_df) == 0:
            continue
        ints = perf_ints.get(well)
        well_df['is_perf'] = well_df\
            .apply(lambda x: is_perf(x['top'], x['bot'], ints), axis=1)
        non_perf = well_df[(~well_df['is_perf']) &
                           (well_df['soil'] > soil_cut)]
        lost_layers = lost_layers.append(non_perf, ignore_index=True)
    return lost_layers

plasts = find_layers(perf_df, fes_df, SOIL_CUT)

In [25]:
perf_ints = {}
for well in tqdm(perf_df['well'].unique()):
    well_df = perf_df[perf_df['well'] == well][['top', 'bot', 'type', 'date']]
    perf_ints[well] = well_df.to_dict(orient='records')

100%|██████████| 3953/3953 [00:16<00:00, 232.67it/s]


In [None]:
def bisect_left(a, x, lo=0, hi=None, param='bot'):
    if lo < 0:
        raise ValueError('lo must be non-negative')
    if hi is None:
        hi = len(a)
    while lo < hi:
        mid = (lo + hi) // 2
        if a[mid][param] < x:
            lo = mid + 1
        else:
            hi = mid
    return lo

def get_actual_perf(perf_ints, act_perf_year=None):
    act_perf = []
    if act_perf_year is None:
        act_perf_year = datetime.datetime.now().date()
    for well in tqdm(perf_ints.keys()):
        act_perf_well = []
        for row in perf_ints[well]:
            if row['date'] > act_perf_year:
                continue
            top = row['top']
            bot = row['bot']
            perf_type = row['type']

            idx = bisect_left(act_perf_well, top)
            if idx == len(act_perf_well):
                act_perf_well.append({'well': well, 'top': top, 'bot': bot,
                                      'perf_type': perf_type})
            else:
                shift = 1
                if act_perf_well[idx]['top'] > top:
                    if act_perf_well[idx]['perf_type'] == perf_type:
                        act_perf_well[idx]['top'] = top
                    else:
                        act_perf_well.insert(idx,
                                             {'well': well, 'top': top,
                                              'bot': bot if
                                              act_perf_well[idx][
                                                  'top'] > bot else
                                              act_perf_well[idx]['top'],
                                              'perf_type': perf_type})
                        shift += 1
                if act_perf_well[idx]['bot'] < bot:
                    if act_perf_well[idx]['perf_type'] == perf_type:
                        act_perf_well[idx]['bot'] = bot
                    else:
                        act_perf_well.insert(idx + shift,
                                             {'well': well,
                                              'top': act_perf_well[idx]['bot'],
                                              'bot': bot,
                                              'perf_type': perf_type})

        act_perf.extend(act_perf_well)
    return act_perf

In [None]:
perf = get_actual_perf(perf_ints)


In [10]:
gr = perf_df.groupby('well', sort=False)

In [26]:
perf_well = perf_df.set_index('well')

g = perf_well.groupby(level=0, sort=False)

perf_ints2 = g.apply(lambda x: [{'type': e[0], 'date': e[1], 'top': e[2], 'bot': e[3]} for e in x.values]).to_dict()