In [1]:
import pandas as pd
import sys
from data_reader import fes_reader, perf_reader, rename_columns
from tqdm import tqdm
import numpy as np
import bisect
import json

In [2]:
# Convert file paths to the separator style of the current operating system.
def replace_slash(file_path):
    """Return `file_path` with each backslash replaced by the platform separator.

    The separator is chosen from `sys.platform`: 'win32' and 'cygwin' keep the
    backslash (so the path comes back unchanged), 'darwin' and 'linux2' use a
    forward slash, and any platform not listed is treated like 'linux2'.
    Forward slashes already present in `file_path` are never modified.
    """
    separators = {'win32': '\\',
                  'cygwin': '\\',
                  'darwin': '/',
                  'linux2': '/'}
    # Unknown platforms fall back to the 'linux2' entry.
    separator = separators.get(sys.platform, separators['linux2'])
    return file_path.replace('\\', separator)

In [3]:
# Resolve both input workbooks to paths that use the current platform's separator.
perf_path, fes_path = map(replace_slash, (
    'input_data\\' + 'armitz/miner/ARX_PERF.xlsx',
    'input_data\\armitz/miner/QINF.xlsx',
))

In [4]:
# Threshold on the `soil` column: find_layers keeps only layers whose value exceeds it.
SOIL_CUT = 60

# Read the perforation workbook first, then the FES workbook.
perf_ints, perf_df = perf_reader(perf_path)
fes_df = fes_reader(fes_path)

started reading perf xl
done reading perf xl
started transform perf table to dict
done
started reading fes xl
done reading fes xl


In [6]:
# Display the loaded FES table (well, top, bot, soil) as a quick visual sanity check.
fes_df

Unnamed: 0,well,top,bot,soil
0,1,,1648.4,
1,1,1669.6,1670.6,69.5
2,1,1678.4,1679.2,
3,1,1683.0,1684.0,
4,1,1686.0,1687.0,
...,...,...,...,...
156269,32960,1901.0,1901.8,52.9
156270,32960,1903.0,1905.0,
156271,32960,1914.0,1915.4,
156272,32960,1917.6,1919.0,


In [16]:
def get_actual_perf(perf_df):
    """Collapse the perforation records of every well into depth intervals.

    Parameters
    ----------
    perf_df : pandas.DataFrame
        Must provide the columns 'well', 'date', 'top', 'bot' and 'type'.

    Returns
    -------
    list of dict
        One dict per resulting interval with the keys 'well', 'top', 'bot'
        and 'perf_type'. Intervals of one well are contiguous in the list
        and ordered by depth.

    Notes
    -----
    Wells and dates are processed in the order in which they first appear
    in `perf_df` (`Series.unique()`); no sorting by date is done here.
    """
    act_perf = []
    for well in tqdm(perf_df['well'].unique()):
        well_df = perf_df[perf_df['well'] == well]
        act_perf_well = []
        for date in well_df['date'].unique():
            date_df = well_df[well_df['date'] == date]
            for top, bot, perf_type in date_df[['top', 'bot', 'type']].values:
                _merge_perf_interval(act_perf_well, well, top, bot, perf_type)
        act_perf.extend(act_perf_well)
    return act_perf


def _merge_perf_interval(intervals, well, top, bot, perf_type):
    """Fold one (top, bot, perf_type) record into a well's interval list, in place.

    `intervals` is kept ordered by 'bot' so that it can be searched with
    `bisect`. A record of the same type that touches or overlaps the stored
    interval widens it; a record of a different type only fills the depth
    range that the stored interval does not already cover.

    NOTE(review): only the first stored interval reached by the record is
    considered. A record that runs past the *following* stored interval is
    not split around it — confirm whether the input data can contain that.
    """
    # First stored interval whose 'bot' is >= top; every earlier interval
    # ends above the new record and cannot touch it.
    idx = bisect.bisect_left([iv['bot'] for iv in intervals], top)
    if idx == len(intervals):
        intervals.append({'well': well, 'top': top, 'bot': bot, 'perf_type': perf_type})
        return

    current = intervals[idx]
    if current['top'] > top:
        if bot < current['top'] or current['perf_type'] != perf_type:
            # The record either lies entirely in the gap above `current` or
            # has a different type: store the part above `current` as its
            # own interval, clipped at `current`'s top.
            intervals.insert(idx, {'well': well, 'top': top,
                                   'bot': bot if current['top'] > bot else current['top'],
                                   'perf_type': perf_type})
            # `current` moved one slot down; keep idx pointing at it so the
            # check below does not look at the interval just inserted.
            idx += 1
        else:
            current['top'] = top
    if current['bot'] < bot:
        if current['perf_type'] == perf_type:
            current['bot'] = bot
        else:
            # Different type: add only the tail below `current`.
            intervals.insert(idx + 1, {'well': well, 'top': current['bot'], 'bot': bot,
                                       'perf_type': perf_type})


# Build the flat list of per-well perforation intervals from the perforation table.

perf =  get_actual_perf(perf_df)

100%|██████████| 4093/4093 [00:45<00:00, 89.64it/s] 


In [18]:
# Export the intervals to Excel. The frame is built directly from the list of
# dicts: the previous JSON round trip passed a literal JSON string to
# pd.read_json (deprecated since pandas 2.1), re-inferred column dtypes and
# could not serialise NumPy integer values.
pd.DataFrame(perf).to_excel("act_perf.xlsx")


In [6]:
# Check whether a layer is perforated.
def is_perf(top, bot, ints):
    """Tell whether the depth range (top, bot) overlaps any interval in `ints`.

    `ints` is an iterable of mappings with 'top' and 'bot' keys, or None when
    there are no intervals to check (the result is then False). The
    comparison is strict, so ranges that merely touch at a boundary do not
    count as overlapping.
    """
    if ints is None:
        return False
    return any(top < interval['bot'] and interval['top'] < bot
               for interval in ints)

def find_layers(perf_ints, fes_df, soil_cut):
    """Collect layers with `soil` above a threshold that are not perforated.

    Parameters
    ----------
    perf_ints : mapping
        Looked up with `.get(well)`; each value is passed to `is_perf` as the
        list of that well's perforation intervals (None when the well is
        missing).
    fes_df : pandas.DataFrame
        Must provide the columns 'well', 'top', 'bot' and 'soil'. Rows with a
        missing value in any of them are ignored. The frame is not modified.
    soil_cut : number
        Only layers with `soil > soil_cut` are reported.

    Returns
    -------
    pandas.DataFrame
        Columns 'well', 'top', 'bot', 'soil', 'is_perf' with a fresh 0..n-1
        index; empty (but with these columns) when nothing qualifies.
    """
    columns = ['well', 'top', 'bot', 'soil', 'is_perf']
    found = []
    for well in tqdm(fes_df['well'].unique()):
        # .dropna() returns a new frame, so nothing below writes into fes_df.
        well_df = fes_df.loc[fes_df['well'] == well, ['well', 'top', 'bot', 'soil']].dropna()
        if well_df.empty:
            continue
        ints = perf_ints.get(well)
        flags = [is_perf(top, bot, ints)
                 for top, bot in zip(well_df['top'], well_df['bot'])]
        well_df = well_df.assign(is_perf=flags)
        non_perf = well_df[(~well_df['is_perf']) &
                           (well_df['soil'] > soil_cut)]
        if not non_perf.empty:
            found.append(non_perf)
    if not found:
        return pd.DataFrame(columns=columns)
    # One concat at the end instead of DataFrame.append per well: append was
    # removed in pandas 2.0 and re-copied the accumulated frame every time.
    return pd.concat(found, ignore_index=True)[columns]

# Layers with soil above SOIL_CUT that do not overlap any perforation interval of their well.
lost_layers = find_layers(perf_ints, fes_df, SOIL_CUT)

100%|██████████| 6457/6457 [01:33<00:00, 69.06it/s]
