In [1]:
import pandas as pd
import sys
from data_reader import DataReader, rename_columns
from tqdm import tqdm
import bisect
import json
import datetime

import json
import logging
import os
import sys
from datetime import date

from finder import find_layers
from actual_perf import get_actual_perf
from writexl import write_layers, write_act_perf
# Raise pandas' display limits (row count, column count, line width).
for option_name, option_value in (
    ('display.max_rows', 10000),
    ('display.max_columns', 500),
    ('display.width', 1000),
):
    pd.set_option(option_name, option_value)

In [2]:
def replace_slash(file_path):
    """Convert the backslashes in ``file_path`` to the separator for this OS.

    On 'win32' and 'cygwin' the backslash is kept; on every other value of
    ``sys.platform`` each backslash becomes a forward slash.
    """
    separator = '\\' if sys.platform in ('win32', 'cygwin') else '/'
    return file_path.replace('\\', separator)


def clear_out_folder(output_folder):
    """Delete everything inside ``output_folder``; the folder itself is kept.

    Regular files and symlinks are removed with ``os.remove``. Sub-directories
    are removed recursively, so a leftover folder no longer aborts the clean-up
    with an error from ``os.remove``.

    Entry paths are built with ``os.path.join`` so that names are passed to the
    OS unchanged (no backslash rewriting of the folder or file name).
    """
    import shutil  # local import: only this function needs it

    for entry in os.listdir(output_folder):
        entry_path = os.path.join(output_folder, entry)
        # A symlink to a directory must be unlinked, not followed.
        if os.path.isdir(entry_path) and not os.path.islink(entry_path):
            shutil.rmtree(entry_path)
        else:
            os.remove(entry_path)


def get_year(conf_perf_year):
    """Turn the configured year into ``date(year, 1, 1)``.

    Parameters:
        conf_perf_year: value read from the config; anything ``int()`` accepts.

    Returns:
        A ``datetime.date`` for 1 January of that year, or ``None`` when the
        value is an empty string or cannot be converted to a valid year.
    """
    if conf_perf_year == '':
        return None
    try:
        return date(year=int(conf_perf_year), month=1, day=1)
    except (TypeError, ValueError, OverflowError):
        # Only conversion failures mean "no year"; interrupts and other
        # unrelated errors must propagate instead of being swallowed.
        return None

def rename_columns(df):
    """Rename recognised columns of ``df`` to canonical names, in place.

    Every column name is checked against the rules below (the first matching
    rule wins). A matched column is renamed to its canonical name: 'well',
    'top', 'bot', 'soil', 'date', 'type', 'type_perf' or 'layer'. All columns
    whose final name is not canonical are dropped. When several columns match
    the same canonical name, the last one in column order is kept.

    Column names must be strings, because the rules use substring tests.
    ``df`` is modified in place and nothing is returned.

    Note: this definition shadows the ``rename_columns`` imported from
    ``data_reader`` at the top of the notebook.
    """
    rules = (
        ('well', lambda c: ('скв' in c) or ('skw_nam' in c) or (c == 'skw')),
        ('top', lambda c: ('verh' in c) or (c == 'krow') or ('верх' in c) or ('perf1_t' in c)),
        ('bot', lambda c: ('niz' in c) or ('podosh' in c) or ('низ' in c) or ('perf2_t' in c)),
        ('soil', lambda c: ('nnas' in c) or ('н_нас' in c)),
        ('date', lambda c: ('дата_перф' in c) or (c == 'dat')),
        ('type', lambda c: ('цель' in c) or ('_cel' in c)),
        ('type_perf', lambda c: 'tip_perf' in c),
        ('layer', lambda c: 'plast_nam' in c),
    )

    # canonical name -> source column that matched it (later columns overwrite)
    source_of = {}
    for column in df.columns.values:
        for target, matches in rules:
            if matches(column):
                source_of[target] = column
                break

    # Only matched columns go into the mapping. Unmatched targets used to
    # contribute '' keys, which renamed (and so kept) a column named ''.
    df.rename(columns={source: target for target, source in source_of.items()},
              inplace=True)

    canonical = {target for target, _ in rules}
    df.drop(columns=list(set(df.columns).difference(canonical)), inplace=True)

In [3]:
# Folder names, relative to the notebook's working directory.
input_folder = "input_data"
out_folder = "output_data"

# The two result workbooks and the text report all live in the output folder.
output_path_l, output_path_a, out_path_log = (
    replace_slash(out_folder + "\\" + file_name)
    for file_name in ("non_perf_layers.xlsx", "act_perf.xlsx", "Report.txt")
)

# Start from an empty output folder: wipe an existing one, otherwise create it.
if os.path.exists(out_folder):
    clear_out_folder(out_folder)
else:
    os.makedirs(out_folder)

# Log to the report file, overwriting any previous report.
logging.basicConfig(
    format=u'%(levelname)-8s : %(message)s',
    filename=out_path_log,
    filemode='w',
)

# Load the run parameters. Opening the file is inside the try block so that a
# missing or unreadable config.json is logged like any other config problem.
try:
    with open("config.json", 'r') as f:
        conf = json.load(f)
    SOIL_CUT = float(conf["SOIL_CUT"])
    perf_path = conf["perf_path"]
    fes_path = conf["fes_path"]
    act_perf_year = get_year(conf["act_perf_year"])
except Exception as e:
    # Exception (not BaseException) lets Ctrl-C / kernel interrupts through.
    logging.error("Error loading config file. " + str(e))
    sys.exit(1)  # non-zero status: the run did not succeed

# Input files are given relative to the input folder.
perf_path = replace_slash(input_folder + '\\' + perf_path)
fes_path = replace_slash(input_folder + '\\' + fes_path)

def _run_step(error_prefix, step, *args):
    """Return ``step(*args)``; on failure log ``error_prefix`` + error text and exit.

    Only ``Exception`` is caught, so keyboard/kernel interrupts propagate, and
    the exit status is 1 so that a failed run is not reported as a success.
    """
    try:
        return step(*args)
    except Exception as e:
        logging.error(error_prefix + str(e))
        sys.exit(1)


dr = DataReader()
perf_ints = _run_step("Error loading perf file. ", dr.perf_reader, perf_path)
fes_dict = _run_step("Error loading fes file. ", dr.fes_reader, fes_path)

# Report wells that appear in only one of the two input files.
perf_rig_diff, rig_perf_diff = dr.well_diff()
if len(perf_rig_diff) > 0:
    logging.warning("These wells in perf file are absent in rigis "
                    + str(perf_rig_diff))
if len(rig_perf_diff) > 0:
    logging.warning("These wells in rigis file are absent in perf "
                    + str(rig_perf_diff))

lost_layers = _run_step("Error while finding layers ",
                        find_layers, perf_ints, fes_dict, SOIL_CUT)
act_perf = _run_step("Error while getting the actual perforation ",
                     get_actual_perf, perf_ints, act_perf_year)

started reading perf xl
done reading perf xl and started processing perf data
started reading fes xl


100%|██████████| 1574/1574 [00:00<00:00, 62674.65it/s]
100%|██████████| 1579/1579 [00:00<00:00, 33452.06it/s]

done reading fes xl
done processing data





In [4]:
# Snapshot the reader's sl_wells collection as a list.
wells1 = [*dr.sl_wells]

In [5]:
# Replace the reader's sl_wells attribute with a fresh, empty set.
# NOTE(review): any later DataReader call that reads sl_wells will see the
# emptied set — confirm this reset is intended before re-running cells below.
dr.sl_wells = set()

In [6]:
# Alias (not a copy) of the reader's current sl_wells set: later changes made
# to dr.sl_wells in place are visible through wells2 as well.
wells2 = dr.sl_wells

In [7]:
def _sorted_nonempty_intervals(frame):
    """Sort by well, top, bot (all descending), drop rows with top == bot, re-index."""
    ordered = frame.sort_values(by=['well', 'top', 'bot'], ascending=False)
    ordered = ordered.drop(ordered[ordered['top'] == ordered['bot']].index)
    return ordered.reset_index(drop=True)


def _merge_touching_intervals(frame):
    """Merge a row into the next one when both are in the same well and the
    row's top equals the next row's bot or the next row's top.

    Expects a 0..n-1 index and the ordering from ``_sorted_nonempty_intervals``.
    The next row takes over the current row's ``bot`` and the current row is
    dropped; because the update is visible to the following iteration, runs of
    touching rows collapse into a single row. ``frame`` is modified during the
    scan; the cleaned, re-indexed frame is returned.
    """
    merged_rows = []
    for i in tqdm(range(len(frame) - 1)):
        same_well = frame.loc[i, 'well'] == frame.loc[i + 1, 'well']
        touching = (frame.loc[i, 'top'] == frame.loc[i + 1, 'bot']
                    or frame.loc[i, 'top'] == frame.loc[i + 1, 'top'])
        if touching and same_well:
            frame.loc[i + 1, 'bot'] = frame.loc[i, 'bot']
            merged_rows.append(i)
    return frame.drop(merged_rows).reset_index(drop=True)


def _drop_contained_intervals(frame):
    """Drop a row when the next row is in the same well and the row's bot is
    not greater than the next row's bot.

    Expects a 0..n-1 index. With the descending sort the next row's top is not
    greater than this row's top, so such a row lies inside the next one.
    """
    contained_rows = [
        i for i in tqdm(range(len(frame) - 1))
        if (frame.loc[i, 'bot'] <= frame.loc[i + 1, 'bot'])
        and (frame.loc[i, 'well'] == frame.loc[i + 1, 'well'])
    ]
    return frame.drop(contained_rows).reset_index(drop=True)


# --- load both sources -------------------------------------------------------
act_df = pd.read_json(json.dumps(act_perf))
act_df2 = pd.read_json('perf2.json')

# Bring the external file to the same column names and well naming.
act_df2.rename(columns=lambda x: x.lower().strip(), inplace=True)
rename_columns(act_df2)
act_df2['well'] = act_df2['well'].apply(dr.well_renaming)

# Only rows with perf_type == 1 of the locally computed table are compared.
act_df = act_df[act_df['perf_type'] == 1]

# --- normalise intervals -----------------------------------------------------
act_df2 = _sorted_nonempty_intervals(act_df2)
act_df = _sorted_nonempty_intervals(act_df)

# NOTE(review): the two sources go through the same two steps in opposite
# order (merge-then-dedupe vs dedupe-then-merge). The original order is kept
# here; confirm whether the asymmetry is intentional.
act_df2 = _drop_contained_intervals(_merge_touching_intervals(act_df2))
act_df = _merge_touching_intervals(_drop_contained_intervals(act_df))

# --- compare -----------------------------------------------------------------
# Number of wells present in only one of the two tables (previously computed
# and discarded; kept under a name so they can be inspected).
n_wells_only_in_act_df = len(set(act_df['well'].unique()).difference(act_df2['well'].unique()))
n_wells_only_in_act_df2 = len(set(act_df2['well'].unique()).difference(act_df['well'].unique()))

act_df_intersec = act_df[act_df['well'].isin(act_df2['well'].unique())]

# Explicit copy: a later cell adds a column to this frame.
act_df_diff = act_df_intersec[act_df_intersec['perf_type'] == 1][['well', 'top', 'bot', 'layer']].copy()

# Rows are compared as whole tuples, so both frames must share column order.
diff = act_df_diff[~act_df_diff.apply(tuple, 1).isin(act_df2.apply(tuple, 1))]
diff2 = act_df2[~act_df2.apply(tuple, 1).isin(act_df_diff.apply(tuple, 1))]

100%|██████████| 6195/6195 [00:00<00:00, 7353.02it/s]
100%|██████████| 3988/3988 [00:00<00:00, 66871.43it/s]
100%|██████████| 4136/4136 [00:00<00:00, 65563.74it/s]
100%|██████████| 4136/4136 [00:00<00:00, 31338.38it/s]


In [8]:
# Row counts of the two one-sided differences, one per line.
print(len(diff), len(diff2), sep='\n')

59
46


In [9]:
# Preview the first five rows of diff2.
diff2.head(n=5)

Unnamed: 0,well,top,bot,layer
133,9599,1679.6,1682.0,Д0+Д1Г2+3
268,9544,1729.4,1733.3,Д0+Д1Б2
269,9544,1722.0,1725.0,Д0+Д1A
585,3682д,1728.8,1731.6,Д0+Д1B
595,3680а,1688.8,1692.0,Д0+Д1Г1


In [10]:
# Stack both tables and keep only rows that occur exactly once overall
# (keep=False removes every member of a duplicated group).
both_sources = pd.concat([act_df2, act_df_diff])
diff3 = both_sources.drop_duplicates(keep=False)

In [11]:
# Row counts of diff, diff2 and diff3, one per line.
print(*(len(frame) for frame in (diff, diff2, diff3)), sep='\n')

59
46
105


In [12]:
# Distinct wells that have at least one differing row; show the count and the
# sorted list.
diff_wells = diff3['well'].unique()
print(len(diff_wells), sorted(diff_wells), sep='\n')

75
['10792', '10801', '10810', '10827д', '10832', '10840', '10843', '10868', '10887', '10894', '1205', '1214', '134д', '145', '14936', '14942', '14963', '152', '153', '154', '15708', '15751', '15755', '16555', '176д', '179', '199', '20113', '20140а', '20174', '20199', '20211а', '20214', '20225', '20236', '20282', '20300', '20323', '20384', '20408', '20415', '20457', '20505', '20508', '20531', '20645', '20648', '20652', '20690', '216д', '231', '242', '2476', '270', '290', '294', '3056', '3100', '3115', '3123', '3128', '3133', '3179', '3184', '3192', '32660', '32693', '3469', '3680а', '3682д', '440д', '9525', '9544', '9599', '9624']


In [13]:
# Write the list of differing wells to a one-column workbook.
diff_wells_table = pd.DataFrame({'скважины': diff_wells})
diff_wells_table.to_excel('diff_wells.xlsx', index=False)

In [14]:
# Empty frame that fixes the column order of the per-well comparison table.
diff_columns = ['source', 'well', 'top', 'bot', 'layer']
diff_df = pd.DataFrame(columns=diff_columns)

In [15]:
# Tag every row with the table it came from. assign() returns a new frame, so
# no column is written into a frame that was produced by filtering another one
# (which triggers SettingWithCopyWarning), and the names are simply rebound.
act_df2 = act_df2.assign(source='армитс')
act_df_diff = act_df_diff.assign(source='local')

In [16]:
# For every differing well add its rows from act_df_diff and then from act_df2,
# in that order. One pd.concat replaces the row-by-row DataFrame.append calls
# (append was removed in pandas 2.0 and re-copies the frame on every call).
diff_df = pd.concat(
    [diff_df]
    + [
        source_frame[source_frame['well'] == well]
        for well in diff_wells
        for source_frame in (act_df_diff, act_df2)
    ]
)

In [17]:
# Save the per-well comparison table without the index column.
diff_info_path = 'diff_wells_info.xlsx'
diff_df.to_excel(diff_info_path, index=False)

In [18]:
# Show the raw entries stored in perf_ints under the key '3140'
# (presumably a well id — confirm against DataReader.perf_reader).
perf_ints['3140']

[{'type': 0,
  'date': datetime.date(2017, 12, 6),
  'top': 1749.0,
  'bot': 1752.0,
  'layer': 'Д0'},
 {'type': 1,
  'date': datetime.date(2016, 5, 20),
  'top': 1762.8,
  'bot': 1765.6,
  'layer': 'Д0+Д1A'},
 {'type': 0,
  'date': datetime.date(2013, 7, 11),
  'top': 1749.0,
  'bot': 1752.0,
  'layer': 'Д0'},
 {'type': 0,
  'date': datetime.date(2012, 3, 1),
  'top': 1749.0,
  'bot': 1752.0,
  'layer': 'Д0'},
 {'type': 1,
  'date': datetime.date(2011, 7, 31),
  'top': 1749.0,
  'bot': 1752.0,
  'layer': 'Д0'},
 {'type': 0,
  'date': datetime.date(2008, 6, 10),
  'top': 1749.0,
  'bot': 1752.0,
  'layer': 'Д0'},
 {'type': 1,
  'date': datetime.date(2008, 1, 9),
  'top': 1763.0,
  'bot': 1765.6,
  'layer': 'Д0+Д1A'},
 {'type': 2,
  'date': datetime.date(2007, 12, 25),
  'top': 1771.2,
  'bot': 1773.0,
  'layer': 'Д0+Д1Б2'},
 {'type': 2,
  'date': datetime.date(2007, 12, 25),
  'top': 1773.0,
  'bot': 1776.2,
  'layer': 'Д0+Д1Б3'},
 {'type': 2,
  'date': datetime.date(2007, 12, 25),
  '