In [1]:
import bisect
import datetime
import json
import logging
import os
import shutil
import sys
from datetime import date
from io import StringIO

import pandas as pd
from tqdm import tqdm

from actual_perf import get_actual_perf
from data_reader import fes_reader, perf_reader, rename_columns, well_renaming
from finder import find_layers
from writexl import write_act_perf, write_layers
# Raise pandas' display limits (row count, column count, output width).
for _option_name, _option_value in (
    ('display.max_rows', 10000),
    ('display.max_columns', 500),
    ('display.width', 1000),
):
    pd.set_option(_option_name, _option_value)

In [2]:
# Convert file paths to the separator used by the current system.
def replace_slash(file_path):
    """Return file_path with every backslash replaced by the platform separator.

    On 'win32' and 'cygwin' backslashes are kept as they are; on every other
    value of sys.platform (including unrecognised ones) they become '/'.
    Forward slashes already present in file_path are never touched.
    """
    backslash_platforms = ('win32', 'cygwin')
    separator = '\\' if sys.platform in backslash_platforms else '/'
    return file_path.replace('\\', separator)


# Empty the output_folder directory.
def clear_out_folder(output_folder):
    """Delete everything inside output_folder, leaving the folder itself in place.

    Regular files and symbolic links are unlinked; real sub-directories are
    removed recursively (os.remove alone raises on a directory).

    Paths are built with os.path.join rather than by gluing a backslash and
    rewriting it afterwards, so entry names that themselves contain a
    backslash (legal on POSIX) are not corrupted.

    Raises:
        OSError: if output_folder cannot be listed or an entry cannot be removed.
    """
    for entry_name in os.listdir(output_folder):
        entry_path = os.path.join(output_folder, entry_name)
        # A symlink to a directory must be unlinked, not followed and emptied.
        if os.path.isdir(entry_path) and not os.path.islink(entry_path):
            shutil.rmtree(entry_path)
        else:
            os.remove(entry_path)


def get_year(conf_perf_year):
    """Turn a configured year into the date of January 1st of that year.

    Args:
        conf_perf_year: the year as read from the config; anything int() accepts
            (e.g. '2015' or 2015). An empty string means "not configured".

    Returns:
        datetime.date(year, 1, 1), or None when the value is empty, cannot be
        converted to an integer, or is outside the range supported by date.
    """
    if conf_perf_year == '':
        return None
    try:
        return date(year=int(conf_perf_year), month=1, day=1)
    # Only conversion/range failures mean "no valid year"; a bare except would
    # also swallow KeyboardInterrupt and SystemExit.
    except (TypeError, ValueError, OverflowError):
        return None

def rename_columns(df):
    """Rename recognised source columns to canonical names and drop all others.

    The frame is modified in place and nothing is returned. Column labels are
    matched (by substring or exact name, see the chain below) to one of the
    canonical names 'well', 'top', 'bot', 'soil', 'date', 'type'. When several
    labels match the same canonical name, the last one wins and the earlier
    ones are dropped together with every unrecognised column.

    Note: this definition shadows the `rename_columns` imported from
    `data_reader` at the top of the notebook.

    Args:
        df: pandas DataFrame whose column labels are expected to be already
            lower-cased/stripped strings; non-string labels are treated as
            unrecognised and dropped.
    """
    canonical_names = ('well', 'top', 'bot', 'soil', 'date', 'type')
    matched = {}  # canonical name -> source column label
    for column in df.columns.values:
        if not isinstance(column, str):
            # `'x' in column` would raise TypeError for e.g. integer labels.
            continue
        if ('скв' in column) or ('skw_nam' in column) or (column == 'skw'):
            matched['well'] = column
        elif ('verh' in column) or ('krow' == column) or ('верх' in column) or ('perf1_t' in column):
            matched['top'] = column
        elif ('niz' in column) or ('podosh' in column) or ('низ' in column) or ('perf2_t' in column):
            matched['bot'] = column
        elif ('nnas' in column) or ('н_нас' in column):
            matched['soil'] = column
        elif ('дата_перф' in column) or (column == 'dat'):
            matched['date'] = column
        elif ('цель' in column) or ('_cel' in column):
            matched['type'] = column
    # Only categories that were actually found take part in the rename, so a
    # column literally named '' can no longer be mistaken for a missing category.
    df.rename(columns={source: target for target, source in matched.items()},
              inplace=True)
    unrecognised = set(df.columns).difference(canonical_names)
    df.drop(columns=list(unrecognised), inplace=True)

In [3]:
input_folder = "input_data"
out_folder = "output_data"

# Start from an empty output folder: create it, or clear a previous run's files.
if not os.path.exists(out_folder):
    os.makedirs(out_folder)
else:
    clear_out_folder(out_folder)

# All errors below are written to Report.txt inside the output folder.
out_path_log = replace_slash(out_folder + "\\" + "Report.txt")
logging.basicConfig(format=u'%(levelname)-8s : %(message)s', filename=out_path_log, filemode='w')


def _run_or_exit(error_prefix, step, *args):
    """Return step(*args); on failure log error_prefix + error text and exit with status 1."""
    try:
        return step(*args)
    # Exception (not BaseException) so Ctrl-C / SystemExit are not reported as data errors.
    except Exception as e:
        logging.error(error_prefix + str(e))
        sys.exit(1)


# Opening the file is inside the try so a missing/unreadable config is logged too.
try:
    with open("config.json", 'r') as f:
        conf = json.load(f)
    SOIL_CUT = float(conf["SOIL_CUT"])
    perf_path = conf["perf_path"]
    fes_path = conf["fes_path"]
    act_perf_year = get_year(conf["act_perf_year"])
except Exception as e:
    logging.error("Error loading config file. " + str(e))
    sys.exit(1)

# Input file names from the config are relative to input_folder.
perf_path = replace_slash(input_folder + '\\' + perf_path)
fes_path = replace_slash(input_folder + '\\' + fes_path)

perf_ints = _run_or_exit("Error loading perf file. ", perf_reader, perf_path)
fes_dict = _run_or_exit("Error loading fes file. ", fes_reader, fes_path)
lost_layers = _run_or_exit("Error while finding layers ", find_layers,
                           perf_ints, fes_dict, SOIL_CUT)
act_perf = _run_or_exit("Error while getting the actual perforation ", get_actual_perf,
                        perf_ints, act_perf_year)

started reading perf xl
done reading perf xl and started processing perf data
started reading fes xl


100%|██████████| 1574/1574 [00:00<00:00, 46816.21it/s]
100%|██████████| 1579/1579 [00:00<00:00, 57364.65it/s]

done reading fes xl
done processing data





In [33]:
# Compare the computed actual perforation (act_df) with the reference
# export stored in perf2.json (act_df2).
# StringIO: passing a literal JSON string to read_json is deprecated in pandas.
act_df = pd.read_json(StringIO(json.dumps(act_perf)))

act_df2 = pd.read_json('perf2.json')

# Normalise the reference frame: lower-case/stripped headers, canonical
# column names, and well names passed through well_renaming.
act_df2.rename(columns=lambda x: x.lower().strip(), inplace=True)

rename_columns(act_df2)

act_df2['well'] = act_df2['well'].apply(well_renaming)

# Both frames are ordered by well and top, descending, with a fresh 0..n-1
# index so that .loc[i] / .loc[i + 1] below address neighbouring rows.
act_df2.sort_values(by=['well', 'top'], ascending=False, inplace=True)
act_df.sort_values(by=['well', 'top'], ascending=False, inplace=True)
act_df.reset_index(drop=True, inplace=True)
act_df2.reset_index(drop=True, inplace=True)

# Merge touching intervals in act_df2: when row i starts exactly where row
# i + 1 ends (same well), row i + 1 takes over row i's bot and row i is dropped.
del_idxs = []
for i in tqdm(range(len(act_df2) - 1)):
    if (act_df2.loc[i, 'top'] == act_df2.loc[i + 1, 'bot']) and (act_df2.loc[i, 'well'] == act_df2.loc[i + 1, 'well']):
        act_df2.loc[i + 1, 'bot'] = act_df2.loc[i, 'bot']
        del_idxs.append(i)
act_df2.drop(del_idxs, inplace=True)
act_df2.reset_index(drop=True, inplace=True)

# Drop row i from act_df when the next row of the same well has the same bot.
del_idxs = []
for i in tqdm(range(len(act_df) - 1)):
    if (act_df.loc[i, 'well'] == act_df.loc[i + 1, 'well']):
        # NOTE(review): the saved cell ended this condition with a dangling
        # `or` (a SyntaxError). The unfinished second operand is unknown, so
        # only the visible condition is kept. TODO: confirm whether a further
        # case (e.g. an interval contained in the next one) was intended.
        if (act_df.loc[i, 'bot'] == act_df.loc[i + 1, 'bot']):
            del_idxs.append(i)
act_df.drop(del_idxs, inplace=True)
act_df.reset_index(drop=True, inplace=True)

# Number of wells present in only one of the two frames (kept in variables:
# a bare expression in the middle of a cell is never displayed).
n_wells_only_in_act_df = len(list(set(act_df['well'].unique()).difference(act_df2['well'].unique())))

n_wells_only_in_act_df2 = len(list(set(act_df2['well'].unique()).difference(act_df['well'].unique())))

# Restrict the computed frame to wells that also occur in the reference.
act_df_intersec = act_df[act_df['well'].isin(act_df2['well'].unique())]

# NOTE(review): the meaning of perf_type == 1 is not visible here — confirm.
act_df_diff = act_df_intersec[act_df_intersec['perf_type'] == 1][['well', 'top', 'bot']]

# Rows compared as whole tuples: diff = computed rows missing from the
# reference, diff2 = reference rows missing from the computed frame.
diff = act_df_diff[~act_df_diff.apply(tuple,1).isin(act_df2.apply(tuple,1))]
diff2 = act_df2[~act_df2.apply(tuple,1).isin(act_df_diff.apply(tuple,1))]

100%|██████████| 6240/6240 [00:00<00:00, 7258.18it/s]
100%|██████████| 6866/6866 [00:00<00:00, 57039.25it/s]


In [34]:
# Row counts of the two one-sided differences, one per line.
print(len(diff), len(diff2), sep='\n')

261
307


In [28]:
# Stack both frames and keep only rows that occur exactly once overall
# (keep=False discards every member of a duplicated group).
diff3 = (
    pd.concat([act_df2, act_df_diff])
    .drop_duplicates(keep=False)
)

In [35]:
# Row counts of diff, diff2 and diff3, one per line.
print(len(diff), len(diff2), len(diff3), sep='\n')

261
307
562


In [36]:
# First 20 rows of diff.
diff.iloc[:20]

Unnamed: 0,well,top,bot
21,9644д,1636.0,1640.0
71,9633,1825.5,1828.0
72,9633,1825.0,1830.0
108,9624,1776.0,1776.6
167,9613,1672.0,1676.0
182,9606,1810.0,1813.0
183,9606,1805.6,1810.0
201,9600,1684.4,1687.0
276,9584,1722.0,1729.0
315,9571а,1849.6,1851.6


In [41]:
# Rows of act_df_diff with index labels 19 through 27 (label slice, both ends included).
act_df_diff.loc[slice(19, 27)]

Unnamed: 0,well,top,bot
19,9645,1617.5,1619.5
20,9645,1605.0,1609.0
21,9644д,1636.0,1640.0
22,9644д,1631.0,1641.0
23,9644д,1619.4,1621.0
24,9644д,1615.2,1617.4
25,9644д,1606.8,1609.0
26,9644,1616.5,1619.0
27,9644,1611.5,1615.5


In [42]:
# Reference intervals recorded for well '9644д'.
act_df2.loc[act_df2['well'].eq('9644д')]

Unnamed: 0,well,top,bot
12,9644д,1631.0,1641.0
13,9644д,1619.4,1621.0
14,9644д,1615.2,1617.4
15,9644д,1606.8,1609.0
