In [21]:
import pandas as pd
import numpy as np
import plotly.graph_objects as go
import os
from scipy.interpolate import interp1d

In [22]:
def seismic_ascii_parser(file_path):
    all_data = []
    head = ""
    xarray = []
    yarray = []
    zarray = []
    with open(file_path, 'r') as f:
        content = f.readlines()
        for line in content:
            if '#' in line:
                head += line
                continue
            l = line.split(' ')
            xarray.append(float(l[0]))
            yarray.append(float(l[1]))
            zarray.append(float(l[2]))

    df = pd.DataFrame({'x': xarray, 'y': yarray, 'z': zarray})
    all_data.append({"test": df})
    return create_seismic_ascii_date(all_data)[0]

def create_seismic_ascii_date(data_frame):
    for dataframe_data in data_frame:
        _, value = next(iter(dataframe_data.items()))
        return value.to_dict("records"),


In [23]:
def process_well_data_3(folder_path, dict):
    copy_dict = dict.copy()
    for filename in os.listdir(folder_path):
        if filename.endswith('.txt'):
            name_part = os.path.splitext(filename)[0]

            index = copy_dict['wells'].index(name_part)

            
            file_path = os.path.join(folder_path, filename)
            data = pd.read_csv(file_path, sep=r'\s+', comment='#', header=0, on_bad_lines='skip')

            if data.shape[1] >= 4:
                
                first_col_numeric = pd.to_numeric(data.iloc[:, 0], errors='coerce')
                first_col_interpolated = first_col_numeric.interpolate()
                fourth_col = data.iloc[:, 3]
               
                top_tvd = copy_dict['Top_TVD'][index]
                bottom_tvd = copy_dict['Bottom_TVD'][index]

                top_md = first_col_interpolated[fourth_col == top_tvd].values
                bottom_md = first_col_interpolated[fourth_col == bottom_tvd].values
                
                
                if top_md.size == 0:
                    interp_func = interp1d(fourth_col, first_col_interpolated, bounds_error=False, fill_value='extrapolate')
                    top_md = interp_func(top_tvd)

                if bottom_md.size == 0:
                    interp_func = interp1d(fourth_col, first_col_interpolated, bounds_error=False, fill_value='extrapolate')
                    bottom_md = interp_func(bottom_tvd)

                copy_dict['Top_MD'][index] = round(float(top_md), 5)
                copy_dict['Bottom_MD'][index] = round(float(bottom_md), 5)

    return copy_dict
 

In [24]:
# Это списки со словарями формата [{'x': 445946.632991, 'y': 7004198.214381, 'z': -2884.616455}, ...]
Lateral_trend = seismic_ascii_parser(r"C:/HV/Seismic/datas/test_3_2/Lateral_trend_No_smooth_Heff.txt")
Top = seismic_ascii_parser(r"C:/HV/Seismic/datas/test_3_5/TVDSS_Top_U1.txt")
Bottom = seismic_ascii_parser(r"C:/HV/Seismic/datas/test_3_5/TVDSS_Bottom_U1.txt")
df_Lateral_trend = pd.DataFrame(Lateral_trend)
df_Top_U1 = pd.DataFrame(Top)
df_Bottom_U1 = pd.DataFrame(Bottom)

In [25]:
#Датафрейм из имен скважин и их координатами. 
column_names = ['wells', 'name', 'inlines', 'xlines', 'top', 'bottom'] 
df_all_wells_coor = pd.read_csv('C:/HV/Seismic/datas/test_3_2/data_wells.txt', sep='\t', names=column_names, header=None)

In [26]:
df_Lateral_trend = df_Lateral_trend.sort_values('x')
df_Top_U1 = df_Top_U1.sort_values('x')
df_Bottom_U1 = df_Bottom_U1.sort_values('x')
df_all_wells_coor = df_all_wells_coor.sort_values('inlines')

merged_lateral = pd.merge_asof(df_all_wells_coor, df_Lateral_trend, left_on='inlines', right_on='x', direction='nearest')
merged_lateral = merged_lateral.sort_values('xlines')

df_Top_U1 = df_Top_U1.sort_values('y')
merged_top = pd.merge_asof(merged_lateral, df_Top_U1, left_on='xlines', right_on='y', direction='nearest', suffixes=('', '_top'))
merged_top = merged_top.sort_values('xlines')

df_Bottom_U1 = df_Bottom_U1.sort_values('y')
merged_bottom = pd.merge_asof(merged_top, df_Bottom_U1, left_on='xlines', right_on='y', direction='nearest', suffixes=('', '_bottom'))


result_df = merged_bottom[['wells', 'inlines', 'xlines', 'z', 'z_top', 'z_bottom']]

# датафрейм с координатами TVD
result_df.columns = ['wells', 'inlines', 'xlines', 'Lateral_trend', 'Top_TVD', 'Bottom_TVD']

In [27]:
#добавляем колонки по МД и переводим в словарь для упрощения
result_df.loc[:, 'Top_MD'] = None
result_df.loc[:, 'Bottom_MD'] = None
list_of_dicts = result_df.to_dict(orient='list')



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



In [28]:
result_df

Unnamed: 0,wells,inlines,xlines,Lateral_trend,Top_TVD,Bottom_TVD,Top_MD,Bottom_MD
0,660R,450928.6364,7011087.0,81.747803,-2786.688477,-2875.321045,,
1,661R,457638.9512,7011792.0,75.619003,-2764.798584,-2843.564209,,
2,662R,455158.133924,7014716.0,83.279999,-2762.990479,-2843.939941,,
3,668R,449834.86,7014866.0,78.683403,-2762.426514,-2854.495117,,


In [29]:
dict_final = process_well_data_3('C:/HV/Seismic/datas/test_3_2/Inklinometria', list_of_dicts)

In [30]:
df_table_coor = pd.DataFrame(dict_final)

In [31]:
df_table_coor

Unnamed: 0,wells,inlines,xlines,Lateral_trend,Top_TVD,Bottom_TVD,Top_MD,Bottom_MD
0,660R,450928.6364,7011087.0,81.747803,-2786.688477,-2875.321045,2900.17848,2988.81104
1,661R,457638.9512,7011792.0,75.619003,-2764.798584,-2843.564209,2893.78858,2972.55421
2,662R,455158.133924,7014716.0,83.279999,-2762.990479,-2843.939941,2876.09048,2957.03994
3,668R,449834.86,7014866.0,78.683403,-2762.426514,-2854.495117,2867.56651,2959.63512


In [32]:
def process_txt_files_2(folder_path, df):
    
    average_values = []
    for filename in os.listdir(folder_path):
        if filename.endswith('.txt'):
            name_part = filename.split('_')[0]
            well_row = df[df['wells'] == name_part]
            
            if not well_row.empty:
                top_u1 = well_row['Top_MD'].values[0]
                bottom_u1 = well_row['Bottom_MD'].values[0] - (well_row['Bottom_MD'].values[0] - top_u1) / 2
                file_path = os.path.join(folder_path, filename)
                with open(file_path, 'r') as file:
                    for line in file:
                        if line.startswith('~Ascii'):
                            break

                    values = []
                    for line in file:
                        parts = line.split()
                        if len(parts) >= 2:
                            try:
                                first_value = float(parts[0]) 
                                
                                if top_u1 <= first_value <= bottom_u1:
                                    value = parts[1]
                                    if value != '-999.250000':
                                        values.append(float(value))
                            except ValueError:
                                continue  

                    if values:
                        average_value = sum(values) / len(values)
                        average_values.append(average_value)
                    else:
                        average_values.append(None)  
    df['среднее_значение'] = average_values

    return df


In [33]:
folder_path = 'C:/HV/Seismic/datas/test_3_2/karotags'   
df_facii  = process_txt_files_2(folder_path, df_table_coor)

In [34]:
df_facii

Unnamed: 0,wells,inlines,xlines,Lateral_trend,Top_TVD,Bottom_TVD,Top_MD,Bottom_MD,среднее_значение
0,660R,450928.6364,7011087.0,81.747803,-2786.688477,-2875.321045,2900.17848,2988.81104,15.228834
1,661R,457638.9512,7011792.0,75.619003,-2764.798584,-2843.564209,2893.78858,2972.55421,13.880001
2,662R,455158.133924,7014716.0,83.279999,-2762.990479,-2843.939941,2876.09048,2957.03994,12.516621
3,668R,449834.86,7014866.0,78.683403,-2762.426514,-2854.495117,2867.56651,2959.63512,12.392252


In [35]:
def plot_with_trendline(df, x_col, y_col, name_col):
    """
    Функция для построения графика с линией тренда и значением R^2,
    а также отображением всех имен при наведении на точки.

    :param df: DataFrame, содержащий данные
    :param x_col: str, имя колонки для оси X
    :param y_col: str, имя колонки для оси Y
    :param name_col: str, имя колонки с именами для отображения
    """
    # Преобразование значений в колонке с именами в строковый формат
    df[name_col] = df[name_col].astype(str)

    # Создание новой колонки с объединенными именами для одинаковых координат
    df['hover_names'] = df.groupby([x_col, y_col])[name_col].transform(lambda x: ', '.join(x))

    # Извлечение данных
    x = df[x_col]
    y = df[y_col]
    hover_names = df['hover_names']

    # Вычисление коэффициентов линейной регрессии
    coefficients = np.polyfit(x, y, 1)
    polynomial = np.poly1d(coefficients)
    trendline = polynomial(x)

    # Вычисление R^2
    r_squared = 1 - (np.sum((y - trendline) ** 2) / np.sum((y - np.mean(y)) ** 2))
    r_sqrt = round(np.sqrt(r_squared), 3)

    # Форматирование уравнения линии тренда
    slope = coefficients[0]
    intercept = coefficients[1]
    equation = f'y = {slope:.2f}x + {intercept:.2f}'


    # Создание графика
    fig = go.Figure()

    # Добавление точки данных с текстом для отображения имен
    fig.add_trace(go.Scatter(
        x=x,
        y=y,
        mode='markers',
        name='Скважины',
        text=hover_names,  # Текст для отображения при наведении
        hoverinfo='text'  # Отображение только текста
    ))

    # Добавление линии тренда
    fig.add_trace(go.Scatter(x=x, y=trendline, mode='lines', name='Линия тренда'))

    # Добавление аннотации с R^2
    fig.add_annotation(
        x=0.5,
        y=0.9,
        text=f'|R| = {r_sqrt:.2f}<br>{equation}',
        showarrow=False,
        font=dict(size=16),
        xref="paper",
        yref="paper"
    )

    # Настройки графика
    fig.update_layout(
        title='Скважины с линией тренда',
        xaxis_title=x_col,
        yaxis_title=y_col
    )

    # Показать график
    fig.show()
    
    if r_sqrt < 0.7:
        print('Полученную зависимость нельзя использовать в качестве тренда. Обратите внимание на другие сейсмические атрибуты.')
    else:
        print('Полученная зависимость пригодна для использования в качестве тренда. Тест пройден успешно')


In [36]:
plot_with_trendline(df_facii, 'Lateral_trend', 'среднее_значение', 'wells')

Полученную зависимость нельзя использовать в качестве тренда. Обратите внимание на другие сейсмические атрибуты.
