In [3]:
import pandas as pd
import numpy as np
import plotly.graph_objects as go
import os
from collections import Counter
from scipy.interpolate import interp1d

In [4]:
def seismic_ascii_parser(file_path):
    all_data = []
    head = ""
    xarray = []
    yarray = []
    zarray = []
    with open(file_path, 'r') as f:
        content = f.readlines()
        for line in content:
            if '#' in line:
                head += line
                continue
            l = line.split(' ')
            xarray.append(float(l[0]))
            yarray.append(float(l[1]))
            zarray.append(float(l[2]))

    df = pd.DataFrame({'x': xarray, 'y': yarray, 'z': zarray})
    all_data.append({"test": df})
    return create_seismic_ascii_date(all_data)[0]

def create_seismic_ascii_date(data_frame):
    for dataframe_data in data_frame:
        _, value = next(iter(dataframe_data.items()))
        return value.to_dict("records"),


In [24]:
# Это списки со словарями формата [{'x': 445946.632991, 'y': 7004198.214381, 'z': -2884.616455}, ...]
Lateral_trend = seismic_ascii_parser(r"C:/HV/Seismic/datas/test_3_5/SFA_4_klass.txt")
Top = seismic_ascii_parser(r"C:/HV/Seismic/datas/test_3_5/TVDSS_Top_U1.txt")
Bottom = seismic_ascii_parser(r"C:/HV/Seismic/datas/test_3_5/TVDSS_Bottom_U1.txt")

In [25]:
df_Lateral_trend = pd.DataFrame(Lateral_trend)
df_Top_U1 = pd.DataFrame(Top)
df_Bottom_U1 = pd.DataFrame(Bottom)

In [26]:
#Датафрейм из имен скважин и их координатами. 
column_names = ['wells', 'name', 'inlines', 'xlines', 'top', 'bottom'] 
df_all_wells_coor = pd.read_csv('C:/HV/Seismic/datas/test_3_5/data_wells.txt', sep='\t', names=column_names, header=None)

In [28]:
df_Lateral_trend = df_Lateral_trend.sort_values('x')
df_Top_U1 = df_Top_U1.sort_values('x')
df_Bottom_U1 = df_Bottom_U1.sort_values('x')
df_all_wells_coor = df_all_wells_coor.sort_values('inlines')

merged_lateral = pd.merge_asof(df_all_wells_coor, df_Lateral_trend, left_on='inlines', right_on='x', direction='nearest')
merged_lateral = merged_lateral.sort_values('xlines')

df_Top_U1 = df_Top_U1.sort_values('y')
merged_top = pd.merge_asof(merged_lateral, df_Top_U1, left_on='xlines', right_on='y', direction='nearest', suffixes=('', '_top'))
merged_top = merged_top.sort_values('xlines')

df_Bottom_U1 = df_Bottom_U1.sort_values('y')
merged_bottom = pd.merge_asof(merged_top, df_Bottom_U1, left_on='xlines', right_on='y', direction='nearest', suffixes=('', '_bottom'))


result_df = merged_bottom[['wells', 'inlines', 'xlines', 'z', 'z_top', 'z_bottom']]
result_df.columns = ['wells', 'inlines', 'xlines', 'Lateral_trend', 'Top_TVD', 'Bottom_TVD']

In [29]:
#добавляем колонки по МД и переводим в словарь для упрощения
result_df.loc[:, 'Top_MD'] = None
result_df.loc[:, 'Bottom_MD'] = None
list_of_dicts = result_df.to_dict(orient='list')



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



In [30]:
def process_well_data_3(folder_path, dict):
    copy_dict = dict.copy()
    for filename in os.listdir(folder_path):
        if filename.endswith('.txt'):
            name_part = os.path.splitext(filename)[0]

            index = copy_dict['wells'].index(name_part)

            
            file_path = os.path.join(folder_path, filename)
            data = pd.read_csv(file_path, sep=r'\s+', comment='#', header=0, on_bad_lines='skip')

            if data.shape[1] >= 4:
                
                first_col_numeric = pd.to_numeric(data.iloc[:, 0], errors='coerce')
                first_col_interpolated = first_col_numeric.interpolate()
                fourth_col = data.iloc[:, 3]
               
                top_tvd = copy_dict['Top_TVD'][index]
                bottom_tvd = copy_dict['Bottom_TVD'][index]

                top_md = first_col_interpolated[fourth_col == top_tvd].values
                bottom_md = first_col_interpolated[fourth_col == bottom_tvd].values
                
                
                if top_md.size == 0:
                    interp_func = interp1d(fourth_col, first_col_interpolated, bounds_error=False, fill_value='extrapolate')
                    top_md = interp_func(top_tvd)

                if bottom_md.size == 0:
                    interp_func = interp1d(fourth_col, first_col_interpolated, bounds_error=False, fill_value='extrapolate')
                    bottom_md = interp_func(bottom_tvd)

                copy_dict['Top_MD'][index] = round(float(top_md), 5)
                copy_dict['Bottom_MD'][index] = round(float(bottom_md), 5)

    return copy_dict
 

In [31]:
dict_final = process_well_data_3('C:/HV/Seismic/datas/test_3_5/Inklinometria', list_of_dicts)

In [32]:
df_table_coor = pd.DataFrame(dict_final)

In [33]:
def process_txt_files(folder_path, df):
    
    data = []
    for filename in os.listdir(folder_path):
        if filename.endswith('.txt'):
            
            name_part = filename.split('_')[0]
            
            
            well_row = df[df['wells'] == name_part]
            
            if not well_row.empty:
                top_u1 = well_row['Top_MD'].values[0]
                bottom_u1 = well_row['Bottom_MD'].values[0] 
                file_path = os.path.join(folder_path, filename)

                with open(file_path, 'r') as file:
                    
                    for line in file:
                        if line.startswith('~Ascii'):
                            break

                    
                    values = []
                    for line in file:
                        parts = line.split()
                        if len(parts) >= 2:
                            try:
                                first_value = float(parts[0]) 
                                
                                if first_value >= top_u1 and first_value <= bottom_u1:
                                    
                                    value = parts[1]
                                    if value != '-999.250000':
                                        values.append(float(value))
                            except ValueError:
                                continue  

                    
                    if values:
                        value_counts = Counter(values)
                        most_common_value, most_common_count = value_counts.most_common(1)[0]

                        entry = {
                            'wells': name_part,
                            'наиболее_частое_значение': most_common_value,
                            'количество': most_common_count
                        }

                        for value, count in value_counts.items():
                            if value != most_common_value:
                                entry[f'значение_{value}'] = value
                                entry[f'количество_{value}'] = count

                        data.append(entry)

    result_df = pd.DataFrame(data)
    result_df.fillna(0, inplace=True)

    return result_df


In [34]:
folder_path = 'C:/HV/Seismic/datas/test_3_5/Log_facii'   # Укажите путь к вашей папке
df_facii  = process_txt_files(folder_path, df_table_coor)

In [35]:
df_finish = pd.merge(df_table_coor[['wells', 'Lateral_trend']], df_facii[['wells', 'наиболее_частое_значение']], on='wells', how='inner')

In [36]:
df_finish

Unnamed: 0,wells,Lateral_trend,наиболее_частое_значение
0,673R,1.0,0.0
1,676R,3.0,0.0
2,660R,2.0,0.0
3,661R,0.0,0.0
4,669R,1.0,0.0
5,663R,0.0,0.0
6,662R,1.0,0.0


In [37]:
def plot_with_trendline(df, x_col, y_col, name_col):
    """
    Функция для построения графика с линией тренда и значением R^2,
    а также отображением всех имен при наведении на точки.

    :param df: DataFrame, содержащий данные ['wells', 'Lateral_trend', 'наиболее_частое_значение']
    :param x_col: str, имя колонки для оси X
    :param y_col: str, имя колонки для оси Y
    :param name_col: str, имя колонки с именами для отображения
    """

    df[name_col] = df[name_col].astype(str)
    df['hover_names'] = df.groupby([x_col, y_col])[name_col].transform(lambda x: ', '.join(x))
    x = df[x_col]
    y = df[y_col]
    hover_names = df['hover_names']

    coefficients = np.polyfit(x, y, 1)
    polynomial = np.poly1d(coefficients)
    trendline = polynomial(x)

    r_squared = 1 - (np.sum((y - trendline) ** 2) / np.sum((y - np.mean(y)) ** 2))
    r_sqrt = round(np.sqrt(r_squared), 3)

    slope = coefficients[0]
    intercept = coefficients[1]
    equation = f'y = {slope:.2f}x + {intercept:.2f}'


    fig = go.Figure()
    fig.add_trace(go.Scatter(
        x=x,
        y=y,
        mode='markers',
        name='Скважины',
        text=hover_names,  
        hoverinfo='text'  
    ))
    fig.add_trace(go.Scatter(x=x, y=trendline, mode='lines', name='Линия тренда'))

    fig.add_annotation(
        x=0.5,
        y=0.9,
        text=f'|R| = {r_sqrt:.2f}<br>{equation}',
        showarrow=False,
        font=dict(size=16),
        xref="paper",
        yref="paper"
    )

    fig.update_layout(
        title='Скважины с линией тренда',
        xaxis_title=x_col,
        yaxis_title=y_col
    )
    fig.show()
    
    if r_sqrt < 0.7:
        print('Плохая корреляция преобладающих фаций в скважинах с картой трендов')


In [38]:
plot_with_trendline(df_finish, 'Lateral_trend', 'наиболее_частое_значение', 'wells')
# из данных что мне дали полчуается вот такая картина, это нормально. Саша так сказала


invalid value encountered in scalar divide



In [39]:
#для примера
df = pd.read_excel('C:/HV/Seismic/datas/test_3_5/SFA_SO.xlsx')

In [40]:
df 

Unnamed: 0,Скважина,Сейсм класс,Скв класс1
0,9,5,1
1,16,4,1
2,8,5,3
3,3,5,2
4,2,5,1
5,17,4,1
6,7,5,2
7,5,5,2
8,14,4,1
9,15,4,1


In [41]:
plot_with_trendline(df, 'Сейсм класс', 'Скв класс1', 'Скважина')

Плохая корреляция преобладающих фаций в скважинах с картой трендов
