In [7]:
import numpy as np
import pandas as pd
import plotly.graph_objects as go
from sklearn.cluster import KMeans
import warnings
from scipy.interpolate import CubicSpline

warnings.simplefilter(action='ignore', category=FutureWarning)
pd.options.mode.chained_assignment = None  # default='warn'

Загрузка данных

In [8]:
# Directory that holds the input files
path = "../input/task3/"
# Load the pickled curves data set.
# NOTE(review): unpickling can execute arbitrary code - only load trusted files.
initial_row_data = pd.read_pickle(path + "curves.pkl")

Обучение модели. Получение кластеров. Визуализация. Метод средних значений

In [9]:
# Work on a copy so the loaded data stays untouched for the other cells
initial_data = initial_row_data.copy()
# Reduce every log curve to a single feature: its mean value
initial_data['Average'] = initial_data['log'].apply(np.mean)
initial_data.to_csv('ave.csv')

# KMeans expects a 2-D (n_samples, n_features) array; build it once and reuse it
average_features = initial_data['Average'].to_numpy().reshape(-1, 1)
# random_state fixes the centroid initialisation so the cluster labels (and the
# saved CSV) are reproducible between runs; n_init is set explicitly because
# its default value differs between scikit-learn releases.
m = KMeans(n_clusters=6, n_init=10, random_state=42)
# Fit the model and get the cluster of every data point in one step
initial_data['predict'] = m.fit_predict(average_features)

# Save the cluster labels
initial_data['predict'].to_csv("Petroleum_team_120_3(1).csv", index=False)
# Visualise the clusters on the X/Y plane
fig = go.Figure(data=go.Heatmap(x=initial_data['X'],
                                y=initial_data['Y'],
                                z=initial_data['predict'],
                                ))
fig.update_layout(title="Визуализация кластеров. Метод средних значений", title_x=0.5,
                  title_font={"size": 25},
                  xaxis_title="Координата Х",
                  yaxis_title="Координата Y")
fig.show()

Обучение модели. Получение кластеров. Визуализация. Метод интерполяции

In [10]:
# Work on a copy so the loaded data stays untouched for the other cells
initial_data = initial_row_data.copy()
# Every log is resampled to the length of the longest one, so that all rows
# end up with the same number of features.
log_lengths = initial_data['log'].apply(len)
max_length = log_lengths.max()
# One cubic spline per log, with knots at the integer positions 0 .. len(log) - 1
interpolation = initial_data['log'].apply(lambda x: CubicSpline(np.arange(len(x)), x))
# Sample each spline at max_length evenly spaced positions. The grid stops at
# the last knot (len - 1): going up to len would extrapolate past the data.
# The column is assigned in one step instead of writing through
# initial_data['new_log'].iloc[i], which is chained assignment and is not
# guaranteed to modify the frame.
initial_data['new_log'] = pd.Series(
    [spline(np.linspace(0, length - 1, max_length)).tolist()
     for spline, length in zip(interpolation, log_lengths)],
    index=initial_data.index,
)
initial_data['Length'] = log_lengths

# Stack the resampled logs into an (n_samples, max_length) feature matrix
resampled_features = np.array(initial_data['new_log'].to_list())
# random_state fixes the centroid initialisation so the cluster labels (and the
# saved CSV) are reproducible between runs; n_init is set explicitly because
# its default value differs between scikit-learn releases.
m = KMeans(n_clusters=7, n_init=10, random_state=42)
# Fit the model and get the cluster of every data point in one step
initial_data['predict'] = m.fit_predict(resampled_features)

# Save the cluster labels
initial_data['predict'].to_csv("Petroleum_team_120_3(2).csv", index=False)
# Visualise the clusters on the X/Y plane
fig = go.Figure(data=go.Heatmap(x=initial_data['X'],
                                y=initial_data['Y'],
                                z=initial_data['predict'],
                                ))
fig.update_layout(title="Визуализация кластеров. Метод интерполяции", title_x=0.5,
                  title_font={"size": 25},
                  xaxis_title="Координата Х",
                  yaxis_title="Координата Y")
fig.show()

Пример интерполяции

In [11]:
from plotly.subplots import make_subplots

# Set to True to plot an example of the interpolation, otherwise False
show_interpolation_example = True
# Positional index of the row whose log is shown
num_well = 100
if show_interpolation_example:
    original_log = initial_data['log'].iloc[num_well]
    resampled_log = initial_data['new_log'].iloc[num_well]

    # Two y axes: the original and the resampled log have different lengths,
    # so each trace is drawn against its own array positions.
    fig = make_subplots(specs=[[{"secondary_y": True}]])

    # y holds exactly one position per sample (0 .. len - 1); the previous
    # range(len + 1) produced one more y value than there are x values.
    fig.add_trace(go.Scatter(name="Initial log",
                             x=original_log,
                             y=list(range(len(original_log))),
                             mode="markers",
                             marker={"size": 9}
                             ),
                  secondary_y=False)
    fig.add_trace(go.Scatter(name="Interpolated log",
                             x=resampled_log,
                             y=list(range(len(resampled_log))),
                             mode="markers+lines",
                             marker={"size": 6},
                             opacity=0.6
                             ),
                  secondary_y=True)
    fig.update_layout(title="Пример интерполяции", title_x=0.5,
                      title_font={"size": 25},
                      xaxis_title="Показание каротажа",
                      )
    fig.update_yaxes(title_text="Позиция точки в массиве", secondary_y=False)
    fig.update_yaxes(title_text="Позиция точки в массиве", secondary_y=True)
    fig.update_layout(legend=dict(orientation="h",
                                  font={"size":15}
                                  ))
    fig.show()