In [1]:
import sys
import os
# Put the parent of the current working directory on the import path
# (presumably so project-local modules next to the notebook folder can be
# imported -- confirm against the repository layout).
# The membership guard keeps the cell idempotent: re-running it does not
# pile up duplicate entries in sys.path.
_PARENT_DIR = os.getcwd() + "/../"
if _PARENT_DIR not in sys.path:
    sys.path.append(_PARENT_DIR)

In [2]:
from sklearn.linear_model import LinearRegression
import numpy as np
import pandas as pd

In [None]:
class WellSimMetric:
    """
    Estimate how similar two wells are.

    Two wells are called similar when linear regressions fitted on each of
    them behave the same way: for identical input conditions the predicted
    bottomhole pressures differ by no more than a relative threshold
    (10% by default).
    """

    # Default maximum relative difference for two predictions to count as similar.
    DEFAULT_THRESHOLD = 0.1

    def __init__(self,
                 X1_train: np.ndarray,
                 X2_train: np.ndarray,
                 y1_train: np.ndarray,
                 y2_train: np.ndarray,
                 threshold: float = DEFAULT_THRESHOLD):
        """Store the training data and fit one linear regression per well.

        Args:
            X1_train (np.ndarray): features for well 1
            X2_train (np.ndarray): features for well 2
            y1_train (np.ndarray): bottomhole pressure for well 1
            y2_train (np.ndarray): bottomhole pressure for well 2
            threshold (float): maximum relative error (see ``get_error``)
                at which two predictions are still considered similar.
                Defaults to 0.1, i.e. 10%.
        """
        self.X1_train = X1_train
        self.X2_train = X2_train
        self.y1_train = y1_train
        self.y2_train = y2_train
        self.threshold = threshold

        # Fit an independent linear regression model for each well.
        self.model1 = LinearRegression().fit(X1_train, y1_train)
        self.model2 = LinearRegression().fit(X2_train, y2_train)

    def get_error(self, y1_pred: np.ndarray, y2_pred: np.ndarray) -> np.ndarray:
        """Compute the symmetric relative error between two predictions.

        The error is ``|y1 - y2| / ((|y1| + |y2|) / 2)``. For predictions of
        the same positive sign this equals ``|y1 - y2| / mean(y1, y2)``.

        Args:
            y1_pred (np.ndarray): predicted bottomhole pressure for well 1
            y2_pred (np.ndarray): predicted bottomhole pressure for well 2

        Returns:
            np.ndarray: element-wise non-negative relative error. Where both
            predictions are exactly zero the error is 0.
        """
        y1 = np.asarray(y1_pred, dtype=float)
        y2 = np.asarray(y2_pred, dtype=float)

        abs_diff = np.abs(y1 - y2)
        # Normalise by the mean magnitude rather than the signed mean: a
        # signed mean turns the error negative for negative predictions
        # (which would always pass a "<= threshold" check) and can cancel
        # to zero for predictions of opposite sign.
        scale = (np.abs(y1) + np.abs(y2)) / 2

        # scale == 0 only when both predictions are 0, i.e. they are equal,
        # so the pre-filled 0 is the correct error there (avoids 0/0 -> NaN).
        error = np.zeros_like(abs_diff)
        np.divide(abs_diff, scale, out=error, where=scale != 0)
        return error

    def get_sim(self) -> np.ndarray:
        """Compare both models on the union of the two training feature sets.

        Both fitted models predict on the concatenation of ``X1_train`` and
        ``X2_train``; each row is flagged by whether the two predictions
        agree within ``self.threshold``.

        Returns:
            np.ndarray: integer array with one entry per row of the
            concatenated features, 1 where the relative error is within the
            threshold and 0 otherwise.
        """
        X = np.concatenate([self.X1_train, self.X2_train])

        # Predict the target for the same inputs with both wells' models.
        y1_pred = self.model1.predict(X)
        y2_pred = self.model2.predict(X)

        # Per-row similarity indicator.
        similarity = (self.get_error(y1_pred, y2_pred) <= self.threshold).astype(int)

        return similarity

In [4]:
# Read the cleaned dataset; the first CSV column is used as the row index.
df = pd.read_csv(filepath_or_buffer="../data/cleaned/data.csv", index_col=0)
df  # bare last expression -> rich display of the frame

Unnamed: 0,Обводненность_для_расчета_PVT_параметров,Глубина_по_стволу_cs,Внутренний_диаметр_эксплуатационной_колонны_cs,Внутренняя_шероховатость_эксплуатационной_колонны_cs,Глубина_по_стволу,Внутренний_диаметр_НКТ,Внутренняя_шероховатость_НКТ,Внешний_диаметр_НКТ,Внутренний_диаметр_эксплуатационной_колонны,FILTERED_MD_LAST_VALUE,...,Коэффициент_продуктиности,Глубина_спуска_ЭЦН_по_стволу_MD,Частота_ЭЦН,Количество_ступеней,Коэффициент_износа,Содержание_растворенного_газа,Плотность_нефти,Удельный_вес_газа,Корреляция_вязкости_нефти,Пластовое_давление
0,1.200000,3026.0,0.1130,0.000152,2851.75,0.062,0.000152,0.073,0.1598,3026.00,...,5.967998,2851.749877,60.000000,354,0.90,143.899995,867.399978,0.87940,1,300.103133
1,89.000000,3160.0,0.1596,0.000015,3145.00,0.062,0.000015,0.073,0.1596,3960.00,...,5.503470,3144.999921,46.000000,401,0.00,255.133001,858.935001,0.66160,1,218.711452
2,61.099998,3160.0,0.1596,0.000015,3145.00,0.062,0.000015,0.073,0.1596,3960.00,...,6.423728,3144.999921,57.000000,375,0.50,255.133001,858.935001,0.66160,1,160.364419
3,94.150002,3491.9,0.1596,0.000152,3135.00,0.062,0.000152,0.073,0.1596,3491.90,...,0.574879,3134.999862,47.000000,418,-0.05,255.132006,858.935001,0.66160,2,187.037349
4,82.199997,3490.6,0.1596,0.000152,3149.00,0.062,0.000152,0.073,0.1596,3625.24,...,4.112801,3149.000123,51.200001,404,0.20,255.132006,858.935001,0.66160,2,186.350903
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
175,84.300003,3656.0,0.1596,0.000152,3314.37,0.062,0.000015,0.073,0.1596,4014.00,...,2.555688,3316.769902,52.500000,318,0.25,255.132006,858.935001,0.66160,2,148.400821
176,91.800003,3426.0,0.1596,0.000152,3157.89,0.062,0.000152,0.073,0.1596,3450.00,...,2.136903,3157.889925,53.000000,342,0.00,130.000005,869.000005,0.79833,1,205.963356
177,89.650002,3453.0,0.1596,0.000015,3057.00,0.062,0.000015,0.073,0.1596,3590.00,...,13.460412,3056.999934,47.900002,318,0.02,130.000005,869.000005,0.79833,1,124.571692
178,80.699997,3160.0,0.1596,0.000015,3058.50,0.062,0.000015,0.073,0.1596,3960.00,...,2.431851,3058.500122,47.500000,401,-0.25,255.133001,858.935001,0.66160,1,161.737294


In [7]:
# Absolute pairwise column correlations, rendered as a colour-graded table.
(
    df.corr()
    .abs()
    .style
    .background_gradient(cmap='coolwarm')
)

Unnamed: 0,Обводненность_для_расчета_PVT_параметров,Глубина_по_стволу_cs,Внутренний_диаметр_эксплуатационной_колонны_cs,Внутренняя_шероховатость_эксплуатационной_колонны_cs,Глубина_по_стволу,Внутренний_диаметр_НКТ,Внутренняя_шероховатость_НКТ,Внешний_диаметр_НКТ,Внутренний_диаметр_эксплуатационной_колонны,FILTERED_MD_LAST_VALUE,FILTERED_TVD_LAST_VALUE,Газовый_фактор,Коэффициент_продуктиности,Глубина_спуска_ЭЦН_по_стволу_MD,Частота_ЭЦН,Количество_ступеней,Коэффициент_износа,Содержание_растворенного_газа,Плотность_нефти,Удельный_вес_газа,Корреляция_вязкости_нефти,Пластовое_давление
Обводненность_для_расчета_PVT_параметров,1.0,0.069288,0.215585,0.221408,0.16245,0.171651,0.083619,0.228179,0.048906,0.043712,0.086348,0.207163,0.076008,0.253699,0.22874,0.215207,0.113312,0.080067,0.091638,0.151553,0.137453,0.173392
Глубина_по_стволу_cs,0.069288,1.0,0.147945,0.03799,0.66485,0.481459,0.010674,0.508891,0.309616,0.374321,0.109411,0.149736,0.021812,0.368636,0.311839,0.31964,0.031813,0.053956,0.061299,0.109803,0.264291,0.010498
Внутренний_диаметр_эксплуатационной_колонны_cs,0.215585,0.147945,1.0,0.297272,0.114948,0.030786,0.11819,0.026511,0.554616,0.127245,0.07561,0.106524,0.019079,0.281994,0.255076,0.272264,0.060398,0.096513,0.009134,0.022744,0.03543,0.063037
Внутренняя_шероховатость_эксплуатационной_колонны_cs,0.221408,0.03799,0.297272,1.0,0.078296,0.165349,0.38388,0.179391,0.148703,0.12032,0.288188,0.17517,0.059616,0.011567,0.01674,0.034118,0.044155,0.110607,0.066677,0.08601,0.110222,0.032932
Глубина_по_стволу,0.16245,0.66485,0.114948,0.078296,1.0,0.63445,0.014636,0.681883,0.292987,0.201127,0.070253,0.194575,0.000393,0.41289,0.314139,0.349195,0.054222,0.033641,0.115874,0.152043,0.21542,0.123064
Внутренний_диаметр_НКТ,0.171651,0.481459,0.030786,0.165349,0.63445,1.0,0.029192,0.94171,0.11039,0.128562,0.006147,0.120132,0.052294,0.409359,0.416286,0.411154,0.136703,0.075255,0.169144,0.308202,0.288326,0.129684
Внутренняя_шероховатость_НКТ,0.083619,0.010674,0.11819,0.38388,0.014636,0.029192,1.0,0.04736,0.056301,0.136455,0.121718,0.110562,0.015847,0.050843,0.055202,0.000121,0.004933,0.039591,0.035457,0.084248,0.105263,0.086537
Внешний_диаметр_НКТ,0.228179,0.508891,0.026511,0.179391,0.681883,0.94171,0.04736,1.0,0.122997,0.102295,0.098955,0.152094,0.057377,0.375049,0.382364,0.377257,0.126607,0.073914,0.160964,0.292732,0.272553,0.090944
Внутренний_диаметр_эксплуатационной_колонны,0.048906,0.309616,0.554616,0.148703,0.292987,0.11039,0.056301,0.122997,1.0,0.092307,0.015262,0.011025,0.009718,0.121552,0.089868,0.116762,0.078023,0.042122,0.036382,0.025972,0.066626,0.026398
FILTERED_MD_LAST_VALUE,0.043712,0.374321,0.127245,0.12032,0.201127,0.128562,0.136455,0.102295,0.092307,1.0,0.201412,0.058547,0.072268,0.134753,0.042354,0.060902,0.155578,0.073092,0.017825,0.275204,0.278534,0.20057
