In [1]:
import pandas as pd
import numpy as np
from matplotlib import pyplot as plt

In [2]:
from pykrige.rk import RegressionKriging
from pykrige.ok import OrdinaryKriging
from pykrige.uk import UniversalKriging

import warnings
warnings.simplefilter(action='ignore')

In [3]:
from sklearn.model_selection import train_test_split
from sklearn.metrics import r2_score, mean_squared_error, mean_absolute_error

In [4]:
def get_filter_df(df, col, k=1.5):
    """Drop rows whose ``col`` value is an outlier by the interquartile-range rule.

    A row is kept when its value lies inside the closed interval
    ``[Q1 - k * IQR, Q3 + k * IQR]``, where ``Q1`` and ``Q3`` are the 0.25 and
    0.75 quantiles of ``df[col]`` and ``IQR = Q3 - Q1``.

    Parameters
    ----------
    df : pandas.DataFrame
        Input table. It is not modified.
    col : str
        Name of the column the fences are computed from and applied to.
    k : float, default 1.5
        Fence multiplier. The default reproduces the previously hard-coded
        ``1.5 * IQR`` rule; larger values keep more rows.

    Returns
    -------
    pandas.DataFrame
        The rows of ``df`` (original index labels and columns) whose ``col``
        value is inside the fences. Rows where ``col`` is NaN are dropped,
        because a NaN never compares as inside the interval.
    """
    q1, q3 = df[col].quantile([0.25, 0.75])
    iqr = q3 - q1

    # `between` is inclusive on both ends, i.e. lower <= value <= upper.
    inside_fences = df[col].between(q1 - k * iqr, q3 + k * iqr)
    return df[inside_fences]

In [5]:
def get_norm_data(df, col):
    """Centre and scale one column and return the statistics needed to undo it.

    The column is transformed as ``(value - mean) / variance``. The scale is
    the variance returned by ``Series.var`` (pandas default ``ddof=1``), not
    the standard deviation, so the result is centred but not unit-variance.
    The inverse transform is ``value * variance + mean``.

    Parameters
    ----------
    df : pandas.DataFrame
        Input table. It is left untouched; the normalisation is applied to a
        copy so that frames obtained by slicing/querying another frame can be
        passed safely.
    col : str
        Name of the numeric column to normalise.

    Returns
    -------
    tuple
        ``(normalised_df, col_mean, col_var)`` where ``normalised_df`` is a
        copy of ``df`` with ``col`` replaced by its normalised values, and
        ``col_mean`` / ``col_var`` are the statistics of the original column.
    """
    col_mean = df[col].mean()
    col_var = df[col].var()

    # Work on a copy: writing into `df` would mutate the caller's object and
    # is a chained assignment when `df` is itself a slice of another frame.
    normalised = df.copy()
    normalised[col] = (df[col] - col_mean) / col_var
    return normalised, col_mean, col_var

## Исходные данные

In [6]:
# Read the borehole information.
# NOTE(review): `df_boreholes` is not used anywhere in this cell; it is kept
# because later cells may rely on it.
df_boreholes = pd.read_excel("data/координаты_скважин.xlsx")
house_3 = pd.read_excel("data/house_3_local.xlsx")

all_E_columns = ["borehole", "x", "y", "z", "h", "E"]

# Collect the per-borehole tables in a list and concatenate once at the end.
# Growing a DataFrame with `pd.concat` inside the loop is quadratic, and
# seeding it with an empty frame relies on deprecated dtype inference.
borehole_frames = []
for borehole, x_bh, y_bh, z_bh in house_3[["тсз", "x", "y", "z"]].itertuples(index=False, name=None):
    # One Excel file per borehole; keep only depth `h` and `E` after removing
    # the IQR outliers of `E`.
    df_now = pd.read_excel(f"data/тсз{borehole}.xlsx")
    df_now = get_filter_df(df_now, "E").loc[:, ["h", "E"]]

    # `assign` returns a new frame, so nothing is written into a slice.
    df_now = df_now.assign(borehole=borehole, x=x_bh, y=y_bh, z=z_bh)
    borehole_frames.append(df_now)

if borehole_frames:
    all_E = pd.concat(borehole_frames, ignore_index=True)[all_E_columns]
else:
    # `pd.concat([])` raises; keep the expected schema for an empty site table.
    all_E = pd.DataFrame(columns=all_E_columns)

# Elevation of each sample: borehole `z` minus depth `h`.
all_E["z_h"] = all_E["z"] - all_E["h"]
print(all_E.head(), all_E.shape)

# Floor-divide the elevation by 1 to get an integer-valued layer id `group`,
# then average every column per (group, borehole).
all_E["group"] = all_E["z_h"] // 1
group_E = all_E.groupby(by=["group", "borehole"]).mean()

# Expose both index levels as ordinary integer columns as well (the
# MultiIndex itself is kept), so they can be used in `DataFrame.query`.
group_E["group"] = group_E.index.get_level_values("group").astype(int).to_numpy()
group_E["borehole"] = group_E.index.get_level_values("borehole").astype(int).to_numpy()


def _borehole_xy(borehole_id):
    """Return the (1, 2) array of plan coordinates of one borehole in `house_3`."""
    xy = house_3.loc[house_3["тсз"] == borehole_id, ["x", "y"]].to_numpy()
    if xy.shape[0] != 1:
        # A missing id would silently give a zero distance (infinite
        # coefficient); a duplicated id would give a wrong distance.
        raise ValueError(
            f"expected exactly one row for borehole {borehole_id} in house_3, found {xy.shape[0]}"
        )
    return xy


# Scale factors from X / Y coordinate distances to the real foundation
# dimensions, on the condition that the outermost boreholes lie on the
# foundation perimeter.
# NOTE(review): 27 and 26 are presumably the foundation side lengths; their
# units are not shown in this notebook - confirm.
coeff_dist_Y = 27 / np.linalg.norm(_borehole_xy(59) - _borehole_xy(51))
coeff_dist_X = 26 / np.linalg.norm(_borehole_xy(123) - _borehole_xy(51))

  borehole             x            y       z     h    E     z_h
0       51  97607.570697  4338.997129  76.631  0.00  0.0  76.631
1       51  97607.570697  4338.997129  76.631  0.05  6.0  76.581
2       51  97607.570697  4338.997129  76.631  0.10  6.0  76.531
3       51  97607.570697  4338.997129  76.631  0.15  6.0  76.481
4       51  97607.570697  4338.997129  76.631  0.20  6.0  76.431 (24429, 7)


## 2d кригинг

In [7]:
# Training set: a subset of the boreholes in plan view.
# Test set: all remaining boreholes.
boreholes_train = [51, 55, 59,
                   69, 73, 77,
                   87, 91, 95,
                   105, 109, 113,
                   123, 127, 131,
                  ]

model = 'exponential'

# Work on a single layer: the rows whose `group` value equals 65.
df_now = group_E.query('group == 65')

# Split by borehole id. Explicit copies are taken so that the normalisation
# never writes into a frame derived from `group_E`.
data_train = df_now.query('borehole in @boreholes_train').copy()
data_test = df_now.query('borehole not in @boreholes_train').copy()
if data_train.empty or data_test.empty:
    raise ValueError(
        f"empty split for the selected layer: {len(data_train)} training rows, {len(data_test)} test rows"
    )

# Each subset is normalised with its own mean / variance; the same pair is
# used further down to bring the values back to the original scale.
data_train, mean_train, var_train = get_norm_data(data_train, "E")
data_test, mean_test, var_test = get_norm_data(data_test, "E")

x_train = data_train.loc[:, ["x", "y"]].to_numpy()
x_test = data_test.loc[:, ["x", "y"]].to_numpy()
target_train = data_train["E"].to_numpy()
target_test = data_test["E"].to_numpy()

# Kriging on the normalised training values.
x = x_train[:, 0]
y = x_train[:, 1]
E = target_train

# The variogram parameters are left for PyKrige to fit. To pin them instead,
# pass e.g. variogram_parameters={'sill': 28, 'range': 50, 'nugget': 1}.
UK = UniversalKriging(x,
                      y,
                      E,
                      variogram_model=model,
                      )

# `var` is the second array returned by `execute`; it is in normalised units
# and is not used in this cell.
predict_test, var = UK.execute('points', x_test[:, 0], x_test[:, 1])

# Undo the normalisation: targets with the test statistics they were scaled
# by, predictions with the training statistics the model was fitted on.
target_test = target_test * var_test + mean_test
predict_test = predict_test * var_train + mean_train
print(var_test, mean_test)
print(var_train, mean_train)
print(target_train.shape, r2_score(target_test, predict_test), mean_absolute_error(target_test, predict_test))

10.345074114513185 10.622355164573317
8.677544855559955 9.577631578947368
(15,) 0.22998274427862087 2.0413821383148623
