In [None]:
import os
import warnings

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
from tqdm import tqdm

# Install a blanket "ignore" filter: from here on every warning category,
# from every library, is suppressed for the rest of the session.
warnings.simplefilter(action="ignore")


def filter_ones(data):
    """Build a boolean row mask that is True where ``WEFAC`` equals 1."""
    wefac = data["WEFAC"]
    return wefac.eq(1)


def filter_more_than_zero(data):
    """Build a boolean row mask that is True where ``WEFAC`` is strictly positive."""
    wefac = data["WEFAC"]
    return wefac.gt(0)


def filter_all(data):
    """Build a boolean row mask that is True where ``WEFAC`` is zero or positive.

    Rows whose ``WEFAC`` is missing or negative come out as False.
    """
    wefac = data["WEFAC"]
    return wefac.ge(0)


def filter_zeros(data):
    """Build a boolean row mask that is True where ``WEFAC`` equals 0."""
    wefac = data["WEFAC"]
    return wefac.eq(0)


# Module-level list of column names. The draw_* functions below each define
# their own local `columns`, which shadows this one inside them, and the
# data-loading cell rebinds this name again.
columns = ["WWPR", "WOPR", "WGPR", "WGIR", "WWIR", "WBHP", "WTHP"]


def draw_scaled(data, well):
    """Plot max-scaled column curves of one well under four ``WEFAC`` filters.

    One subplot is drawn per filter (``WEFAC==1``, ``WEFAC>0``, ``WEFAC>=0``,
    ``WEFAC==0``). Within a subplot every column listed in ``columns`` is
    divided by its own maximum over the selected rows and drawn against
    ``DATE`` as points plus a connecting line. The 4x1 figure is written to
    ``images/merged_scaled_{well}.jpg`` and then closed.

    Args:
        data: DataFrame containing ``WELL``, ``WEFAC``, ``DATE`` and the
            plotted columns.
        well: Value of the ``WELL`` column selecting the rows to plot.
    """
    columns = ["WWPR", "WOPR", "WGPR", "WBHP", "WTHP", "WGIR", "WWIR"]

    filters = [filter_ones, filter_more_than_zero, filter_all, filter_zeros]
    filter_names = ["WEFAC==1", "WEFAC>0", "WEFAC>=0", "WEFAC==0"]
    # Same global seaborn/matplotlib state as before, set in a single call.
    sns.set_theme(style="whitegrid", rc={"figure.figsize": (12, 30)})
    fig, axs = plt.subplots(nrows=4, ncols=1)
    well_mask = data.WELL == well
    for axis, filt, filt_name in zip(axs, filters, filter_names):
        cur_data = data[well_mask & filt(data)].fillna(0)
        # Divide each column by its own maximum. An all-zero column yields
        # 0/0 = NaN, which the second fillna turns back into 0.
        cur_data[columns] = cur_data[columns] / cur_data[columns].max()
        cur_data = cur_data.fillna(0).reset_index()[columns + ["DATE"]]

        for c in columns:
            # Only the line carries the label, so each column appears once
            # in the legend instead of once per artist.
            sns.scatterplot(data=cur_data, x="DATE", y=c, ax=axis)
            sns.lineplot(data=cur_data, x="DATE", y=c, label=c, ax=axis)
        # Address the panel explicitly: plt.legend() would act on whichever
        # axes is "current", not necessarily the one just drawn.
        axis.legend()
        axis.set(xlabel="DATE", ylabel="Scaled params")
        axis.set_title(filt_name)
    os.makedirs("images", exist_ok=True)
    fig.savefig(f"images/merged_scaled_{well}.jpg")
    # Close rather than clear: a cleared figure stays registered with pyplot,
    # so calling this once per well would keep accumulating figures.
    plt.close(fig)


def draw_sub(data, well, name_prefix=""):
    """Plot the columns of one well on a 2x3 grid and save the figure.

    Rows of ``well`` that pass ``filter_all`` are selected and missing values
    are replaced by 0. The first five entries of ``columns`` get one panel
    each; the remaining two (``WBHP`` and ``WTHP``) share the last panel.
    Every series is drawn against ``DATE`` as points plus a connecting line.
    The figure is written to ``images/{name_prefix}six_plots_{well}.jpg`` and
    then closed.

    Args:
        data: DataFrame containing ``WELL``, ``WEFAC``, ``DATE`` and the
            plotted columns.
        well: Value of the ``WELL`` column selecting the rows to plot.
        name_prefix: Text prepended to the output file name.
    """
    columns = ["WWPR", "WOPR", "WGPR", "WGIR", "WWIR", "WBHP", "WTHP"]
    single_columns, shared_columns = columns[:5], columns[5:]

    # Same global seaborn/matplotlib state as before, set in a single call.
    sns.set_theme(style="whitegrid", rc={"figure.figsize": (30, 15)})
    fig, axs = plt.subplots(nrows=2, ncols=3)
    fig.tight_layout(pad=10)
    cur_data = (
        data[(data.WELL == well) & filter_all(data)]
        .fillna(0)
        .reset_index()[columns + ["DATE"]]
    )

    # axs.flat walks the grid row by row, so the first five panels are
    # (0,0), (0,1), (0,2), (1,0), (1,1).
    for axis, c in zip(axs.flat, single_columns):
        # Only the line carries the label, so the legend has one entry.
        sns.scatterplot(data=cur_data, x="DATE", y=c, ax=axis)
        sns.lineplot(data=cur_data, x="DATE", y=c, label=c, ax=axis)
        axis.legend(loc="upper left", bbox_to_anchor=(1, 0.5))
        axis.set(xlabel="DATE", ylabel=c)
        axis.set_title(c)

    shared_axis = axs[1][2]
    for c in shared_columns:
        sns.scatterplot(data=cur_data, x="DATE", y=c, ax=shared_axis)
        sns.lineplot(data=cur_data, x="DATE", y=c, label=c, ax=shared_axis)
    # Decorate the shared panel once, after all of its series are drawn.
    shared_label = " & ".join(shared_columns)
    shared_axis.legend(loc="upper left", bbox_to_anchor=(1, 0.5))
    shared_axis.set(xlabel="DATE", ylabel=shared_label)
    shared_axis.set_title(shared_label)

    os.makedirs("images", exist_ok=True)
    fig.savefig(f"images/{name_prefix}six_plots_{well}.jpg")
    # Close rather than clear: a cleared figure stays registered with pyplot,
    # so calling this once per well would keep accumulating figures.
    plt.close(fig)


def draw_prod_rate(data, well, name_prefix=""):
    """Plot ``WLPROD``, ``WWPROD``, ``WOPROD`` and ``GOR`` of one well on a 2x2 grid.

    Rows of ``well`` that pass ``filter_all`` are selected and missing values
    are replaced by 0. Each column gets its own panel and is drawn against
    ``DATE`` as points plus a connecting line. The figure is written to
    ``images/{name_prefix}prod_rate_{well}.jpg`` and then closed.

    Args:
        data: DataFrame containing ``WELL``, ``WEFAC``, ``DATE`` and the four
            plotted columns.
        well: Value of the ``WELL`` column selecting the rows to plot.
        name_prefix: Text prepended to the output file name.
    """
    columns = ["WLPROD", "WWPROD", "WOPROD", "GOR"]
    # Same global seaborn/matplotlib state as before, set in a single call.
    sns.set_theme(style="whitegrid", rc={"figure.figsize": (30, 15)})
    fig, axs = plt.subplots(nrows=2, ncols=2)
    fig.tight_layout(pad=10)
    cur_data = (
        data[(data.WELL == well) & filter_all(data)]
        .fillna(0)
        .reset_index()[columns + ["DATE"]]
    )
    # axs.flat walks the grid row by row: (0,0), (0,1), (1,0), (1,1).
    for axis, c in zip(axs.flat, columns):
        # Only the line carries the label, so the legend has one entry.
        sns.scatterplot(data=cur_data, x="DATE", y=c, ax=axis)
        sns.lineplot(data=cur_data, x="DATE", y=c, label=c, ax=axis)
        axis.legend(loc="upper left", bbox_to_anchor=(1, 0.5))
        axis.set(xlabel="DATE", ylabel=c)
        axis.set_title(c)
    os.makedirs("images", exist_ok=True)
    fig.savefig(f"images/{name_prefix}prod_rate_{well}.jpg")
    # Close rather than clear: a cleared figure stays registered with pyplot,
    # so calling this once per well would keep accumulating figures.
    plt.close(fig)

In [None]:
# Read the workbook, then make sure the listed columns plus WEFAC are float64.
data = pd.read_excel("./data/Исходник_для_аномалий.xlsx", decimal=".")
columns = ["WWPR", "WOPR", "WGPR", "WBHP", "WTHP", "WGIR", "WWIR"]
for c in [*columns, "WEFAC"]:
    if data[c].dtype != "O":
        continue
    # Object-dtype column: render values as text, swap "," for "." and parse.
    data[c] = (
        data[c].astype(str).str.replace(",", ".", regex=False).astype("float64")
    )
data

In [None]:
# Add four derived ratio columns to `data` in place. The three *PROD columns
# share the divisor 180 - WBHP; GOR is WGPR divided by WOPR.
data["WLPROD"] = data["WWPR"].add(data["WOPR"]).div(180 - data["WBHP"])
data["WWPROD"] = data["WWPR"].div(180 - data["WBHP"])
data["WOPROD"] = data["WOPR"].div(180 - data["WBHP"])
data["GOR"] = data["WGPR"].div(data["WOPR"])
data

## Draw scaled and absolute

In [None]:
# One scaled four-panel figure per distinct well, in sorted well order.
for well in tqdm(np.unique(data["WELL"])):
    draw_scaled(data=data, well=well)

In [None]:
# One 2x3 figure of the unscaled columns per distinct well, in sorted well order.
for well in tqdm(np.unique(data["WELL"])):
    draw_sub(data=data, well=well)

## Draw prod rate

In [None]:
# One 2x2 figure of the derived ratio columns per distinct well.
for well in tqdm(np.unique(data["WELL"])):
    draw_prod_rate(data=data, well=well, name_prefix="")

## Try sliding window

In [None]:
def slided(data, window=3):
    """Return a copy of ``data`` with per-well centered rolling means.

    For every distinct ``WELL`` the rows passing ``filter_all`` are taken in
    their existing row order, missing values are treated as 0, and each
    column listed in ``columns`` is replaced by its centered rolling mean.
    Rows that do not pass the filter keep their original values.

    Args:
        data: Input DataFrame with ``WELL``, ``WEFAC`` and the smoothed
            columns. It is not modified.
        window: Rolling window length in rows. Because ``min_periods=1`` is
            used, positions near either end of a well's rows are averaged
            over however many rows fall inside the window.

    Returns:
        A new DataFrame of the same shape; the smoothed columns are float64.
    """
    columns = ["WWPR", "WOPR", "WGPR", "WGIR", "WWIR", "WBHP", "WTHP"]
    # Rolling means are fractional. Casting once up front means writing them
    # back never relies on pandas implicitly upcasting an integer column.
    data = data.copy(deep=True).astype({c: "float64" for c in columns})
    for well in tqdm(np.unique(data.WELL)):
        indexes = (data.WELL == well) & filter_all(data)
        if not indexes.any():
            # Every row of this well was rejected by the filter.
            continue
        cur_data = data.loc[indexes, columns].fillna(0)
        smoothed = cur_data.rolling(window, min_periods=1, center=True).mean()
        # Write back positionally (plain array) so duplicate index labels
        # cannot disturb the assignment.
        data.loc[indexes, columns] = smoothed.to_numpy()
    return data


def sub(data, data_slided):
    """Return a copy of ``data`` with ``data_slided`` subtracted column-wise.

    Only the columns listed in ``columns`` are changed; each becomes
    ``data[col] - data_slided[col]`` (rows are matched by index label).
    All other columns are copied through untouched.

    Args:
        data: DataFrame holding the original values. It is not modified.
        data_slided: DataFrame with the same columns, e.g. the result of
            ``slided(data, window=...)``.

    Returns:
        A new DataFrame containing the differences.
    """
    columns = ["WWPR", "WOPR", "WGPR", "WGIR", "WWIR", "WBHP", "WTHP"]
    result = data.copy(deep=True)
    # Subtract the frame that was passed in. The previous code read the
    # notebook global `data_slided7` here, so every call returned the
    # window-7 difference regardless of the argument.
    # Whole-column assignment lets the result dtype follow the difference.
    result[columns] = data[columns] - data_slided[columns]
    return result

In [None]:
# Smooth with a 7-row window, then save one "slided_7_" figure per well.
data_slided7 = slided(data, window=7)
for well in tqdm(np.unique(data["WELL"])):
    draw_sub(data=data_slided7, well=well, name_prefix="slided_7_")

In [None]:
# Difference between the original values and the 7-row rolling mean,
# saved as one "sub_7_" figure per well.
data_sub7 = sub(data=data, data_slided=data_slided7)
for well in tqdm(np.unique(data["WELL"])):
    draw_sub(data=data_sub7, well=well, name_prefix="sub_7_")

In [None]:
# Smooth with a 3-row window, then save one "slided_3_" figure per well.
data_slided3 = slided(data, window=3)
for well in tqdm(np.unique(data["WELL"])):
    draw_sub(data=data_slided3, well=well, name_prefix="slided_3_")

In [None]:
# Pass the original frame and the 3-row smoothed frame to sub(), then save
# one "sub_3_" figure per well from the result.
data_sub3 = sub(data=data, data_slided=data_slided3)
for well in tqdm(np.unique(data["WELL"])):
    draw_sub(data=data_sub3, well=well, name_prefix="sub_3_")