In [1]:
import tensorflow as tf
from tensorflow.keras import layers, models
from sklearn.model_selection import train_test_split
import numpy as np

In [2]:
# Build the "compromised" datasets: copies of datasets.txt in which every
# token containing a '.' is replaced by a corrupted value.
def _write_corrupted_copy(src_path, dst_path, make_value):
    """Copy ``src_path`` to ``dst_path``, replacing every token that contains a '.'.

    Args:
        src_path: Text file with single-space-separated tokens, one record per line.
        dst_path: Output file; overwritten if it already exists.
        make_value: Zero-argument callable returning the replacement string.
            It is called once per replaced token, in file order.
    """
    with open(src_path, 'r') as src, open(dst_path, 'w') as dst:
        for raw_line in src:
            tokens = raw_line.rstrip().split(' ')
            replaced = [make_value() if '.' in token else token for token in tokens]
            dst.write(' '.join(replaced) + '\n')


# "Long" variant: every '.'-containing token becomes the sentinel 99999.0.
_write_corrupted_copy('datasets.txt', 'datasets_bad_long.txt', lambda: '99999.0')

# "Short" variant: every '.'-containing token becomes a fresh random float32
# drawn from np.random.uniform(0.0001, 0.05).
_write_corrupted_copy(
    'datasets.txt',
    'datasets_bad_short.txt',
    lambda: str(np.float32(np.random.uniform(0.0001, 0.05))),
)

In [3]:
def get_X(datasets_file: str):
    """Build a feature matrix of 3-value windows from a whitespace-separated file.

    Every line with more than one token is scanned with a window of width 3
    and stride 2 (tokens ``i, i+1, i+2`` for ``i = 0, 2, 4, ...``); tokens
    that do not complete a window are dropped, so a two-token line yields no
    row. A single-token line yields one row ``[x, r, x]`` where ``r`` is drawn
    from ``np.random.uniform(0.0001, 0.05)``. Blank lines are skipped.

    Args:
        datasets_file: Path to the text file to read.

    Returns:
        A float32 array of shape ``(n_rows, 3)``; shape ``(0, 3)`` when the
        file produces no rows.

    Raises:
        ValueError: If a token cannot be parsed as a float.
    """
    rows = []
    with open(datasets_file, 'r') as file:
        for raw_line in file:
            tokens = raw_line.split()
            if not tokens:
                # A blank line used to reach tokens[0] and raise IndexError.
                continue
            n = len(tokens)
            if n > 1:
                for i in range(0, n - 2, 2):
                    rows.append(np.array([np.float32(x) for x in tokens[i:i + 3]]))
            else:
                x = np.float32(tokens[0])
                filler = np.float32(np.random.uniform(0.0001, 0.05))
                rows.append(np.array([x, filler, x]))
    if not rows:
        # np.vstack([]) raises ValueError; return an empty, correctly shaped matrix
        # so callers can still stack it with other results.
        return np.empty((0, 3), dtype=np.float32)
    return np.vstack(rows)

In [4]:
# Load the 3-value windows from the original (uncorrupted) dataset.
X = get_X('datasets.txt')
X.shape

(40697, 3)

In [5]:
# Append the rows from the copy whose '.'-containing tokens were set to 99999.0.
# NOTE(review): X is rebuilt from itself, so re-running this cell appends the rows again.
X = np.vstack((X, get_X('datasets_bad_long.txt')))
X.shape

(81394, 3)

In [6]:
# Append the rows from the copy whose '.'-containing tokens were replaced by
# small random values drawn from uniform(0.0001, 0.05).
# NOTE(review): X is rebuilt from itself, so re-running this cell appends the rows again.
X = np.vstack((X, get_X('datasets_bad_short.txt')))
X.shape

(122091, 3)

In [7]:
def get_y(X, long_marker=99999.0, short_threshold=0.05):
    """Derive binary labels from the middle column of ``X``.

    A row is labelled 1 when ``X[:, 1]`` equals ``long_marker`` or is strictly
    below ``short_threshold``; every other row is labelled 0.

    Args:
        X: 2-D array with at least two columns; only column 1 is inspected.
        long_marker: Sentinel value that marks a row as corrupted.
        short_threshold: Exclusive upper bound below which a row is marked
            as corrupted.

    Returns:
        1-D integer array with one 0/1 label per row of ``X``.
    """
    middle = X[:, 1]
    is_long = middle == long_marker
    is_short = middle < short_threshold
    return (is_long | is_short).astype(int)

In [8]:
# Compute the labels while the 99999.0 sentinel is still present in X.
y = get_y(X)
y.shape

(122091,)

In [9]:
def norm_bad_datasemples(X):
    """Overwrite the 99999.0 sentinels in column 1 with random values.

    Each row whose middle value equals 99999.0 receives an independent draw
    from ``np.random.uniform(2.0, 4.0)``, cast to float32.

    Args:
        X: 2-D array; modified in place.

    Returns:
        The same array object ``X`` that was passed in.
    """
    is_marked = X[:, 1] == 99999.0
    n_marked = np.sum(is_marked)
    replacements = np.random.uniform(2.0, 4.0, size=n_marked)
    X[is_marked, 1] = replacements.astype(np.float32)
    return X

In [10]:
# Replace the 99999.0 sentinels with random values from uniform(2.0, 4.0).
# This runs after get_y because the labels are derived from the sentinel itself.
X = norm_bad_datasemples(X)

In [11]:
# 80/20 train/test split; random_state is fixed so the split is reproducible.
X_train, X_test, y_train, y_test = train_test_split(X, y, train_size=0.8, random_state=42)

In [12]:
# Sanity check: shapes of the feature/label arrays on both sides of the split.
print(X_train.shape, y_train.shape, X_test.shape, y_test.shape)

(97672, 3) (97672,) (24419, 3) (24419,)


In [13]:
# Build, train and evaluate one small classifier per hidden-layer width (1 and 2 units).
# NOTE(review): the features are fed unscaled; in the printed samples columns 0 and 2
# are around 1e4-1e5 while column 1 is below ~4 — consider normalising the inputs.
modls = []
for i in range(1, 3):
    # An explicit Input layer replaces the `input_shape=` layer argument,
    # which recent Keras versions warn against in Sequential models.
    model = models.Sequential([
        layers.Input(shape=(3,)),
        layers.Dense(i, activation='relu'),
        layers.Dense(1, activation='sigmoid')
    ])

    # Compile for binary classification.
    model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

    # Train silently; 30% of the training data is held out for validation.
    model.fit(X_train, y_train, epochs=10, batch_size=32, validation_split=0.3, verbose=0)

    # Evaluate on the held-out test set.
    loss, accuracy = model.evaluate(X_test, y_test, verbose=0)
    print(f"i = {i}: Test Accuracy: {accuracy:.4f}")
    modls.append(model)

1: Test Accuracy: 0.6947
2: Test Accuracy: 0.6077


In [14]:
# Example predictions: probe the most recently trained `model` with a sweep of
# middle-column values while the two outer features stay fixed.
probe_values = [
    2.0001, 1.801, 1.5009, 1.19, 0.999,
    0.501, 0.1013, 0.0901, 0.0501, 0.0101,
    0.0081, 0.0051, 0.0011, 0.0001, 0.00009,
]
# Each entry is a batch of one sample, so `examples` has shape (15, 1, 3).
examples = np.array([[[46000, value, 10800]] for value in probe_values])
for example in examples:
    prediction = model.predict(example)
    print(f"Prediction for {example}: {prediction[0][0]:.4f}")

Prediction for [[4.6000e+04 2.0001e+00 1.0800e+04]]: 1.0000
Prediction for [[4.600e+04 1.801e+00 1.080e+04]]: 1.0000
Prediction for [[4.6000e+04 1.5009e+00 1.0800e+04]]: 0.9999
Prediction for [[4.60e+04 1.19e+00 1.08e+04]]: 0.9996
Prediction for [[4.60e+04 9.99e-01 1.08e+04]]: 0.9991
Prediction for [[4.60e+04 5.01e-01 1.08e+04]]: 0.9892
Prediction for [[4.600e+04 1.013e-01 1.080e+04]]: 0.9272
Prediction for [[4.60e+04 9.01e-02 1.08e+04]]: 0.9233
Prediction for [[4.60e+04 5.01e-02 1.08e+04]]: 0.9080
Prediction for [[4.60e+04 1.01e-02 1.08e+04]]: 0.8902
Prediction for [[4.60e+04 8.10e-03 1.08e+04]]: 0.8894
Prediction for [[4.60e+04 5.10e-03 1.08e+04]]: 0.8878
Prediction for [[4.60e+04 1.10e-03 1.08e+04]]: 0.8860
Prediction for [[4.60e+04 1.00e-04 1.08e+04]]: 0.8853
Prediction for [[4.60e+04 9.00e-05 1.08e+04]]: 0.8853


In [15]:
# Print 100 consecutive training rows next to their labels for a manual check.
for i in range(13300, 13400):
    print(i, X_train[i], y_train[i])

13300 [9.8000000e+04 2.5042953e-02 1.0400000e+04] 1
13301 [1.0600000e+04 2.7397912e+00 1.0000000e+04] 1
13302 [1.040000e+04 2.275697e+00 1.020000e+04] 1
13303 [1.0100e+04 2.5527e-01 9.9000e+04] 0
13304 [1.180000e+04 3.273456e-02 1.180000e+04] 1
13305 [9.8000000e+04 4.5029805e-03 1.1100000e+04] 1
13306 [1.0800000e+04 3.1813398e-02 1.0200000e+04] 1
13307 [1.0000e+04 3.0599e-01 1.1200e+04] 0
13308 [9.800000e+04 2.558433e-02 1.200000e+04] 1
13309 [9.8000000e+04 3.8126774e+00 9.9000000e+04] 1
13310 [1.0100000e+04 1.4943664e-02 1.1000000e+04] 1
13311 [1.1400000e+04 3.5326176e+00 1.0700000e+04] 1
13312 [1.1000e+04 1.6207e-01 1.0900e+04] 0
13313 [9.8000000e+04 3.5635824e+00 6.5506000e+04] 1
13314 [9.9000000e+04 2.0554352e+00 1.2200000e+04] 1
13315 [1.1000e+04 3.3217e-01 9.7000e+04] 0
13316 [1.020000e+04 4.746144e-02 1.100000e+04] 1
13317 [1.060e+04 9.393e-02 1.180e+04] 0
13318 [1.2100e+04 1.3005e-01 1.2100e+04] 0
13319 [9.800000e+04 3.560791e-02 9.800000e+04] 1
13320 [6.5506000e+04 4.7812212e-

In [16]:
# Collect the middle-column values that fall below the 0.05 labelling threshold.
mask = (X[:, 1] < 0.05)
mins = X[mask, 1]
mins.shape

(47698,)

In [17]:
# Sort ascending in place; `mins` came from boolean-mask indexing, which
# returns a copy, so X itself is not reordered.
mins.sort()
mins

array([0.00010031, 0.00010037, 0.00010156, ..., 0.04999673, 0.04999759,
       0.04999995], dtype=float32)

In [18]:
# Smallest middle-column value among the rows below the threshold.
print(np.min(mins))

0.00010030776
