In [1]:
import tensorflow as tf
from tensorflow.keras import layers, models
from sklearn.model_selection import train_test_split
import numpy as np

In [2]:
# сформируем скомпрометированные датасеты
with open('datasets.txt', 'r') as file:
    records = [line.rstrip().split(' ') for line in file]
# print(type(records))
for line in records:
    # print(line)
    for i in range(len(line)):
        if '.' in line[i]:
            line[i] = '99999.0'
    # print(line)
# for line in records:
    # print(line)
with open('datasets_bad.txt', 'w') as file:
    for line in records:
        s = ' '.join(line) + '\n'
        file.write(s)


In [3]:
def get_X(datasets_file: str):
    with open(datasets_file, 'r') as file:
        records = [line.rstrip().split() for line in file]
    X = []
    for line in records:
        if (n := len(line)) > 1:
            # print(line)
            for i in range(0, n - 2, 2):
                X.append(np.array([np.float32(x) for x in line[i:i + 2 + 1]]))
        else:
            x = line[0]
            X.append(np.array([np.float32(x), np.random.uniform(0.0001, 0.05), np.float32(x)]))
    X = np.vstack(X)
    return X

In [4]:
X = get_X('datasets.txt')
X.shape

(40684, 3)

In [5]:
X_bad = get_X('datasets_bad.txt')
X = np.vstack((X, X_bad))
X.shape

(81368, 3)

In [6]:
def get_y(X):
    # Создание маски, которая проверяет наличие 99999.0 в строках X
    mask1 = (X[:, 1] == 99999.0)
    mask2 = (X[:, 1] < 0.05)
    mask = mask1 | mask2

    # Создание y на основе маски
    y = mask.astype(int)

    # # Дополнительный вывод для проверки правильности соответствия
    # print("\nПроверка соответствия:")
    # for i in range(X.shape[0]):
    #     if y[i] == 0:
    #         print(f"Строка {i+1} в X: {X[i]} -> метка в y: {y[i]}")
    return np.array(y)

In [7]:
y = get_y(X)
y.shape

(81368,)

In [8]:
def norm_bad_datasemples(X):
    mask = (X[:, 1] == 99999.0)
    X[mask, 1] = np.float32(np.random.uniform(2.0, 4.0, size=np.sum(mask)))
    return X

In [9]:
X = norm_bad_datasemples(X)

In [10]:
X_train, X_test, y_train, y_test = train_test_split(X, y, train_size=0.7, random_state=42)

In [11]:
print(X_train.shape, y_train.shape, X_test.shape, y_test.shape)

(56957, 3) (56957,) (24411, 3) (24411,)


In [12]:
# Создание модели
model = models.Sequential([
    layers.Dense(16, input_shape=(3,), activation='relu'),
    # layers.Dense(16, activation='relu'),
    layers.Dense(1, activation='sigmoid')
])

# Компиляция модели
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

# Обучение модели
model.fit(X_train, y_train, epochs=10, batch_size=32, validation_split=0.3)

# Оценка модели
loss, accuracy = model.evaluate(X_test, y_test)
print(f"Test Accuracy: {accuracy:.4f}")

2024-05-20 23:04:02.070647: I tensorflow/core/common_runtime/process_util.cc:146] Creating new thread pool with default inter op setting: 2. Tune using inter_op_parallelism_threads for best performance.


Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Test Accuracy: 0.5488


In [13]:
# Пример предсказания
examples = np.array([[[46000, 2.0001, 10800]],
                    [[46000, 1.801, 10800]],
                    [[46000, 1.5009, 10800]],
                    [[46000, 1.19, 10800]],
                    [[46000, 0.999, 10800]],
                    [[46000, 0.701, 10800]],
                    [[46000, 0.5013, 10800]],
                    [[46000, 0.2011, 10800]],
                    [[46000, 0.10035, 10800]],
                    [[46000, 0.0901, 10800]],
                    [[46000, 0.0701, 10800]],
                    [[46000, 0.0501, 10800]],
                    [[46000, 0.0301, 10800]],
                    [[46000, 0.0101, 10800]],
                    [[46000, 0.0091, 10800]],
                    [[46000, 0.0071, 10800]]])
for example in examples:
    prediction = model.predict(example)
    print(f"Prediction for {example}: {prediction[0][0]:.4f}")

Prediction for [[4.6000e+04 2.0001e+00 1.0800e+04]]: 0.0000
Prediction for [[4.600e+04 1.801e+00 1.080e+04]]: 0.0000
Prediction for [[4.6000e+04 1.5009e+00 1.0800e+04]]: 0.0000
Prediction for [[4.60e+04 1.19e+00 1.08e+04]]: 0.0000
Prediction for [[4.60e+04 9.99e-01 1.08e+04]]: 0.0000
Prediction for [[4.60e+04 7.01e-01 1.08e+04]]: 0.0000
Prediction for [[4.600e+04 5.013e-01 1.080e+04]]: 0.0000
Prediction for [[4.600e+04 2.011e-01 1.080e+04]]: 0.0000
Prediction for [[4.6000e+04 1.0035e-01 1.0800e+04]]: 0.0000
Prediction for [[4.60e+04 9.01e-02 1.08e+04]]: 0.0000
Prediction for [[4.60e+04 7.01e-02 1.08e+04]]: 0.0000
Prediction for [[4.60e+04 5.01e-02 1.08e+04]]: 0.0000
Prediction for [[4.60e+04 3.01e-02 1.08e+04]]: 0.0000
Prediction for [[4.60e+04 1.01e-02 1.08e+04]]: 0.0000
Prediction for [[4.60e+04 9.10e-03 1.08e+04]]: 0.0000
Prediction for [[4.60e+04 7.10e-03 1.08e+04]]: 0.0000


In [18]:
for i in range(13300, 13400):
    print(i, X_train[i], y_train[i])

13300 [1.21000000e+04 1.08630002e-01 1.16000000e+04] 0
13301 [9.80000000e+04 2.91245762e-02 9.80000000e+04] 1
13302 [1.03000000e+04 2.02934384e+00 1.06000000e+04] 1
13303 [1.15000000e+04 3.12919729e-02 1.15000000e+04] 1
13304 [1.2200000e+04 3.2776351e-02 1.2200000e+04] 1
13305 [1.0100000e+04 2.4682591e+00 6.5506000e+04] 1
13306 [6.55060000e+04 3.61827707e+00 6.30000000e+04] 1
13307 [1.16000000e+04 1.09030001e-01 1.04000000e+04] 0
13308 [1.0200000e+04 2.5388906e+00 9.9000000e+04] 1
13309 [1.03000000e+04 4.99975909e-02 1.03000000e+04] 1
13310 [1.08000000e+04 3.42893672e+00 1.21000000e+04] 1
13311 [1.0500e+04 2.0803e-01 1.0200e+04] 0
13312 [1.18000000e+04 3.18359852e+00 1.06000000e+04] 1
13313 [1.17000000e+04 7.50310004e-01 6.55060000e+04] 0
13314 [1.14000000e+04 2.11225152e+00 1.07000000e+04] 1
13315 [1.030000e+04 2.221102e+00 1.120000e+04] 1
13316 [6.55050000e+04 8.82932503e-03 6.55050000e+04] 1
13317 [4.40000000e+04 3.11969995e-01 1.04000000e+04] 0
13318 [9.80000000e+04 1.81989998e-01 

In [15]:
mask = (X[:, 1] < 0.05)
mins = X[mask, 1]
mins.shape

(6995,)

In [16]:
mins.sort()
mins

array([0.00010153, 0.00010777, 0.00011247, ..., 0.04998853, 0.04999321,
       0.04999759])

In [17]:
print(np.min(mins))

0.00010153466250313222
