# ML-классификатор

## Логистическая регрессия

In [75]:
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
import numpy as np
import pandas as pd

In [76]:
# Load the ';'-separated feature/label table into a DataFrame.
# read_csv takes the column labels from the header row, so they are strings ('0' ... '28').
df = pd.read_csv('nn_features_and_target.csv', sep=';')
# Column '29' is the complement of label column '28': 1.0 where '28' equals 0.0, else 0.0.
# Together the two columns form a two-column (one-hot style) label.
# A vectorized comparison replaces the row-wise apply(); it gives the same float column
# and also works when the frame has no rows.
df['29'] = (df['28'] == 0.0).astype(float)
print(df.to_string(max_rows=8, max_cols=30))

             0         1         2         3         4         5         6         7         8         9        10        11        12        13        14        15        16        17        18        19        20   21   22   23   24   25   26   27   28   29
0     1.000000  0.949765  0.946863  0.890311  0.886746  0.810817  0.768762  0.702327  0.658250  0.519462  0.000000  0.071725  0.156965  0.237875  0.342755  0.385964  0.561642  0.618664  0.710416  0.721658  0.876098  1.0  0.0  0.0  0.0  0.0  0.0  0.0  0.0  1.0
1     1.000000  0.999613  0.949249  0.946274  0.888331  0.884700  0.808215  0.764843  0.699029  0.650257  0.510770  0.000000  0.073884  0.163346  0.246354  0.359640  0.402954  0.585638  0.643909  0.737880  0.749382  0.0  1.0  0.0  0.0  0.0  0.0  0.0  1.0  0.0
2     0.879807  0.815902  0.811820  0.731742  0.685201  0.616099  0.559507  0.401556  0.216736  0.112487  0.000000  0.101511  0.235431  0.280920  0.482243  0.544868  0.668686  0.686063  0.859401  0.888299  1.000000  0.0 

In [77]:
# Turn the DataFrame into a float32 NumPy array so features and labels can be
# selected by column position.
dataset = df.to_numpy().astype(np.float32)

# In array[rows, cols] indexing the part before the comma selects rows and the
# part after it selects columns: the first 28 columns are the features, the
# next two (28 and 29) are the label columns.
N_FEATURES = 28
X = dataset[:, :N_FEATURES]
y = dataset[:, N_FEATURES:N_FEATURES + 2]

# Hold out 20% of the rows for testing; the fixed random_state makes the split repeatable.
X_train, X_test, y_train_2, y_test_2 = train_test_split(X, y, test_size=0.2, random_state=42)

# Collapse each two-column label row into a single class index
# (the position of the largest value in the row).
y_train = y_train_2.argmax(axis=1)
y_test = y_test_2.argmax(axis=1)

In [78]:
# Train a logistic-regression classifier on the training split.
# max_iter is raised to 10000 (presumably to avoid solver convergence warnings).
model = LogisticRegression(max_iter=10_000).fit(X_train, y_train)

# Class probabilities for the test rows; columns follow the order of model.classes_.
probabilities = model.predict_proba(X_test)
print(probabilities)

[[0.55851438 0.44148562]
 [0.51812288 0.48187712]
 [0.54245284 0.45754716]
 [0.53196457 0.46803543]
 [0.47428572 0.52571428]
 [0.56762034 0.43237966]
 [0.54520905 0.45479095]
 [0.53973625 0.46026375]
 [0.51021139 0.48978861]
 [0.47642559 0.52357441]
 [0.56431733 0.43568267]
 [0.54189219 0.45810781]
 [0.54850765 0.45149235]
 [0.49500837 0.50499163]
 [0.48821419 0.51178581]
 [0.46292792 0.53707208]
 [0.53008631 0.46991369]
 [0.49812753 0.50187247]
 [0.48312767 0.51687233]
 [0.49505792 0.50494208]
 [0.54008417 0.45991583]
 [0.49503081 0.50496919]
 [0.51660684 0.48339316]
 [0.52053087 0.47946913]
 [0.50700031 0.49299969]
 [0.53516124 0.46483876]
 [0.49622947 0.50377053]
 [0.52578979 0.47421021]
 [0.54006911 0.45993089]
 [0.42795261 0.57204739]
 [0.45671169 0.54328831]
 [0.56250794 0.43749206]
 [0.57658422 0.42341578]
 [0.45817104 0.54182896]
 [0.52242501 0.47757499]
 [0.51537148 0.48462852]
 [0.54247538 0.45752462]
 [0.46010769 0.53989231]
 [0.54844305 0.45155695]
 [0.5132167  0.4867833 ]


In [79]:
# Write the predicted probabilities to a text file, four decimal places per value.
np.savetxt(fname='ml_predictions.txt', X=probabilities, fmt='%.4f')

In [80]:
border = 0.55  # Probability threshold a prediction must exceed to be acted on


def profit_loss(u_p, d_p, u_r, d_r, threshold=None):
    """Score one prediction as a hit (1) or a miss (0).

    A row is a hit when the side whose predicted probability is strictly above
    the threshold is also the side marked 1.0 in the real labels.

    Args:
        u_p: Predicted probability of the "up" class.
        d_p: Predicted probability of the "down" class.
        u_r: Real "up" label (1.0 when the row is "up").
        d_r: Real "down" label (1.0 when the row is "down").
        threshold: Probability cut-off. Defaults to the module-level `border`,
            read at call time so later changes to `border` are respected.

    Returns:
        1 if the confident prediction matches the real label, otherwise 0.
    """
    if threshold is None:
        threshold = border
    up_hit = u_p > threshold and u_r == 1.0
    down_hit = d_p > threshold and d_r == 1.0
    return 1 if (up_hit or down_hit) else 0

# Put the predicted probabilities next to the real two-column labels. Both frames get
# the same default 0..n-1 index, so an inner column-wise concat lines the rows up.
# NOTE: this rebinds `df`, which earlier held the feature table read from the CSV;
# re-run the loading cell before re-running the dataset-preparation cell.
df = pd.DataFrame(probabilities, columns=['up_predict', 'down_predict'])
df_y = pd.DataFrame(y_test_2, columns=['up_real', 'down_real'])
df = pd.concat([df, df_y], axis=1, join='inner')

# Keep only rows where one of the two probabilities exceeds the threshold.
# .copy() makes the filtered result an independent frame, so adding a column
# below does not trigger SettingWithCopyWarning.
df = df[(df.up_predict > border) | (df.down_predict > border)].copy()

# 1 when the side predicted above the threshold is also the real one, else 0.
# This is the same rule as profit_loss(), evaluated column-wise so that it also
# works when no row passes the filter.
up_hit = (df.up_predict > border) & (df.up_real == 1.0)
down_hit = (df.down_predict > border) & (df.down_real == 1.0)
df['profit_loss'] = (up_hit | down_hit).astype(int)

print(df.to_string(max_rows=20, max_cols=22))
print(df.shape)

     up_predict  down_predict  up_real  down_real  profit_loss
0      0.558514      0.441486      1.0        0.0            1
5      0.567620      0.432380      0.0        1.0            0
10     0.564317      0.435683      0.0        1.0            0
29     0.427953      0.572047      0.0        1.0            1
31     0.562508      0.437492      0.0        1.0            0
32     0.576584      0.423416      1.0        0.0            1
41     0.551701      0.448299      0.0        1.0            0
45     0.583229      0.416771      1.0        0.0            1
48     0.558292      0.441708      1.0        0.0            1
49     0.559565      0.440435      0.0        1.0            0
..          ...           ...      ...        ...          ...
395    0.556370      0.443630      1.0        0.0            1
419    0.559430      0.440570      1.0        0.0            1
431    0.559931      0.440069      0.0        1.0            0
441    0.552872      0.447128      0.0        1.0      

In [81]:
# Hit rate: share of the retained (above-threshold) predictions that matched the real label.
hits = df['profit_loss']
rez = hits.sum() / hits.size
print(rez)

0.5428571428571428
