# 3. Обучение лучших моделей

#### Импортирование необходимых библиотек

In [1]:
import numpy as np
import pandas as pd
import warnings
from tensorflow.keras.layers import Dense, Dropout, Activation
from tensorflow.keras.models import Sequential
from tensorflow.keras.callbacks import EarlyStopping
from tensorflow.keras.utils import to_categorical
from sklearn.model_selection import KFold

#### Загрузка и подготовка данных

In [2]:
# Base table: read first, and later cells take the 'Label' column from it.
df = pd.read_csv('all_embeddings_text.csv')
# Second table: only its three '*_prep_text' columns are used.
df_2 = pd.read_csv('all_embeddings_prep_text.csv')

# Attach the preprocessed-text embedding columns to the base table.
# DataFrame.join aligns rows on the index, so both files are presumably
# written in the same row order -- TODO confirm.
columns_to_add = [f'{n}_prep_text' for n in range(1, 4)]
df = df.join(df_2.loc[:, columns_to_add])

def parse_string_to_list(s):
    """Parse a stringified numeric vector into a list of floats.

    Accepts whitespace-separated values (e.g. ``"[0.1 0.2\\n 0.3]"``) as well
    as comma-separated values (e.g. ``"[0.1, 0.2, 0.3]"``). Whitespace around
    the brackets is ignored.

    Args:
        s: String holding the bracketed vector.

    Returns:
        list[float]: The parsed values; an empty list for ``"[]"``.

    Raises:
        ValueError: If any token cannot be converted to ``float``.
    """
    # Drop outer whitespace first: otherwise a trailing newline keeps
    # strip('[]') from reaching the closing bracket.
    clean_s = s.strip().strip('[]')
    # Treat commas as plain separators so both vector formats are accepted.
    numbers = clean_s.replace(',', ' ').split()
    return [float(num) for num in numbers]

# Turn every stringified embedding column into lists of floats, visiting
# '<i>_text' and then '<i>_prep_text' for i = 1, 2, 3.
for i in (1, 2, 3):
    for suffix in ('text', 'prep_text'):
        column = f'{i}_{suffix}'
        df[column] = df[column].apply(parse_string_to_list)

# Class labels are stored as plain integers.
df['Label'] = df['Label'].astype(int)

#### Обучение моделей

In [3]:
# Silence all Python warnings for the rest of the session.
warnings.filterwarnings("ignore")

# Number of classes
num_classes = 4

# The labels, their one-hot encoding and the fold count do not depend on the
# embedding variant, so they are computed once rather than on every iteration.
n_splits = 4
y_train_val = np.array(df['Label'])
y_train_val_categorical = to_categorical(y_train_val, num_classes=num_classes)

# One model per embedding column: '1_text' -> best_model.h5,
# '2_text' -> fast_model.h5. The '3_text' and '*_prep_text' columns are not
# used in this cell.
for i in range(1, 3):

    X_train_val = np.array(df[f'{i}_text'].to_list())

    n_features = len(X_train_val[0])

    # Single hidden layer MLP over the embedding vector.
    model = Sequential()
    model.add(Dense(512, input_shape=(n_features,)))
    model.add(Activation('relu'))
    model.add(Dropout(0.5))
    # Output width follows num_classes so it always matches the one-hot labels.
    model.add(Dense(num_classes, activation='softmax'))

    model.compile(loss='categorical_crossentropy',
                  optimizer='adam',
                  metrics=['accuracy'])

    kf = KFold(n_splits=n_splits, shuffle=True, random_state=42)

    # NOTE(review): the same model instance keeps training across folds, so
    # from the second fold on the validation split contains samples the model
    # was already fitted on. val_loss / val_accuracy (and therefore early
    # stopping) are optimistic there and are not a cross-validation estimate.
    # Kept as is because the saved artifact depends on this training schedule.
    for fold_idx, (train_idx, val_idx) in enumerate(kf.split(X_train_val, y_train_val)):
        print(f"Training fold {fold_idx + 1}/{n_splits}")

        X_train = X_train_val[train_idx]
        y_train = y_train_val_categorical[train_idx]
        X_val = X_train_val[val_idx]
        y_val = y_train_val_categorical[val_idx]

        # A fresh callback per fold so patience counting starts from scratch.
        early_stopping = EarlyStopping(monitor='val_loss', patience=2, restore_best_weights=True)
        model.fit(X_train, y_train, validation_data=(X_val, y_val), epochs=10, callbacks=[early_stopping])

    # Persist the model reached after the last fold.
    if i == 1:
        model.save('best_model.h5')
    else:
        model.save('fast_model.h5')



Training fold 1/4
Epoch 1/10
[1m618/618[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 3ms/step - accuracy: 0.8895 - loss: 0.4063 - val_accuracy: 0.9408 - val_loss: 0.2104
Epoch 2/10
[1m618/618[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 3ms/step - accuracy: 0.9452 - loss: 0.1919 - val_accuracy: 0.9435 - val_loss: 0.1927
Epoch 3/10
[1m618/618[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 3ms/step - accuracy: 0.9535 - loss: 0.1658 - val_accuracy: 0.9458 - val_loss: 0.1893
Epoch 4/10
[1m618/618[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 3ms/step - accuracy: 0.9570 - loss: 0.1510 - val_accuracy: 0.9474 - val_loss: 0.1799
Epoch 5/10
[1m618/618[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 3ms/step - accuracy: 0.9628 - loss: 0.1325 - val_accuracy: 0.9473 - val_loss: 0.1809
Epoch 6/10
[1m618/618[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 3ms/step - accuracy: 0.9647 - loss: 0.1140 - val_accuracy: 0.9502 - val_loss: 0.1726
Epoch 7/10



Training fold 1/4
Epoch 1/10
[1m618/618[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2ms/step - accuracy: 0.8518 - loss: 0.4765 - val_accuracy: 0.9312 - val_loss: 0.2331
Epoch 2/10
[1m618/618[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - accuracy: 0.9359 - loss: 0.2291 - val_accuracy: 0.9360 - val_loss: 0.2201
Epoch 3/10
[1m618/618[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 1ms/step - accuracy: 0.9451 - loss: 0.1942 - val_accuracy: 0.9386 - val_loss: 0.2129
Epoch 4/10
[1m618/618[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - accuracy: 0.9483 - loss: 0.1801 - val_accuracy: 0.9401 - val_loss: 0.2097
Epoch 5/10
[1m618/618[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - accuracy: 0.9530 - loss: 0.1674 - val_accuracy: 0.9426 - val_loss: 0.2021
Epoch 6/10
[1m618/618[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - accuracy: 0.9556 - loss: 0.1588 - val_accuracy: 0.9429 - val_loss: 0.1972
Epoch 7/10

