# 3. Обучение лучших моделей

#### Импортирование необходимых библиотек

In [None]:
import numpy as np
import pandas as pd
import warnings
from tensorflow.keras.layers import Dense, Dropout, Activation
from tensorflow.keras.models import Sequential
from tensorflow.keras.callbacks import EarlyStopping
from tensorflow.keras.utils import to_categorical
from sklearn.model_selection import KFold

#### Загрузка и подготовка данных

In [None]:
# Read the two embedding tables from disk.
df = pd.read_csv('all_embeddings_text.csv')
df_2 = pd.read_csv('all_embeddings_prep_text.csv')

# Attach the three "*_prep_text" columns from the second table to the first.
# DataFrame.join aligns on the row index, so this presumes both CSVs list the
# same samples in the same order -- TODO confirm.
columns_to_add = [f'{n}_prep_text' for n in (1, 2, 3)]
df = df.join(df_2.loc[:, columns_to_add])

def parse_string_to_list(s):
    """Convert a bracketed string of numbers into a list of floats.

    Accepts whitespace-separated values such as ``"[0.1 -2.5 3e-4]"``
    (including values wrapped across several lines) as well as
    comma-separated values such as ``"[0.1, -2.5]"``. Whitespace around the
    brackets is ignored.

    Args:
        s: String holding the numbers, optionally wrapped in square brackets.

    Returns:
        The parsed values in their original order; an empty list when the
        string contains no numbers.

    Raises:
        ValueError: If a token cannot be converted to ``float``.
    """
    # Trim outer whitespace first; otherwise strip('[]') never reaches the
    # brackets when the string has leading/trailing spaces or newlines.
    clean_s = s.strip().strip('[]')
    # Treat commas as plain separators so both "[1 2]" and "[1, 2]" parse.
    numbers = clean_s.replace(',', ' ').split()
    return [float(num) for num in numbers]

# Every embedding column (plain and "prep" variants 1-3) is read from CSV as
# text; turn each cell back into a list of floats.
for i in (1, 2, 3):
    for column in (f'{i}_text', f'{i}_prep_text'):
        df[column] = df[column].apply(parse_string_to_list)

# Cast the target column to integers.
df['Label'] = df['Label'].astype(int)

#### Обучение моделей

In [None]:
# Suppress every Python warning from this point on.
warnings.filterwarnings("ignore")

# Number of classes
num_classes = 4

# The targets and the fold splitter do not depend on the embedding variant,
# so they are prepared once before the loop.
y_train_val = np.array(df['Label'])
y_train_val_categorical = to_categorical(y_train_val, num_classes=num_classes)

n_splits = 4
kf = KFold(n_splits=n_splits, shuffle=True, random_state=42)

# Train one network per embedding variant (columns '1_text' and '2_text').
for i in (1, 2):

    # Stack the per-row embedding lists into a single feature array.
    X_train_val = np.array(df[f'{i}_text'].to_list())
    n_features = len(X_train_val[0])

    # One 512-unit hidden layer with ReLU and dropout, softmax output.
    model = Sequential([
        Dense(512, input_shape=(n_features,)),
        Activation('relu'),
        Dropout(0.5),
        Dense(num_classes),
        Activation('softmax'),
    ])
    model.compile(
        optimizer='adam',
        loss='categorical_crossentropy',
        metrics=['accuracy'],
    )

    # NOTE(review): the same model instance is fitted on each fold in turn, so
    # weights carry over between folds and later validation folds contain
    # samples the model has already been trained on -- confirm this is the
    # intended way to produce the final model.
    for fold_idx, (train_idx, val_idx) in enumerate(kf.split(X_train_val, y_train_val)):
        print(f"Training fold {fold_idx + 1}/{n_splits}")

        train_data = (X_train_val[train_idx], y_train_val_categorical[train_idx])
        val_data = (X_train_val[val_idx], y_train_val_categorical[val_idx])

        # Stop a fold early once val_loss has not improved for 2 epochs and
        # roll back to the best weights seen in that fit.
        early_stopping = EarlyStopping(
            monitor='val_loss',
            patience=2,
            restore_best_weights=True,
        )
        model.fit(
            *train_data,
            validation_data=val_data,
            epochs=10,
            callbacks=[early_stopping],
        )

    # Variant 1 is saved as the "best" model, variant 2 as the "fast" model.
    model.save('best_model.h5' if i == 1 else 'fast_model.h5')

