## Основной код

In [None]:
import tensorflow as tf
import pandas as pd
from keras.preprocessing.text import Tokenizer
import numpy as np
import keras
from tensorflow.keras import layers
from tensorflow.keras import models
import matplotlib.pyplot as plt



def data_preparation():
    """Load ``train.csv`` and return it shuffled, with numeric category codes.

    Reads the semicolon-separated file ``train.csv`` from the current working
    directory, replaces every missing value (in all columns) with the string
    ``'delivered'`` and adds a ``Category_code`` column that numbers the
    distinct ``Error`` values 1, 2, 3, ... in order of first appearance.
    The frame's shape, dtypes, the category count and the first rows are
    printed along the way.

    Returns:
        pandas.DataFrame: the rows in random order with a fresh 0..n-1 index.
    """
    frame = pd.read_csv('train.csv', sep=';')
    print(frame.shape)
    print(frame.dtypes)
    frame = frame.fillna('delivered')

    # Codes are 1-based: the first distinct Error value seen gets code 1.
    error_values = frame['Error'].unique()
    code_by_error = {
        error: code for code, error in enumerate(error_values, start=1)
    }
    frame['Category_code'] = frame['Error'].map(code_by_error)

    # Shuffle the rows and renumber the index from zero.
    shuffled = frame.sample(frac=1).reset_index(drop=True)

    print(f'Count of category: {len(error_values)}')
    print(shuffled.head())

    return shuffled


def load_data_from_arrays(strings, labels, train_test_split=0.9):
    """Split parallel sequences of samples and labels into train and test parts.

    The first ``test_size`` items become the test set and the remainder the
    training set, where ``test_size = len(strings) - round(len(strings) *
    train_test_split)``. The sizes of all four parts are printed.

    Args:
        strings: sliceable sequence of samples.
        labels: sliceable sequence of labels, sliced at the same position
            as ``strings``.
        train_test_split: fraction of the data to keep for training.

    Returns:
        tuple: ``(x_train, y_train, x_test, y_test)``.
    """
    total = len(strings)
    train_count = round(total * train_test_split)
    test_size = int(total - train_count)
    print(f"Test size: {test_size}")

    # The head of each sequence is held out for testing, the tail is trained on.
    x_test, x_train = strings[:test_size], strings[test_size:]
    y_test, y_train = labels[:test_size], labels[test_size:]

    print("\nTraining set:")
    print(f"\t - x_train: {len(x_train)}")
    print(f"\t - y_train: {len(y_train)}")

    print("\nTesting set:")
    print(f"\t - x_test: {len(x_test)}")
    print(f"\t - y_test: {len(y_test)}")

    return x_train, y_train, x_test, y_test

def vectorize_sequences(sequences, dimension=25000):
    """Encode index sequences as multi-hot rows.

    Args:
        sequences: sized iterable whose items are collections of integer
            column indices.
        dimension: number of columns in the output matrix.

    Returns:
        numpy.ndarray: float array of shape ``(len(sequences), dimension)``
        where row ``i`` holds 1.0 at every index listed in ``sequences[i]``
        and 0.0 elsewhere.
    """
    n_rows = len(sequences)
    encoded = np.zeros((n_rows, dimension))
    for row_idx, active_columns in enumerate(sequences):
        # Fancy indexing sets all listed columns of this row at once.
        encoded[row_idx, active_columns] = 1.0
    return encoded


def to_one_hot(labels, dimension=4):
    """One-hot encode integer class labels (categorical encoding).

    Args:
        labels: sized iterable of integer class indices; each is used
            directly as a column index.
        dimension: number of columns (classes) in the output matrix.

    Returns:
        numpy.ndarray: float array of shape ``(len(labels), dimension)``
        with 1.0 in the column given by each label and 0.0 elsewhere.
    """
    n_samples = len(labels)
    encoded = np.zeros((n_samples, dimension))
    for position, class_index in enumerate(labels):
        encoded[position, class_index] = 1.0
    return encoded


# ---------------------------------------------------------------------------
# Training script: load the data, build a small dense classifier, train it,
# evaluate it on the held-out part and plot the learning curves.
# ---------------------------------------------------------------------------

train_db = data_preparation()
# Each sample is a tuple of six raw per-row fields.
description = train_db.apply(lambda x: (x['Source'], x['Destination'], x['Length'], x['Source Port'], x['Dest Port'], x['Delta time']), axis = 1)
categories = train_db['Category_code']

print(description.head())
print(categories.head())
X_train, y_train, X_test, y_test = load_data_from_arrays(description, categories, train_test_split=0.9)

#X_train = vectorize_sequences(X_train)
#X_test = vectorize_sequences(X_test)

# Single source of truth for the number of output classes; it sizes both the
# one-hot labels and the output layer so the two cannot drift apart.
# NOTE(review): data_preparation() numbers categories starting at 1, so
# column 0 of the one-hot matrix is never set and a code equal to
# num_classes would raise IndexError in to_one_hot — confirm against the
# "Count of category" value printed above.
num_classes = 4

y_train = to_one_hot(y_train, dimension=num_classes)
y_test = to_one_hot(y_test, dimension=num_classes)
print('y_train shape:', y_train.shape)
print('y_test shape:', y_test.shape)

epochs = 10

model = models.Sequential()
# NOTE(review): the model expects 1000 input features, but X_train holds
# 6-field tuples and the vectorization above is commented out — the inputs
# presumably need to be converted to a numeric array of matching width
# before fit() can succeed. TODO confirm the intended feature encoding.
model.add(layers.Dense(512, input_shape=(1000,)))
model.add(layers.Activation('relu'))
model.add(layers.Dropout(0.2))
model.add(layers.Dense(num_classes))
model.add(layers.Activation('softmax'))

model.compile(loss='categorical_crossentropy',
              optimizer='adam',
              metrics=['accuracy'])

# summary() prints the table itself and returns None, so it is not wrapped
# in print().
model.summary()

# Validation data is required for the 'val_*' entries plotted below to exist
# in history.history; the held-out split is used, matching the 'test' legend.
history = model.fit(X_train, y_train,
                    batch_size=32,
                    epochs=epochs,
                    verbose=1,
                    validation_data=(X_test, y_test),
                    )
score = model.evaluate(X_test, y_test,
                       batch_size=32, verbose=1)
print('Test loss:', score[0])
print('Test accuracy:', score[1])

# Older Keras releases record the metric as 'acc', newer ones as 'accuracy';
# use whichever key this run produced.
acc_key = 'accuracy' if 'accuracy' in history.history else 'acc'

# Model accuracy plot
plt.plot(history.history[acc_key])
plt.plot(history.history['val_' + acc_key])
plt.title('model accuracy')
plt.ylabel('accuracy')
plt.xlabel('epoch')
plt.legend(['train', 'test'], loc='upper left')
plt.show()
# Loss plot
plt.plot(history.history['loss'])
plt.plot(history.history['val_loss'])
plt.title('model loss')
plt.ylabel('loss')
plt.xlabel('epoch')
plt.legend(['train', 'test'], loc='upper left')
plt.show()

