In [1]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.utils import class_weight
import os
import tensorflow as tf
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.applications import VGG16, ResNet50
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Flatten, Dropout, GlobalAveragePooling2D
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import ModelCheckpoint, EarlyStopping
import matplotlib.pyplot as plt

In [2]:
# Load the labelled table and discard the leftover index column from the CSV export
data = pd.read_csv('train.csv').drop(columns=['Unnamed: 0'])

# Show how many samples each emotion class has
emotion_distribution = data['emotion'].value_counts()
print(emotion_distribution)

# Stratified 80/20 split so both parts keep the same class proportions
train_data, test_data = train_test_split(
    data,
    test_size=0.2,
    stratify=data['emotion'],
    random_state=42,
)

emotion
anger        7022
neutral      6795
sad          6740
surprise     6323
happy        5955
uncertain    5927
fear         5044
disgust      3155
contempt     3085
Name: count, dtype: int64


In [3]:
class EmotionModel:
    """Transfer-learning emotion classifier built on an ImageNet backbone.

    The network is a pre-trained convolutional backbone (VGG16 or ResNet50,
    without its original top) followed by global average pooling and a small
    dense head that ends in a softmax over the emotion classes.

    Args:
        model_name: Backbone to use, either ``'VGG16'`` or ``'ResNet50'``.
        num_classes: Number of output classes (size of the softmax layer).
        input_shape: Shape of one input image as ``(height, width, channels)``.
        learning_rate: Learning rate passed to the Adam optimizer.
        fine_tune_layers: How many of the backbone's last layers are left
            trainable; all earlier layers are frozen.

    Raises:
        ValueError: If ``model_name`` is not one of the supported backbones.
    """

    def __init__(self, model_name='VGG16', num_classes=9, input_shape=(224, 224, 3),
                 learning_rate=0.0001, fine_tune_layers=10):
        self.model_name = model_name
        self.num_classes = num_classes
        self.input_shape = input_shape
        self.learning_rate = learning_rate
        self.fine_tune_layers = fine_tune_layers
        self.model = self.build_model()

    def build_model(self):
        """Create and compile the Keras model described by the instance settings.

        Returns:
            The compiled ``Sequential`` model (categorical cross-entropy loss,
            Adam optimizer, accuracy metric).

        Raises:
            ValueError: If ``self.model_name`` is not supported.
        """
        if self.model_name == 'VGG16':
            backbone_cls = VGG16
        elif self.model_name == 'ResNet50':
            backbone_cls = ResNet50
        else:
            raise ValueError("Unsupported model name: {}".format(self.model_name))

        base_model = backbone_cls(weights='imagenet', include_top=False, input_shape=self.input_shape)

        # The backbone container must stay trainable: if the container itself is
        # flagged non-trainable, re-enabling individual inner layers is not
        # reliably picked up by the outer model. So freeze layer by layer instead.
        base_model.trainable = True
        first_trainable = max(len(base_model.layers) - self.fine_tune_layers, 0)
        for index, layer in enumerate(base_model.layers):
            # BatchNormalization layers stay frozen so that their ImageNet
            # statistics are not overwritten during fine-tuning.
            is_batch_norm = isinstance(layer, tf.keras.layers.BatchNormalization)
            layer.trainable = index >= first_trainable and not is_batch_norm

        model = Sequential([
            base_model,
            GlobalAveragePooling2D(),
            Dense(512, activation='relu'),
            Dropout(0.5),
            Dense(256, activation='relu'),
            Dropout(0.5),
            Dense(self.num_classes, activation='softmax')  # one output per emotion class
        ])

        model.compile(optimizer=Adam(learning_rate=self.learning_rate),
                      loss='categorical_crossentropy', metrics=['accuracy'])

        return model

    def train(self, train_generator, val_generator, class_weights, epochs=50, patience=10,
              checkpoint_path='best_model.h5', final_model_path='emotion_classification_model_2.h5'):
        """Fit the model and save it to disk.

        Args:
            train_generator: Batches of training images and one-hot labels.
            val_generator: Batches used for validation after every epoch.
            class_weights: Mapping ``class index -> weight`` passed to ``fit``.
            epochs: Maximum number of training epochs.
            patience: Epochs without ``val_loss`` improvement before stopping.
            checkpoint_path: File that receives the best-``val_accuracy`` model.
            final_model_path: File that receives the model after training ends.

        Returns:
            The ``History`` object returned by ``fit``.
        """
        checkpoint = ModelCheckpoint(checkpoint_path, monitor='val_accuracy', save_best_only=True, mode='max')
        # restore_best_weights: without it the model kept in memory (and saved
        # below) would hold the weights from `patience` epochs after the best one.
        early_stopping = EarlyStopping(monitor='val_loss', patience=patience, mode='min',
                                       restore_best_weights=True)

        history = self.model.fit(train_generator, epochs=epochs, validation_data=val_generator,
                                 class_weight=class_weights, callbacks=[checkpoint, early_stopping])

        self.model.save(final_model_path)

        return history

    def plot_history(self, history):
        """Plot training/validation loss and accuracy curves side by side.

        Args:
            history: Object with a ``history`` dict containing the keys
                ``'loss'``, ``'val_loss'``, ``'accuracy'`` and ``'val_accuracy'``.
        """
        panels = (
            ('Loss', 'loss', 'val_loss', 'Train Loss', 'Validation Loss'),
            ('Accuracy', 'accuracy', 'val_accuracy', 'Train Accuracy', 'Validation Accuracy'),
        )

        plt.figure(figsize=(12, 4))
        for position, (title, train_key, val_key, train_label, val_label) in enumerate(panels, start=1):
            plt.subplot(1, 2, position)
            plt.plot(history.history[train_key], label=train_label)
            plt.plot(history.history[val_key], label=val_label)
            plt.xlabel('Epoch')
            plt.legend()
            plt.title(title)
        plt.show()

    def predict(self, test_generator):
        """Return the index of the most probable class for every input sample.

        Args:
            test_generator: Input accepted by the underlying model's ``predict``.

        Returns:
            1-D array of class indices (argmax over the last axis of the
            predicted probabilities).
        """
        predictions = self.model.predict(test_generator)
        return np.argmax(predictions, axis=1)

In [4]:
# The ImageNet weights of both supported backbones (VGG16, ResNet50) expect
# "caffe"-style inputs (RGB -> BGR, per-channel mean subtraction, no scaling),
# which is the default mode of this function. Plain 1/255 rescaling does not
# match what the pre-trained layers were trained on.
imagenet_preprocess = tf.keras.applications.imagenet_utils.preprocess_input

# Augmentation is applied to training images only
train_datagen = ImageDataGenerator(
    preprocessing_function=imagenet_preprocess,
    rotation_range=30,
    width_shift_range=0.3,
    height_shift_range=0.3,
    shear_range=0.3,
    zoom_range=0.3,
    horizontal_flip=True,
    fill_mode='nearest'
)
test_datagen = ImageDataGenerator(preprocessing_function=imagenet_preprocess)

train_generator = train_datagen.flow_from_dataframe(
    train_data,
    x_col='image_path',
    y_col='emotion',
    target_size=(224, 224),
    batch_size=32,
    class_mode='categorical',
    seed=42  # reproducible shuffling and augmentation
)

val_generator = test_datagen.flow_from_dataframe(
    test_data,
    x_col='image_path',
    y_col='emotion',
    target_size=(224, 224),
    batch_size=32,
    class_mode='categorical',
    shuffle=False  # fixed order keeps predictions aligned with val_generator.classes
)

# Balanced class weights, computed from the integer labels the generator
# actually feeds to Keras. This guarantees the dict keys are the same class
# indices used in the one-hot targets and that only validated images are counted.
train_label_ids = np.asarray(train_generator.classes)
present_class_ids = np.unique(train_label_ids)
balanced_weights = class_weight.compute_class_weight(
    class_weight='balanced',
    classes=present_class_ids,
    y=train_label_ids
)
class_weights = {
    int(class_id): float(weight)
    for class_id, weight in zip(present_class_ids, balanced_weights)
}

Found 40036 validated image filenames belonging to 9 classes.
Found 10010 validated image filenames belonging to 9 classes.


In [None]:
# Build the classifier on top of the ResNet50 backbone
emotion_model = EmotionModel(model_name='ResNet50')

# Fit it on the training batches, validating on the hold-out split
history = emotion_model.train(
    train_generator=train_generator,
    val_generator=val_generator,
    class_weights=class_weights,
)

# Draw the loss and accuracy curves recorded during training
emotion_model.plot_history(history=history)

Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50

In [None]:
# Collect the test images
test_data_dir = 'test'  # folder that holds the test images
image_extensions = ('.jpg', '.jpeg')
# Sorted so the row order does not depend on the arbitrary order of os.listdir;
# the extension check is case-insensitive so files such as "IMG.JPG" are kept.
test_images = sorted(
    os.path.join(test_data_dir, img)
    for img in os.listdir(test_data_dir)
    if img.lower().endswith(image_extensions)
)
if not test_images:
    raise FileNotFoundError("No JPEG images found in '{}'".format(test_data_dir))

# DataFrame listing the files to feed to the generator
test_files_df = pd.DataFrame(test_images, columns=['image_path'])

# Unlabelled, unshuffled generator so predictions stay in file order
test_generator = test_datagen.flow_from_dataframe(
    test_files_df,
    x_col='image_path',
    y_col=None,
    target_size=(224, 224),
    batch_size=32,
    class_mode=None,
    shuffle=False
)

# Predict the emotion class index for every image
predicted_classes = emotion_model.predict(test_generator)

# Map class indices back to their names
class_indices = train_generator.class_indices
labels = {v: k for k, v in class_indices.items()}
predicted_labels = [labels[k] for k in predicted_classes]

# Build the result table from the files the generator actually used: it drops
# entries it cannot validate, so the input list may be longer than the predictions.
# Only the file name is kept, not the full path.
test_df = pd.DataFrame({
    'image_path': [os.path.basename(path) for path in test_generator.filenames],
    'emotion': predicted_labels,
})

# Save the predictions to a CSV file
test_df.to_csv('predictions_2.csv', index=False)

# Show the first rows as a sanity check
test_df.head()