In [1]:
!pip install tensorflow pandas matplotlib




In [3]:
import os
import pandas as pd
import tensorflow as tf
from tensorflow.keras.layers import Input, Conv2D, MaxPooling2D, Flatten, Dense, Dropout, Bidirectional, LSTM
from tensorflow.keras.models import Model
from tensorflow.keras.preprocessing.image import load_img, img_to_array
import numpy as np
import matplotlib.pyplot as plt

In [5]:
data_path = r'C:\Users\redmi\kirill'
train_path = os.path.join(data_path, 'train')
test_path = os.path.join(data_path, 'test')
train_tsv = os.path.join(data_path, 'train.tsv')
test_tsv = os.path.join(data_path, 'test.tsv')

In [7]:
train_labels = pd.read_csv(train_tsv, sep='\t', header=None, names=['filename', 'text'])
test_labels = pd.read_csv(test_tsv, sep='\t', header=None, names=['filename', 'text'])

In [9]:
img_size = (128, 32)

In [11]:
def preprocess_image(file_path, target_size=img_size):
    img = load_img(file_path, color_mode='grayscale', target_size=target_size)
    img_array = img_to_array(img) / 255.0  # Нормализация
    return img_array

In [13]:
def create_dataset(data_dir, labels_df):
    images = []
    texts = []
    for _, row in labels_df.iterrows():
        file_path = os.path.join(data_dir, row['filename'])
        if os.path.exists(file_path):
            images.append(preprocess_image(file_path))
            texts.append(row['text'])
    return np.array(images), np.array(texts)

In [15]:
x_train, y_train = create_dataset(train_path, train_labels)
x_test, y_test = create_dataset(test_path, test_labels)

In [17]:
input_layer = Input(shape=(img_size[0], img_size[1], 1))
x = Conv2D(32, (3, 3), activation='relu', padding='same')(input_layer)
x = MaxPooling2D((2, 2))(x)
x = Conv2D(64, (3, 3), activation='relu', padding='same')(x)
x = MaxPooling2D((2, 2))(x)
x = Flatten()(x)
x = Dense(128, activation='relu')(x)
x = Dropout(0.5)(x)
output_layer = Dense(len(set(y_train)), activation='softmax')(x)

In [19]:
model = Model(inputs=input_layer, outputs=output_layer)
model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])

In [21]:
y_train_numeric = np.array([ord(ch) for ch in ''.join(y_train)])  # Пример преобразования текста в числа
model.fit(x_train, y_train_numeric, validation_split=0.2, epochs=10, batch_size=32)

Epoch 1/10
[1m1808/1808[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m352s[0m 193ms/step - accuracy: 0.0837 - loss: 3.8279 - val_accuracy: 0.0834 - val_loss: 3.4641
Epoch 2/10
[1m1808/1808[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m395s[0m 200ms/step - accuracy: 0.0870 - loss: 3.4762 - val_accuracy: 0.1042 - val_loss: 3.4491
Epoch 3/10
[1m1808/1808[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m345s[0m 191ms/step - accuracy: 0.0952 - loss: 3.4539 - val_accuracy: 0.1042 - val_loss: 3.4447
Epoch 4/10
[1m1808/1808[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m350s[0m 193ms/step - accuracy: 0.0896 - loss: 3.4537 - val_accuracy: 0.1042 - val_loss: 3.4283
Epoch 5/10
[1m1808/1808[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m368s[0m 186ms/step - accuracy: 0.0922 - loss: 3.4375 - val_accuracy: 0.1042 - val_loss: 3.4221
Epoch 6/10
[1m1808/1808[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m413s[0m 202ms/step - accuracy: 0.0943 - loss: 3.4331 - val_accuracy: 0.1042 - val_loss:

<keras.src.callbacks.history.History at 0x1f4435711f0>

NameError: name 'model' is not defined