In [None]:
import os
import numpy as np
import pandas as pd
from sklearn.metrics import accuracy_score, classification_report
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense
import cv2

from tensorflow.keras.datasets import mnist
from tensorflow.keras.utils import to_categorical
from sklearn.model_selection import train_test_split

# Kaggle dataset

In [None]:
# Image directories, one per dataset split
train_img_dir = '/handwriting-recognition/train_v2/train/'
val_img_dir = '/handwriting-recognition/validation_v2/validation/'
test_img_dir = '/handwriting-recognition/test_v2/test/'

# CSV label tables, one per dataset split
train_csv = '/handwriting-recognition/written_name_train_v2.csv'
validation_csv = '/handwriting-recognition/written_name_validation_v2.csv'
test_csv = '/handwriting-recognition/written_name_test_v2.csv'

# Read the three label tables in train / validation / test order
train, valid, test = (
    pd.read_csv(csv_path) for csv_path in (train_csv, validation_csv, test_csv)
)

# Preview the first rows of the training table
train.head(6)

In [None]:
import os
import cv2
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

import tensorflow as tf
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, Conv2D, MaxPooling2D, Flatten, Dense, Dropout, Reshape, BatchNormalization, Bidirectional, LSTM, Lambda
from tensorflow.keras import backend as K


def _prepare_labels(frame, frac=0.01, seed=42):
    """Return a sampled, cleaned copy of a label table.

    Steps (same order as before): draw a reproducible random sample, drop
    rows containing missing values, drop rows labelled 'UNREADABLE',
    upper-case the 'IDENTITY' column and reset the index to 0..n-1.

    The whole pipeline is chained and returns a new frame, so nothing is
    assigned into a filtered slice (which triggered SettingWithCopyWarning)
    and the input frame is never mutated in place.

    Args:
        frame: DataFrame with at least an 'IDENTITY' column.
        frac: Fraction of rows to sample.
        seed: Random state used for sampling.

    Returns:
        A new DataFrame with a fresh RangeIndex.
    """
    sampled = frame.sample(frac=frac, random_state=seed).dropna(axis=0)
    readable = sampled[sampled['IDENTITY'] != 'UNREADABLE']
    return (
        readable
        .assign(IDENTITY=readable['IDENTITY'].str.upper())
        .reset_index(drop=True)
    )


# NOTE: `train` / `valid` are rebound here, so re-running this cell samples
# the already-sampled frames again; re-run the CSV loading cell first.
train = _prepare_labels(train)
valid = _prepare_labels(valid)

train_size = len(train)
valid_size = len(valid)
# Define different preprocessing techniques
def preprocess_v1(img):
    """Paste a grayscale image onto a white 64x256 canvas and rotate it.

    The image is cropped to at most 64 rows and 256 columns, copied into the
    top-left corner of a canvas filled with 255, and the canvas is rotated
    90 degrees clockwise.

    Args:
        img: 2-D array (height, width).

    Returns:
        The rotated canvas as returned by ``cv2.rotate``.
    """
    target_h, target_w = 64, 256
    h, w = img.shape

    # Number of rows / columns of the source that fit on the canvas.
    rows, cols = min(h, target_h), min(w, target_w)

    canvas = np.full((target_h, target_w), 255.0)  # blank white image
    canvas[:rows, :cols] = img[:rows, :cols]
    return cv2.rotate(canvas, cv2.ROTATE_90_CLOCKWISE)

def preprocess_v2(img):
    """Resize an image with ``cv2.resize(img, (256, 64))`` and rotate it.

    Unlike ``preprocess_v1`` this rescales the whole image instead of
    cropping/padding it. The resized image is rotated 90 degrees clockwise.

    Args:
        img: Image array accepted by ``cv2.resize``.

    Returns:
        The rotated, resized image as returned by ``cv2.rotate``.
    """
    resized = cv2.resize(img, (256, 64))
    rotated = cv2.rotate(resized, cv2.ROTATE_90_CLOCKWISE)
    return rotated

preprocess_methods = [preprocess_v1, preprocess_v2]

# Define different sets of hyperparameters
hyperparams = [
    {'conv_filters': [32, 64, 128], 'lstm_units': [128, 64]},
    {'conv_filters': [64, 128, 256], 'lstm_units': [256, 128]},
]

# --- CTC label configuration ---------------------------------------------
# These names were used below but never defined anywhere in the notebook,
# so this cell failed with NameError on a fresh kernel.
# NOTE(review): the alphabet is an assumption (upper-case letters plus
# hyphen, apostrophe and space) -- confirm against the labels actually used.
# Characters outside it are encoded as -1 by `label_to_num` (str.find).
alphabets = "ABCDEFGHIJKLMNOPQRSTUVWXYZ-' "
# One extra class for the CTC blank (ctc_batch_cost uses the last class).
num_of_characters = len(alphabets) + 1
# Width of the padded label matrix: the longest label in either split, so
# every encoded label is guaranteed to fit in its row.
max_str_len = int(max(train['IDENTITY'].str.len().max(),
                      valid['IDENTITY'].str.len().max()))


def label_to_num(label):
    """Encode a label as an array of per-character indices into `alphabets`.

    Characters that are not in `alphabets` map to -1.
    """
    return np.array([alphabets.find(ch) for ch in label])


def load_split(frame, img_dir, preprocess):
    """Load, preprocess and scale every image listed in ``frame['FILENAME']``.

    Args:
        frame: Label table with a 'FILENAME' column.
        img_dir: Directory that contains the image files.
        preprocess: Callable applied to each grayscale image.

    Returns:
        Array reshaped to (-1, 256, 64, 1) with pixel values divided by 255.

    Raises:
        FileNotFoundError: if OpenCV cannot read one of the files
            (``cv2.imread`` returns None instead of raising).
    """
    images = []
    for filename in frame['FILENAME']:
        path = os.path.join(img_dir, filename)
        image = cv2.imread(path, cv2.IMREAD_GRAYSCALE)
        if image is None:
            raise FileNotFoundError(f"Could not read image: {path}")
        images.append(preprocess(image) / 255.)
    return np.array(images).reshape(-1, 256, 64, 1)


def encode_labels(frame):
    """Build the padded label matrix and the label-length column for a split.

    Returns:
        (labels, label_len): `labels` has shape (len(frame), max_str_len) and
        is padded with -1; `label_len` has shape (len(frame), 1).
    """
    size = len(frame)
    labels = np.ones([size, max_str_len]) * -1
    label_len = np.zeros([size, 1])
    for row, text in enumerate(frame['IDENTITY']):
        label_len[row] = len(text)
        labels[row, 0:len(text)] = label_to_num(text)
    return labels, label_len


# CTC Loss function
def ctc_lambda_func(args):
    """Wrap K.ctc_batch_cost so it can be used inside a Lambda layer."""
    y_pred, labels, input_length, label_length = args
    return K.ctc_batch_cost(labels, y_pred, input_length, label_length)


# Labels do not depend on the preprocessing method, so encode them once.
train_y, train_label_len = encode_labels(train)
valid_y, valid_label_len = encode_labels(valid)

# Dummy output, we will not use this: the model itself outputs the CTC loss.
train_output = np.zeros([train_size])
valid_output = np.zeros([valid_size])

# Iterate through different preprocessing methods
for preprocess_method in preprocess_methods:
    # Load and preprocess the data. The image directories defined next to the
    # CSV paths are used so this cell agrees with the rest of the notebook
    # (it previously hard-coded a separate '/content/...' location).
    train_x = load_split(train, train_img_dir, preprocess_method)
    valid_x = load_split(valid, val_img_dir, preprocess_method)

    # Iterate through different sets of hyperparameters
    for params in hyperparams:
        # Several models are built in a loop; release the previous one's
        # global Keras state before building the next.
        K.clear_session()

        # Define the model using the Functional API
        input_data = Input(shape=(256, 64, 1), name='input')
        inner = Conv2D(params['conv_filters'][0], (3, 3), activation='relu', kernel_initializer='he_normal', padding='same')(input_data)
        inner = MaxPooling2D(pool_size=(2, 2))(inner)
        inner = Conv2D(params['conv_filters'][1], (3, 3), activation='relu', kernel_initializer='he_normal', padding='same')(inner)
        inner = MaxPooling2D(pool_size=(2, 2))(inner)
        inner = Conv2D(params['conv_filters'][2], (3, 3), activation='relu', kernel_initializer='he_normal', padding='same')(inner)
        inner = MaxPooling2D(pool_size=(2, 2))(inner)

        # Collapse both spatial axes into one sequence axis
        inner_shape = inner.shape
        new_shape = (inner_shape[1] * inner_shape[2], inner_shape[3])
        inner = Reshape(target_shape=new_shape, name='reshape')(inner)

        inner = Dense(64, activation='relu', kernel_initializer='he_normal', name='dense1')(inner)
        inner = Bidirectional(LSTM(params['lstm_units'][0], return_sequences=True, dropout=0.25))(inner)
        inner = Bidirectional(LSTM(params['lstm_units'][1], return_sequences=True, dropout=0.25))(inner)

        inner = Dense(num_of_characters, activation='softmax', kernel_initializer='he_normal', name='dense2')(inner)

        # Sequence length actually produced by this model; the CTC input
        # length is derived from it (the "- 2" offset is kept as it was).
        num_of_timestamps = int(inner.shape[1])
        train_input_len = np.ones([train_size, 1]) * (num_of_timestamps - 2)
        valid_input_len = np.ones([valid_size, 1]) * (num_of_timestamps - 2)

        labels = Input(name='the_labels', shape=[max_str_len], dtype='float32')
        input_length = Input(name='input_length', shape=[1], dtype='int64')
        label_length = Input(name='label_length', shape=[1], dtype='int64')

        ctc_loss = Lambda(ctc_lambda_func, output_shape=(1,), name='ctc')([inner, labels, input_length, label_length])

        model = Model(inputs=[input_data, labels, input_length, label_length], outputs=ctc_loss)

        # The 'ctc' layer already outputs the loss, so the Keras loss just
        # passes the prediction through.
        model.compile(optimizer='adam', loss={'ctc': lambda y_true, y_pred: y_pred})

        model.summary()

        # Prepare inputs for the model
        train_input = {
            'input': train_x,
            'the_labels': train_y,
            'input_length': train_input_len,
            'label_length': train_label_len
        }

        valid_input = {
            'input': valid_x,
            'the_labels': valid_y,
            'input_length': valid_input_len,
            'label_length': valid_label_len
        }

        # Train the model
        history = model.fit(train_input, train_output,
                            validation_data=(valid_input, valid_output),
                            epochs=10, batch_size=128)

        # Log the results
        print(f"Results for preprocessing method {preprocess_method.__name__} and hyperparameters {params}:")
        print(history.history)


In [None]:
# Helper for loading and preparing the images of one dataset split
def load_images_and_labels(df, img_dir, size=(28, 28)):
    """Load every image listed in `df` as grayscale, resize it, and pair it
    with its label.

    Args:
        df: DataFrame with 'FILENAME' and 'IDENTITY' columns.
        img_dir: Directory that contains the image files.
        size: Target size passed to ``cv2.resize``; defaults to (28, 28).

    Returns:
        (images, labels): two numpy arrays in the row order of `df`.

    Raises:
        FileNotFoundError: if OpenCV cannot read a file. ``cv2.imread``
            returns None in that case, which previously surfaced later as an
            unrelated error inside ``cv2.resize``.
    """
    images = []
    labels = []
    # Iterate the two columns directly instead of building a Series per row.
    for filename, identity in zip(df['FILENAME'], df['IDENTITY']):
        img_path = os.path.join(img_dir, filename)
        img = cv2.imread(img_path, cv2.IMREAD_GRAYSCALE)  # load as grayscale
        if img is None:
            raise FileNotFoundError(f"Could not read image: {img_path}")
        images.append(cv2.resize(img, size))
        labels.append(identity)
    return np.array(images), np.array(labels)

# Load the images and labels of each split
train_images, train_labels = load_images_and_labels(train, train_img_dir)
val_images, val_labels = load_images_and_labels(valid, val_img_dir)
test_images, test_labels = load_images_and_labels(test, test_img_dir)


def _to_model_input(images):
    """Reshape to (n, 28, 28, 1), cast to float32 and divide by 255."""
    with_channel = images.reshape(images.shape[0], 28, 28, 1)
    return with_channel.astype('float32') / 255


# Add the channel axis and normalise the pixel values
train_images = _to_model_input(train_images)
val_images = _to_model_input(val_images)
test_images = _to_model_input(test_images)

# Convert labels to one-hot form (using a helper that maps names to indices)
def labels_to_indices(labels, unique_labels):
    """Translate each label into its position within `unique_labels`.

    Args:
        labels: Iterable of label values to translate.
        unique_labels: Iterable of the known label values; a label's index is
            its position here.

    Returns:
        numpy array of indices, one per entry of `labels`.

    Raises:
        KeyError: if a label does not occur in `unique_labels`.
    """
    lookup = {}
    for position, name in enumerate(unique_labels):
        lookup[name] = position
    return np.array(list(map(lookup.__getitem__, labels)))

# Every distinct label across the three splits defines one class
unique_labels = np.unique(np.concatenate((train_labels, val_labels, test_labels)))

# Label -> class index, per split
train_labels_indices, val_labels_indices, test_labels_indices = (
    labels_to_indices(split_labels, unique_labels)
    for split_labels in (train_labels, val_labels, test_labels)
)

# Class index -> one-hot vector, per split
train_labels_one_hot, val_labels_one_hot, test_labels_one_hot = (
    to_categorical(split_indices, len(unique_labels))
    for split_indices in (train_labels_indices, val_labels_indices, test_labels_indices)
)

# The validation split is loaded from its own CSV above, so no additional
# train/validation split is performed here.

# Bind the data to the variable names used by the experiment code
x_train, y_train = train_images, train_labels_one_hot
x_val, y_val = val_images, val_labels_one_hot
x_test, y_test = test_images, test_labels_one_hot

# MNIST dataset

In [None]:
# Load the MNIST data
(x_train, y_train), (x_test, y_test) = mnist.load_data()

# Add the channel axis, cast to float32 and divide by 255
x_train = x_train.reshape(x_train.shape[0], 28, 28, 1).astype('float32') / 255
x_test = x_test.reshape(x_test.shape[0], 28, 28, 1).astype('float32') / 255

# One-hot encode the ten digit classes
y_train = to_categorical(y_train, 10)
y_test = to_categorical(y_test, 10)

# Hold out 20% of the training data for validation
x_train, x_val, y_train, y_val = train_test_split(
    x_train, y_train, test_size=0.2, random_state=42
)

# Names used by the preprocessing experiment
train_images, train_labels = x_train, y_train


# Experiment

In [None]:

# Example image preprocessing functions
def scale_images(images):
    """Placeholder scaling step: return `images` multiplied by 255."""
    return images * 255

def normalize_images(images):
    """Placeholder normalisation step: return `images` divided by 255."""
    return images / 255

def segment_images(images):
    """Placeholder segmentation step: returns `images` unchanged."""
    return images

# Builds the OCR model
def create_ocr_model(num_layers=2, filter_size=3, activation='relu',
                     num_classes=10, input_shape=(28, 28, 1)):
    """Build and compile a small convolutional classifier.

    The network is `num_layers` blocks of Conv2D(32) + 2x2 max pooling
    (at least one block is always added), followed by Flatten, Dense(128,
    relu) and a softmax output layer. It is compiled with the Adam optimizer,
    categorical cross-entropy loss and the accuracy metric.

    Args:
        num_layers: Number of convolution/pooling blocks.
        filter_size: Side length of the square convolution kernel.
        activation: Activation used by the convolution layers.
        num_classes: Size of the softmax output. Defaults to 10; pass the
            number of distinct labels when training on another label set.
        input_shape: Shape of a single input image. Defaults to (28, 28, 1).

    Returns:
        The compiled Sequential model.

    Note:
        No `padding` argument is passed to Conv2D, so the Keras default
        applies; with small inputs, many layers or large filters may leave
        no spatial extent for later layers.
    """
    kernel = (filter_size, filter_size)

    model = Sequential()
    model.add(Conv2D(32, kernel_size=kernel, activation=activation, input_shape=input_shape))
    model.add(MaxPooling2D(pool_size=(2, 2)))

    # Remaining convolution blocks (the first one is added above).
    for _ in range(num_layers - 1):
        model.add(Conv2D(32, kernel_size=kernel, activation=activation))
        model.add(MaxPooling2D(pool_size=(2, 2)))

    model.add(Flatten())
    model.add(Dense(128, activation='relu'))
    model.add(Dense(num_classes, activation='softmax'))
    model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])
    return model

# Applies the selected preprocessing steps to the images
def preprocess_images(images, scaling=False, normalization=False, segmentation=False):
    """Run the enabled preprocessing steps on a copy of `images`.

    Steps are applied in a fixed order: scaling, then normalization, then
    segmentation. With every flag False the result is just the copy.

    Args:
        images: Object with a ``copy()`` method (e.g. a numpy array).
        scaling: Apply ``scale_images``.
        normalization: Apply ``normalize_images``.
        segmentation: Apply ``segment_images``.

    Returns:
        The processed images; the input object itself is not returned.
    """
    result = images.copy()
    result = scale_images(result) if scaling else result
    result = normalize_images(result) if normalization else result
    result = segment_images(result) if segmentation else result
    return result

# Trains a default model and scores it
def evaluate_ocr_model(images, labels):
    """Train a default OCR model on `images` and score it on the same data.

    A model from ``create_ocr_model()`` is fitted for 10 epochs (batch size
    32, 20% validation split, silent). Predictions are then made on the very
    same `images`, so the reported numbers are measured on data the model
    was fitted on rather than on a held-out set.

    Args:
        images: Model inputs.
        labels: One-hot encoded targets (argmax over axis 1 gives the class).

    Returns:
        (accuracy, report): accuracy score and the text classification report.
    """
    model = create_ocr_model()
    model.fit(images, labels, epochs=10, batch_size=32, validation_split=0.2, verbose=0)

    # Collapse probabilities / one-hot rows to class ids
    predicted_classes = np.argmax(model.predict(images), axis=1)
    true_classes = np.argmax(labels, axis=1)

    return (
        accuracy_score(true_classes, predicted_classes),
        classification_report(true_classes, predicted_classes),
    )


# Runs one hyperparameter configuration
def run_hyperparameter_experiment(num_layers, filter_size, activation):
    """Train one model configuration and score it on the test data.

    Reads the module-level `x_train`, `y_train`, `x_val`, `y_val`, `x_test`
    and `y_test`. The model is fitted for 10 epochs (batch size 32, silent)
    with `x_val`/`y_val` as validation data.

    Args:
        num_layers: Forwarded to ``create_ocr_model``.
        filter_size: Forwarded to ``create_ocr_model``.
        activation: Forwarded to ``create_ocr_model``.

    Returns:
        (accuracy, report) computed on `x_test` / `y_test`.
    """
    model = create_ocr_model(num_layers=num_layers, filter_size=filter_size, activation=activation)

    # Training
    model.fit(x_train, y_train, epochs=10, batch_size=32, validation_data=(x_val, y_val), verbose=0)

    # Evaluation: collapse probabilities / one-hot rows to class ids
    predicted_classes = np.argmax(model.predict(x_test), axis=1)
    true_classes = np.argmax(y_test, axis=1)

    return (
        accuracy_score(true_classes, predicted_classes),
        classification_report(true_classes, predicted_classes),
    )


# Preprocessing and hyperparameter experiments
def experiment():
    """Run the preprocessing experiments, then the hyperparameter experiments.

    Both parts first run all of their configurations and only afterwards
    print one summary (name, accuracy, classification report) per
    configuration. The preprocessing part reads the module-level
    `train_images` and `train_labels`.
    """
    separator = "\n" + "="*50 + "\n"

    # Preprocessing experiment
    def preprocess_experiment():
        """Evaluate the default model under three preprocessing set-ups."""
        configurations = [
            # Experiment 1: scaling and normalization
            ('Scaling + Normalization',
             {'scaling': True, 'normalization': True}),
            # Experiment 2: segmentation
            ('Segmentation',
             {'segmentation': True}),
            # Experiment 3: scaling, normalization and segmentation
            ('Scaling + Normalization + Segmentation',
             {'scaling': True, 'normalization': True, 'segmentation': True}),
        ]

        results = []
        for title, flags in configurations:
            processed_images = preprocess_images(train_images, **flags)
            accuracy, report = evaluate_ocr_model(processed_images, train_labels)
            results.append({'preprocessing': title, 'accuracy': accuracy, 'report': report})

        # Print the preprocessing results
        for result in results:
            print(f"Preprocessing Techniques: {result['preprocessing']}")
            print(f"Accuracy: {result['accuracy']}")
            print("Classification Report:")
            print(result['report'])
            print(separator)

    # Hyperparameter experiment
    def hyperparameter_experiment():
        """Evaluate four model configurations on the test data."""
        configurations = [
            # Experiment 1: default hyperparameters
            ('Default', {'num_layers': 2, 'filter_size': 3, 'activation': 'relu'}),
            # Experiment 2: different number of layers
            ('3 Layers', {'num_layers': 3, 'filter_size': 3, 'activation': 'relu'}),
            # Experiment 3: different filter size
            ('Filter Size 5x5', {'num_layers': 2, 'filter_size': 5, 'activation': 'relu'}),
            # Experiment 4: different activation function
            ('Tanh Activation', {'num_layers': 2, 'filter_size': 3, 'activation': 'tanh'}),
        ]

        hyperparameter_results = []
        for title, settings in configurations:
            accuracy, report = run_hyperparameter_experiment(**settings)
            hyperparameter_results.append({'hyperparameters': title, 'accuracy': accuracy, 'report': report})

        # Print the hyperparameter results
        for result in hyperparameter_results:
            print(f"Hyperparameters: {result['hyperparameters']}")
            print(f"Accuracy: {result['accuracy']}")
            print("Classification Report:")
            print(result['report'])
            print(separator)

    preprocess_experiment()
    hyperparameter_experiment()

experiment()
