In [1]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
for dirname, _, filenames in os.walk('/kaggle/input'):
    for filename in filenames:
        print(os.path.join(dirname, filename))

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

/kaggle/input/arxiv/arxiv-metadata-oai-snapshot.json


In [20]:
import os
import shutil
from sklearn.model_selection import train_test_split
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, Dropout
from tensorflow.keras.optimizers import Adam

In [21]:
# 1. Veri Seti Yolu ve Yapılandırması
data_path = "/kaggle/input/iam-handwritten-forms-dataset"  # Kaggle dataset path
output_path = "/kaggle/working"  # Çalışma dizini


In [22]:
# 2. Görüntülerin Listelemesi
image_files = []
for root, _, files in os.walk(os.path.join(data_path, "data")):
    for file in files:
        if file.lower().endswith(('.png', '.jpg', '.jpeg')):
            image_files.append(os.path.join(root, file))

In [23]:

print(f"Toplam Görüntü: {len(image_files)}")

Toplam Görüntü: 1539


In [24]:
# 3. Train/Test Bölme (%80 Train, %20 Test)
train_files, test_files = train_test_split(image_files, test_size=0.2, random_state=42)
print(f"Train Set: {len(train_files)} görüntü")
print(f"Test Set: {len(test_files)} görüntü")

Train Set: 1231 görüntü
Test Set: 308 görüntü


In [25]:
# 4. Sınıflandırma için Klasör Yapısının Hazırlanması
os.makedirs(os.path.join(output_path, "train"), exist_ok=True)
os.makedirs(os.path.join(output_path, "test"), exist_ok=True)

In [26]:
for file in train_files:
    class_name = os.path.basename(os.path.dirname(file))  # Sınıf adı
    class_dir = os.path.join(output_path, "train", class_name)
    os.makedirs(class_dir, exist_ok=True)
    shutil.copy(file, os.path.join(class_dir, os.path.basename(file)))

In [27]:
for file in test_files:
    class_name = os.path.basename(os.path.dirname(file))
    class_dir = os.path.join(output_path, "test", class_name)
    os.makedirs(class_dir, exist_ok=True)
    shutil.copy(file, os.path.join(class_dir, os.path.basename(file)))

In [28]:
print("Veriler train ve test klasörlerine taşındı.")

Veriler train ve test klasörlerine taşındı.


In [29]:
# 5. Görüntü İşleme ve Data Generator Ayarı
BATCH_SIZE = 32
IMG_SIZE = (150, 150)

train_datagen = ImageDataGenerator(
    rescale=1.0 / 255,
    rotation_range=20,
    width_shift_range=0.2,
    height_shift_range=0.2,
    shear_range=0.2,
    zoom_range=0.2,
    horizontal_flip=True
)


In [30]:
validation_datagen = ImageDataGenerator(rescale=1.0 / 255)

train_generator = train_datagen.flow_from_directory(
    os.path.join(output_path, "train"),
    target_size=IMG_SIZE,
    batch_size=BATCH_SIZE,
    class_mode="binary"  # İkili sınıflandırma için
)

validation_generator = validation_datagen.flow_from_directory(
    os.path.join(output_path, "test"),
    target_size=IMG_SIZE,
    batch_size=BATCH_SIZE,
    class_mode="binary"
)

Found 1231 images belonging to 579 classes.
Found 308 images belonging to 232 classes.


In [35]:
# 6. Model Oluşturma
model = Sequential([
    Conv2D(64, (3, 3), activation='relu', input_shape=(150, 150, 3)),
    MaxPooling2D(2, 2),
    Conv2D(128, (3, 3), activation='relu'),
    MaxPooling2D(2, 2),
    Conv2D(256, (3, 3), activation='relu'),
    MaxPooling2D(2, 2),
    Flatten(),
    Dense(512, activation='relu'),
    Dropout(0.5),
    Dense(1, activation='sigmoid')
])

model.compile(optimizer=Adam(learning_rate=0.001), loss='binary_crossentropy', metrics=['accuracy'])

model.summary()

In [None]:
# 7. Model Eğitimi
history = model.fit(
    train_generator,
    steps_per_epoch=train_generator.samples // BATCH_SIZE,
    validation_data=validation_generator,
    validation_steps=validation_generator.samples // BATCH_SIZE,
    epochs=10
)

Epoch 1/10
[1m38/38[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m209s[0m 5s/step - accuracy: 0.0062 - loss: -83220824.0000 - val_accuracy: 0.0035 - val_loss: -1390125696.0000
Epoch 2/10
[1m38/38[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 12ms/step - accuracy: 0.0000e+00 - loss: -3952185088.0000 - val_accuracy: 0.0000e+00 - val_loss: -1904569984.0000
Epoch 3/10
[1m38/38[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m184s[0m 4s/step - accuracy: 3.8634e-04 - loss: -27248508928.0000 - val_accuracy: 0.0035 - val_loss: -129928732672.0000
Epoch 4/10
[1m38/38[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 12ms/step - accuracy: 0.0000e+00 - loss: -302651441152.0000 - val_accuracy: 0.0000e+00 - val_loss: -157463248896.0000
Epoch 5/10
[1m38/38[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m189s[0m 4s/step - accuracy: 0.0036 - loss: -917043871744.0000 - val_accuracy: 0.0035 - val_loss: -2109635166208.0000
Epoch 6/10
[1m38/38[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m 

In [33]:
# 8. Modeli Kaydetme
model.save(os.path.join(output_path, "handwriting_recognition_model.h5"))
print("Model başarıyla kaydedildi.")

Model başarıyla kaydedildi.


In [None]:
# 9. Eğitim Sonuçlarını Görselleştirme
import matplotlib.pyplot as plt

plt.figure(figsize=(12, 4))
plt.subplot(1, 2, 1)
plt.plot(history.history['accuracy'], label='Eğitim Başarımı')
plt.plot(history.history['val_accuracy'], label='Doğrulama Başarımı')
plt.legend()
plt.title('Başarım')

plt.subplot(1, 2, 2)
plt.plot(history.history['loss'], label='Eğitim Kaybı')
plt.plot(history.history['val_loss'], label='Doğrulama Kaybı')
plt.legend()
plt.title('Kaybı')

plt.show()