In [None]:
# Importing Libraries
import os
import random
from pathlib import Path
from PIL import Image
import matplotlib.pyplot as plt
import numpy as np
from tqdm.auto import tqdm
from timeit import default_timer as timer
import cv2
import pytesseract
import torch
import torch.nn as nn
import torch.nn.functional as F
import torchvision
from torch.utils.data import DataLoader,random_split
from torchvision import transforms, datasets

In [None]:
## Function to go through the files.
def walk_through_dir(dir_path):
  """
  Print a one-line summary for every directory found under dir_path.

  The tree is traversed with os.walk; for each visited directory the number
  of immediate subdirectories and the number of files are reported.

  Args:
    dir_path (str or pathlib.Path): root directory to inspect

  Returns:
    None. The summary is written to standard output only.
  """
  for dirpath, dirnames, filenames in os.walk(dir_path):
    summary_line = f"There are {len(dirnames)} directories and {len(filenames)} images in '{dirpath}'."
    print(summary_line)

In [None]:
# Print the directory/file counts for the whole dataset folder tree.
walk_through_dir("/kaggle/input/iam-handwritten-forms-dataset")

In [None]:
# Use the GPU when PyTorch can see one, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"
device

In [None]:
# Dataset root and the "data" folder beneath it that is scanned for images.
path = Path("/kaggle/input/iam-handwritten-forms-dataset")
data_path = path.joinpath("data")

In [None]:
# Every entry that sits exactly two levels below data_path (data/<folder>/<file>).
image_path_list = [*data_path.glob("*/*")]
random_image_path = random.choice(image_path_list)

# The name of the containing folder is used as the class label.
image_class = random_image_path.parent.stem

img = Image.open(random_image_path)

# One print call with a newline separator produces the same four lines.
print(
    f"Random Image Path : {random_image_path}",
    f"Image Class : {image_class}",
    f"Image Height : {img.height}",
    f"Image Width : {img.width}",
    sep="\n",
)
img

In [None]:
# Preprocessing applied to every image, in order:
#   1. collapse to a single grayscale channel
#   2. resize to 128x128
#   3. convert to a tensor
#   4. normalize with mean 0.5 and std 0.5
_transform_steps = [
    transforms.Grayscale(num_output_channels=1),
    transforms.Resize((128, 128)),
    transforms.ToTensor(),
    transforms.Normalize((0.5,), (0.5,)),
]
image_transforms = transforms.Compose(_transform_steps)
image_transforms

In [None]:
def plot_transformed_image(image_paths,transform,n=3,seed=42):
    """
    Plot randomly chosen images next to their transformed versions.

    One figure with two panels is created per image: the left panel shows the
    file as opened by PIL, the right panel shows the output of ``transform``.

    Args:
        image_paths: sequence of image paths to draw from.
        transform: callable applied to the opened image; its result is
            permuted with (1, 2, 0), so it is assumed to be a 3-D
            channels-first tensor.
        n: number of images to plot.
        seed: value passed to ``random.seed``. Note that this reseeds the
            module-level random generator.
    """
    random.seed(seed)
    for image_path in random.sample(image_paths, k=n):
        with Image.open(image_path) as f:
            fig, (original_ax, transformed_ax) = plt.subplots(1, 2)

            # Left panel: the untouched image.
            original_ax.imshow(f)
            original_ax.set_title(f"Original Size : {f.size}")
            original_ax.axis(False)

            # Right panel: move the first axis last before handing it to imshow.
            transformed_image = transform(f).permute(1,2,0)
            transformed_ax.imshow(transformed_image)
            transformed_ax.set_title(f"Transformed \nsize : {transformed_image.shape}")
            transformed_ax.axis(False)

            fig.suptitle(f"Class : {image_path.parent.stem}",fontsize = 16)

In [None]:
# Compare a few images before and after the preprocessing pipeline
# (default arguments: n=3 images, seed=42).
plot_transformed_image(image_path_list,image_transforms)

In [None]:
# Build the dataset from the folder tree under data_path; every image goes
# through image_transforms, labels are left untouched.
train_data = datasets.ImageFolder(
    root=data_path,
    transform=image_transforms,
    target_transform=None,
)
train_data

In [None]:
## Splitting Data for Faster Experiment.
# Disabled snippet, kept for reference. It is written as `#` comments instead
# of a bare triple-quoted string: a string literal that ends a cell is
# evaluated and echoed back as the cell's output.
#
# subset_size = 250
# dataset_size = len(train_data)
#
# if subset_size > dataset_size:
#     subset_size = dataset_size
#
# # Split the dataset into subset and remaining
# subset, _ = random_split(train_data, [subset_size, dataset_size - subset_size])

In [None]:
# Hold out 20% of the data for validation; the remainder is used for training.
val_size = int(0.2 * len(train_data))
train_size = len(train_data) - val_size

# A dedicated, seeded generator makes the split identical on every run.
# Without it the split depends on the global RNG state at the moment this
# cell executes, so train/validation membership changes between runs.
split_generator = torch.Generator().manual_seed(42)
train_subset, val_subset = random_split(train_data,
                                        [train_size, val_size],
                                        generator=split_generator)

In [None]:
# Sanity check: sizes of the training and validation subsets.
len(train_subset) , len(val_subset)

In [None]:
## Creating DataLoaders
# os.cpu_count() returns None when the number of CPUs cannot be determined;
# fall back to 0 so the DataLoader loads batches in the main process instead
# of receiving an invalid worker count.
num_workers = os.cpu_count() or 0

# Training batches are reshuffled; validation batches keep a fixed order.
train_dataloader = DataLoader(dataset = train_subset,
                             batch_size = 32,
                             shuffle=True,
                             num_workers = num_workers)
val_dataloader = DataLoader(dataset = val_subset,
                           batch_size = 32,
                           shuffle = False,
                           num_workers = num_workers)

train_dataloader , val_dataloader

In [None]:
# Show the first dataset sample as a visual sanity check.
image,label = train_data[0]

# image_transforms normalizes with mean 0.5 / std 0.5, so the tensor is no
# longer in [0, 1]. ToPILImage scales floats by 255 and casts to bytes, which
# corrupts negative values, so undo the normalization before converting.
unnormalized_image = (image * 0.5 + 0.5).clamp(0, 1)

plt.imshow(transforms.ToPILImage()(unnormalized_image),cmap="gray")
plt.title(f"Label : {label}")
plt.axis(False)
plt.show()

In [None]:
## Creating Preprocess_image to improve image quality before extracting text
def preprocess_image(image):
    """
    Binarize a PIL image with Otsu thresholding.

    The image is converted to RGB when it is in any other mode, turned into a
    NumPy array, reduced to grayscale with OpenCV and thresholded.

    Args:
        image: PIL image (anything exposing ``mode`` and ``convert``).

    Returns:
        The thresholded image array returned by ``cv2.threshold``.
    """
    rgb_image = image if image.mode == 'RGB' else image.convert('RGB')

    gray_image = cv2.cvtColor(np.array(rgb_image), cv2.COLOR_RGB2GRAY)

    # THRESH_OTSU is combined with THRESH_BINARY; 0 is passed as the threshold
    # argument and 255 as the maximum value.
    otsu_flags = cv2.THRESH_BINARY + cv2.THRESH_OTSU
    _, thresh_image = cv2.threshold(gray_image, 0, 255, otsu_flags)

    return thresh_image

def extract_text_from_image(image):
    """
    Run Tesseract OCR on an image after binarizing it.

    Args:
        image: PIL image to read text from.

    Returns:
        The recognised text with leading and trailing whitespace removed.
    """
    binarized = Image.fromarray(preprocess_image(image))
    # Tesseract is invoked with page segmentation mode 11.
    raw_text = pytesseract.image_to_string(binarized, config='--psm 11')
    return raw_text.strip()

In [None]:
# Try the OCR helper on one randomly chosen image from the dataset.
image_path_list = [*data_path.glob("*/*")]
random_image_path = random.choice(image_path_list)
image = Image.open(random_image_path)

# extracted_text is reused later as the seed for text generation.
extracted_text = extract_text_from_image(image)
print(f"Extracted Image : {extracted_text}")

In [None]:
# Display the image that was just passed through OCR.
image

In [None]:
## HandWritingRNN class
class HandWritingRNN(nn.Module):
    """
    LSTM followed by a linear layer applied to every time step.

    Args:
        input_shape: number of features per time step fed to the LSTM.
        hidden_units: size of the LSTM hidden state.
        output_shape: number of features produced per time step.
        num_layers: number of stacked LSTM layers.
    """

    def __init__(self,
                input_shape : int,
                hidden_units : int,
                output_shape : int,
                num_layers : int = 1):
        super().__init__()
        self.hidden_units = hidden_units
        self.num_layers = num_layers
        # Attribute names RNN / FC are part of the state_dict keys; keep them
        # stable so previously saved checkpoints still load.
        self.RNN = nn.LSTM(input_shape,
                          hidden_units,
                          num_layers,
                          batch_first = True)
        self.FC = nn.Linear(hidden_units,
                           output_shape)

    def forward(self,x,hidden=None):
        """
        Run the LSTM and project every time step through the linear layer.

        Args:
            x: input of shape (batch, seq_len, input_shape); the LSTM is
                built with ``batch_first=True``.
            hidden: optional ``(h_0, c_0)`` tuple as produced by
                ``init_hidden``. When omitted, the LSTM starts from an
                all-zero state.

        Returns:
            Tuple ``(out, hidden)``: the projected outputs and the final
            ``(h_n, c_n)`` state of the LSTM.
        """
        out,hidden = self.RNN(x,hidden)
        out = self.FC(out)
        return out,hidden

    def init_hidden(self,batch_size,device):
        """
        Create an all-zero ``(h_0, c_0)`` state on ``device``.

        Each tensor has shape (num_layers, batch_size, hidden_units).
        """
        state_shape = (self.num_layers, batch_size, self.hidden_units)
        return (torch.zeros(state_shape, device = device),
                torch.zeros(state_shape, device = device))

In [None]:
def train_step(model :torch.nn.Module,
              dataloader : torch.utils.data.DataLoader,
              loss_fn : torch.nn.Module,
              optimizer : torch.optim.Optimizer,
              device : torch.device):
    """
    Train ``model`` for one pass over ``dataloader``.

    Every batch is flattened to one vector per sample, fed to the model as a
    sequence of length one, and the model output is compared against that
    same flattened input (the labels from the dataloader are ignored).

    Args:
        model: module exposing ``init_hidden(batch_size, device)`` and a
            ``forward(x, hidden)`` that returns ``(outputs, hidden)``.
        dataloader: iterable of ``(inputs, labels)`` batches with a length.
        loss_fn: loss applied to ``(outputs, flattened_inputs)``.
        optimizer: optimizer stepping the model parameters.
        device: device the inputs and the hidden state are placed on.

    Returns:
        Sum of the per-batch losses divided by ``len(dataloader)``.
    """
    model.train()
    batch_losses = []

    for images, _ in dataloader:
        images = images.to(device)

        # The four-way unpack insists on a 4-D batch; only the batch size is
        # needed afterwards.
        n_samples, _channels, _height, _width = images.size()
        # Flatten each sample, then insert a sequence axis of length one.
        sequence = images.view(n_samples, -1).unsqueeze(1)

        initial_state = model.init_hidden(sequence.size(0), device)

        optimizer.zero_grad()

        reconstruction, _ = model(sequence, initial_state)

        # Drop the sequence axis on both sides before comparing.
        loss = loss_fn(reconstruction.squeeze(1),
                       sequence.view(sequence.size(0), -1))

        loss.backward()
        optimizer.step()
        batch_losses.append(loss.item())

    return sum(batch_losses, 0.0) / len(dataloader)

In [None]:
def test_step(model : torch.nn.Module,
             dataloader : torch.utils.data.DataLoader,
             loss_fn : torch.nn.Module,
             device = torch.device):
    model.eval()
    test_loss = 0.0 
    with torch.inference_mode():
        for X,_ in dataloader:
            X = X.to(device)
            
            batch_size,channels,height,width = X.size()
            X = X.view(batch_size,-1)
            X = X.unsqueeze(1)
            
            hidden = model.init_hidden(X.size(0),device)
            
            outputs, _ = model(X, hidden)

            # Flatten outputs and targets
            outputs = outputs.squeeze(1)  # Remove sequence dimension
            targets = X.view(X.size(0), -1)  # Flatten targets

            loss = loss_fn(outputs, targets)
            test_loss += loss.item()
    return test_loss / len(dataloader)

In [None]:
def train(model: torch.nn.Module,
          train_dataloader: torch.utils.data.DataLoader,
          val_dataloader: torch.utils.data.DataLoader,
          optimizer: torch.optim.Optimizer,
          loss_fn: torch.nn.Module,
          device: torch.device,
          epochs: int = 5):
    """
    Run the training / validation loop and collect the loss history.

    The model is moved to ``device`` first. Each epoch runs ``train_step``
    on ``train_dataloader`` followed by ``test_step`` on ``val_dataloader``
    and prints both losses.

    Returns:
        Dict with the keys ``"train_loss"`` and ``"val_loss"``, each holding
        one value per epoch.
    """
    model.to(device)
    train_history, val_history = [], []

    for epoch in range(epochs):
        train_loss = train_step(model=model,
                                dataloader=train_dataloader,
                                loss_fn=loss_fn,
                                optimizer=optimizer,
                                device=device)
        val_loss = test_step(model=model,
                             dataloader=val_dataloader,
                             loss_fn=loss_fn,
                             device=device)

        print(f"Epoch: {epoch + 1} | train_loss: {train_loss:.3f} | val_loss: {val_loss:.3f}")
        train_history.append(train_loss)
        val_history.append(val_loss)

    return {"train_loss": train_history,
            "val_loss": val_history}


In [None]:
# Seed BEFORE the model is constructed: the layers draw their initial weights
# from the global RNG at construction time, so seeding afterwards (as this
# cell did previously) leaves the starting weights different on every run.
torch.manual_seed(42)
torch.cuda.manual_seed(42)

# One flattened 128x128 grayscale image per time step; the model is asked to
# output a vector of the same size.
input_shape = 128*128
output_shape = input_shape
hidden_units = 256
num_layers = 2
model = HandWritingRNN(input_shape = input_shape,
                      output_shape = output_shape,
                      hidden_units = hidden_units,
                      num_layers = num_layers)
loss_fn = nn.MSELoss()
optimizer = torch.optim.Adam(model.parameters(),lr=0.002)

start_time = timer()

model_results = train(model, train_dataloader, val_dataloader, optimizer, loss_fn, epochs=5, device=device)

end_time = timer()

print(f"Total Training Time : {end_time - start_time:.3f}seconds")

In [None]:
import torch.nn.functional as F
def sample(predictions, temperature=1.0):
    """
    Draw one index from a vector of unnormalized scores.

    Args:
        predictions: 1-D tensor of scores (logits).
        temperature: divisor applied to the scores before the softmax.
            A value of exactly 0 selects the highest-scoring index
            directly; previously this divided by zero and made
            ``torch.multinomial`` fail on the resulting NaN probabilities.

    Returns:
        The selected index as a Python int.
    """
    if temperature == 0:
        # Greedy choice: no scaling or random draw is involved.
        return torch.argmax(predictions, dim=-1).item()

    scaled = predictions / temperature
    probabilities = F.softmax(scaled, dim=-1)
    return torch.multinomial(probabilities, 1).item()

In [None]:
import torch.nn.functional as F
def generate_text(model, seed_text, max_length, device, temperature=1.0):
    """
    Append ``max_length`` sampled characters to ``seed_text``.

    The seed is encoded as a vector of character code points, zero-padded or
    truncated to the model's input width, and fed to the model as a sequence
    of length one. At each step an index is sampled from the model output
    and ``chr(index)`` is appended to the text.

    Args:
        model: module exposing ``init_hidden(batch_size, device)`` and a
            ``forward(x, hidden)`` that returns ``(outputs, hidden)``.
        seed_text: starting text.
        max_length: number of characters to generate.
        device: device the tensors are created on.
        temperature: forwarded to ``sample``.

    Returns:
        ``seed_text`` followed by the generated characters.

    NOTE(review): the training loop in this notebook fits the model to
    reconstruct flattened images, not to predict characters; confirm that
    driving it as a character generator is intended.
    """
    model.eval()
    generated_chars = []

    # Width of one input step. Read it from the model's LSTM when available
    # so the function follows the model instead of a hard-coded 128*128;
    # 16384 stays as the fallback for models without an `RNN.input_size`.
    input_size = getattr(getattr(model, "RNN", None), "input_size", 16384)

    # Convert the text into a tensor of code points, shape (1, len(seed_text)).
    seed_tensor = torch.tensor([ord(char) for char in seed_text], dtype=torch.float32).unsqueeze(0).to(device)

    # Zero-pad on the right, or truncate, to exactly input_size features.
    if seed_tensor.size(-1) < input_size:
        padding_size = input_size - seed_tensor.size(-1)
        seed_tensor = F.pad(seed_tensor, (0, padding_size), 'constant', 0)
    else:
        seed_tensor = seed_tensor[:, :input_size]

    # Insert the sequence axis: (1, 1, input_size).
    seed_tensor = seed_tensor.unsqueeze(1)

    hidden = model.init_hidden(seed_tensor.size(0), device)

    print(f"Initial seed_tensor shape: {seed_tensor.shape}")
    print(f"Initial Hidden state shape: {hidden[0].shape}, {hidden[1].shape}")

    with torch.no_grad():
        for _ in range(max_length):
            output, hidden = model(seed_tensor, hidden)

            output = output.squeeze(1)
            predictions = output[-1]

            predicted_char_index = sample(predictions, temperature)
            predicted_char = chr(predicted_char_index)
            generated_chars.append(predicted_char)

            # Encode the new character the same way as the seed.
            new_char_tensor = torch.tensor([ord(predicted_char)], dtype=torch.float32).unsqueeze(0).to(device)
            new_char_tensor = F.pad(new_char_tensor, (0, input_size - new_char_tensor.size(-1)), 'constant', 0)
            new_char_tensor = new_char_tensor.unsqueeze(1)

            # Slide the window. The sequence axis has length one, so dropping
            # the first step leaves only the new character as the next input.
            seed_tensor = torch.cat((seed_tensor[:, 1:, :], new_char_tensor), dim=1)

    return seed_text + "".join(generated_chars)

In [None]:
# Seed the generator with the text that OCR extracted earlier.
seed_text = extracted_text
max_length = 200
temperature = 0.8
model.to(device)
# temperature was defined above but never passed on, so generation silently
# ran with the default of 1.0; forward it explicitly.
generated_text = generate_text(model, seed_text, max_length, device, temperature=temperature)
print(f"Generated text: {generated_text}")

In [None]:
# Persist only the model's state_dict to a file in the working directory.
torch.save(model.state_dict(), "handwriting_model.pth")

In [None]:
# Rebuild the architecture with the same hyperparameters used for training,
# then restore the saved weights.
model = HandWritingRNN(input_shape=128*128, output_shape=128*128, hidden_units=256, num_layers=2)
# The fresh model lives on the CPU. map_location="cpu" lets a checkpoint that
# was written from a GPU session load in a CPU-only session as well.
model.load_state_dict(torch.load("handwriting_model.pth", map_location="cpu"))
model.eval()  # Put the model into evaluation mode

In [None]:
# Run OCR on the scanned pages numbered 38 through 69
# (the upper bound of range, 70, is exclusive).
for i in range(38, 70):
    image_path = f"/kaggle/input/files-png/img/Document 30042018_{i}.jpg"  # Path of page i
    # The context manager releases the file handle once the page is processed.
    with Image.open(image_path) as image:
        extracted_text = extract_text_from_image(image)  # Extract the text from the handwriting
    print(f"Extracted Text from Document 30042018_{i}.jpg: {extracted_text}")
    print("----------------------------------------------")


In [None]:
# Run OCR on the scanned pages numbered 69 and 70
# (the upper bound of range, 71, is exclusive).
for i in range(69, 71):
    image_path = f"/kaggle/input/files-png/img/Document 30042018_{i}.jpg"  # Path of page i
    # The context manager releases the file handle once the page is processed.
    with Image.open(image_path) as image:
        extracted_text = extract_text_from_image(image)  # Extract the text from the handwriting
    print(f"Extracted Text from Document 30042018_{i}.jpg: {extracted_text}")
    print("----------------------------------------------")

In [None]:
# Run OCR on a single, unrelated test image.
image_path = f"/kaggle/input/imgbin/imgbin-text-document-writing-english-information-family-quote-45PiHQd67YrAs0ez1VnQBC7EX.jpg"  # Path of the image
# The context manager releases the file handle once the image is processed.
with Image.open(image_path) as image:
    extracted_text = extract_text_from_image(image)  # Extract the text from the handwriting
# Label the output with the file that was actually read. The previous message
# reused the loop variable `i` left over from an earlier cell and printed a
# "Document 30042018_<i>.jpg" name that does not match this image.
print(f"Extracted Text from {Path(image_path).name}: {extracted_text}")
print("----------------------------------------------")