In [None]:
!pip install pytesseract

In [3]:
!pip install easyocr


Collecting easyocr
  Downloading easyocr-1.7.1-py3-none-any.whl.metadata (11 kB)
Collecting python-bidi (from easyocr)
  Downloading python_bidi-0.6.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (4.6 kB)
Collecting pyclipper (from easyocr)
  Downloading pyclipper-1.3.0.post5-cp310-cp310-manylinux_2_12_x86_64.manylinux2010_x86_64.whl.metadata (9.0 kB)
Collecting ninja (from easyocr)
  Downloading ninja-1.11.1.1-py2.py3-none-manylinux1_x86_64.manylinux_2_5_x86_64.whl.metadata (5.3 kB)
Downloading easyocr-1.7.1-py3-none-any.whl (2.9 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m2.9/2.9 MB[0m [31m28.5 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading ninja-1.11.1.1-py2.py3-none-manylinux1_x86_64.manylinux_2_5_x86_64.whl (307 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m307.2/307.2 kB[0m [31m16.8 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading pyclipper-1.3.0.post5-cp310-cp310-manylinux_2_12_x86_64.manylinux2010_x86_64.whl (

In [4]:
import os
import json
import cv2
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, OneHotEncoder
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, LSTM, Embedding
import easyocr

# Function to load data from image and annotations
def load_data(image_dir, annotation_dir):
    """Collect one row per annotated region from the JSON files in ``annotation_dir``.

    Every ``*.json`` file is parsed as a mapping whose values carry a
    ``"filename"`` (resolved against ``image_dir``) and an optional
    ``"regions"`` list.  Entries whose image cannot be read by
    ``cv2.imread`` are skipped entirely.

    Args:
        image_dir: Directory that holds the annotated images.
        annotation_dir: Directory that holds the annotation ``.json`` files.

    Returns:
        pandas.DataFrame with the columns ``image_path``, ``bbox`` (an
        ``(x, y, width, height)`` tuple), ``field`` and ``text``.  The
        columns are present even when no region was found.

    Raises:
        KeyError: If an entry has no ``"filename"`` or a region lacks one of
            the shape/attribute keys read below.
    """
    columns = ['image_path', 'bbox', 'field', 'text']
    annotations = []

    # os.listdir() returns names in arbitrary order; sorting keeps the row
    # order stable so that seeded splits of the result are reproducible.
    json_files = sorted(
        name for name in os.listdir(annotation_dir) if name.endswith('.json')
    )

    for json_file in json_files:
        json_path = os.path.join(annotation_dir, json_file)
        # Explicit encoding: the platform default is locale dependent.
        with open(json_path, 'r', encoding='utf-8') as f:
            data = json.load(f)

        for img_data in data.values():
            image_path = os.path.join(image_dir, img_data["filename"])

            # The decoded pixels are not kept; this is only a readability check.
            if cv2.imread(image_path) is None:
                continue

            for region in img_data.get('regions', []):
                shape_attr = region['shape_attributes']
                region_attr = region['region_attributes']

                annotations.append({
                    'image_path': image_path,
                    'bbox': (
                        shape_attr['x'],
                        shape_attr['y'],
                        shape_attr['width'],
                        shape_attr['height'],
                    ),
                    'field': region_attr['field'],
                    'text': region_attr['text'],
                })

    # Passing the column list gives an empty result the same schema as a full one.
    return pd.DataFrame(annotations, columns=columns)

# Load your data
# NOTE(review): these look like Google Drive paths inside Colab - confirm the
# drive is mounted before this cell runs.
image_dir = '/content/drive/MyDrive/Created_Dataset/DATA'
annotation_dir = '/content/drive/MyDrive/Created_Dataset/Annotation'
data_df = load_data(image_dir, annotation_dir)

# Fail here with a clear message instead of letting the encoders / model
# further down trip over an empty table.
if data_df.empty:
    raise ValueError(
        f"No annotated regions were loaded from {annotation_dir!r} "
        f"(images expected under {image_dir!r}); check that both directories "
        "are reachable and that the annotation filenames match the images."
    )

# Prepare data for model
def prepare_data(df, image_size=(64, 64)):
    """Build aligned image / text / field collections from the annotation table.

    Args:
        df: DataFrame with ``image_path``, ``text`` and ``field`` columns
            (one row per annotated region).
        image_size: Target size handed to ``cv2.resize`` as its ``dsize``
            argument.  Defaults to the previously hard-coded ``(64, 64)``.

    Returns:
        Tuple ``(images, texts, fields)``: ``images`` is a NumPy array built
        from the resized images, ``texts`` and ``fields`` are plain lists.
        Rows whose image cannot be read are left out of all three, so the
        three collections stay index-aligned.
    """
    # NOTE(review): the 'bbox' column is not used here, so every region of an
    # image yields the same full-frame thumbnail - confirm whether cropping to
    # the region was intended.

    # image_path -> resized image (or None when unreadable).  Several rows
    # usually share one file, so each file is decoded and resized only once.
    thumbnails = {}
    images = []
    texts = []
    fields = []

    for row in df.itertuples(index=False):
        path = row.image_path
        if path not in thumbnails:
            raw = cv2.imread(path)
            thumbnails[path] = None if raw is None else cv2.resize(raw, image_size)

        thumbnail = thumbnails[path]
        if thumbnail is None:
            continue

        images.append(thumbnail)
        texts.append(row.text)
        fields.append(row.field)

    # np.array copies the data, so rows sharing a cached thumbnail do not alias.
    return np.array(images), texts, fields

images, texts, fields = prepare_data(data_df)

# Encode the labels (fields): string -> integer id -> one-hot row
label_encoder = LabelEncoder()
integer_encoded = label_encoder.fit_transform(fields)
# scikit-learn renamed `sparse` to `sparse_output` (deprecated in 1.2, removed
# in 1.4).  Try the current spelling first and fall back for older releases.
try:
    onehot_encoder = OneHotEncoder(sparse_output=False)
except TypeError:
    onehot_encoder = OneHotEncoder(sparse=False)
# OneHotEncoder expects a 2-D (n_samples, n_features) input.
integer_encoded = integer_encoded.reshape(len(integer_encoded), 1)
labels_onehot = onehot_encoder.fit_transform(integer_encoded)

# Tokenize text data (word index capped at 5000; the classifier's embedding
# table is sized with the same number)
text_tokenizer = Tokenizer(num_words=5000)
text_tokenizer.fit_on_texts(texts)
text_sequences = text_tokenizer.texts_to_sequences(texts)

# Define a maxlen for the sequences
maxlen = 100
text_data = pad_sequences(text_sequences, maxlen=maxlen)

# Split the data into training and testing sets; the three arrays are split
# together so images, labels and padded text stay row-aligned.
# NOTE(review): X_train / X_test (the images) are not consumed by any cell
# visible here - confirm whether they are still needed.
X_train, X_test, y_train, y_test, text_train, text_test = train_test_split(
    images, labels_onehot, text_data, test_size=0.2, random_state=42)

# Function to create and train the LSTM-based text classifier
def train_text_classifier(texts, labels, maxlen, vocab_size=5000, epochs=10):
    """Build, compile and fit an Embedding -> LSTM -> Dense text classifier.

    Args:
        texts: Padded integer sequences, one row per sample, each of length
            ``maxlen``.
        labels: One-hot label matrix; ``labels.shape[1]`` sets the number of
            output classes.
        maxlen: Length of every padded sequence (the model's input width).
        vocab_size: Size of the embedding table.  Must be at least the
            tokenizer's ``num_words``; defaults to the previously hard-coded
            5000.
        epochs: Number of training epochs; defaults to the previous 10.

    Returns:
        The fitted Keras ``Sequential`` model.
    """
    # Local import keeps this block self-contained; the file-level import of
    # tensorflow.keras.layers does not include Input.
    from tensorflow.keras.layers import Input

    model = Sequential([
        # Explicit input layer instead of Embedding(input_length=...), which
        # is deprecated in Keras 3.
        Input(shape=(maxlen,)),
        Embedding(input_dim=vocab_size, output_dim=128),  # Embedding layer for text input
        LSTM(128, return_sequences=False),  # Only the final LSTM state feeds the dense head
        Dense(64, activation='relu'),
        Dense(labels.shape[1], activation='softmax'),  # One probability per class
    ])

    model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])
    # 20% of the rows passed in are held back by Keras for per-epoch validation.
    model.fit(texts, labels, epochs=epochs, validation_split=0.2)

    return model

# Train the model
text_classifier_model = train_text_classifier(text_train, y_train, maxlen)

# Save the model for later use
text_classifier_model.save('text_classifier_model.h5')

# The network alone is not enough to run inference later: the fitted
# vocabulary and the class-index -> field-name mapping are needed as well,
# so they are written next to the model.
with open('text_classifier_tokenizer.json', 'w', encoding='utf-8') as artifact_file:
    artifact_file.write(text_tokenizer.to_json())
with open('text_classifier_labels.json', 'w', encoding='utf-8') as artifact_file:
    json.dump(label_encoder.classes_.tolist(), artifact_file)

# Readers already built by _get_ocr_reader, keyed by their language tuple.
_OCR_READER_CACHE = {}


def _get_ocr_reader(languages=('en',)):
    """Return a cached ``easyocr.Reader`` for ``languages``, creating it on first use."""
    key = tuple(languages)
    if key not in _OCR_READER_CACHE:
        _OCR_READER_CACHE[key] = easyocr.Reader(list(key))
    return _OCR_READER_CACHE[key]


# Use EasyOCR to extract text from an image
def detect_and_extract_text_alternative(image_path, reader=None):
    """Run OCR on ``image_path`` and return the recognised strings.

    Args:
        image_path: Image to read; passed straight to ``reader.readtext``.
        reader: Optional object exposing ``readtext(image_path)``.  When
            omitted, a shared English ``easyocr.Reader`` is used; it is built
            once and reused, instead of being re-created on every call.

    Returns:
        List with element ``[1]`` (the text) of every detection, in the order
        the reader reported them.
    """
    if reader is None:
        reader = _get_ocr_reader()

    results = reader.readtext(image_path)

    return [detection[1] for detection in results]

# Function to classify extracted texts
def classify_texts(texts, model, tokenizer, maxlen, field_names=None):
    """Predict a field name for every string in ``texts``.

    Args:
        texts: Iterable of strings to classify.
        model: Object whose ``predict`` returns one row of class scores per
            input sequence.
        tokenizer: Object whose ``texts_to_sequences`` turns the strings into
            integer sequences.
        maxlen: Length the sequences are padded/truncated to.
        field_names: Optional sequence mapping class index -> field name.
            When omitted, the module-level ``label_encoder.classes_`` is used,
            as before.

    Returns:
        List of field names, one per input string (empty for empty input).
    """
    texts = list(texts)
    # OCR can legitimately find nothing; skip the tokenizer/model entirely
    # rather than pushing an empty batch through predict().
    if not texts:
        return []

    if field_names is None:
        field_names = label_encoder.classes_

    sequences = tokenizer.texts_to_sequences(texts)
    padded_sequences = pad_sequences(sequences, maxlen=maxlen)  # Pad sequences to maxlen

    predictions = model.predict(padded_sequences)
    # Highest-scoring class per row.
    class_indices = np.argmax(predictions, axis=1)

    return [field_names[idx] for idx in class_indices]

# OCR an image and label every recognised string
def process_image(image_path, model, tokenizer, maxlen):
    """Extract the text snippets from ``image_path`` and classify each one.

    Args:
        image_path: Image handed to ``detect_and_extract_text_alternative``.
        model, tokenizer, maxlen: Forwarded unchanged to ``classify_texts``.

    Returns:
        List of ``(extracted_text, classified_field)`` tuples, in OCR order.
    """
    ocr_strings = detect_and_extract_text_alternative(image_path)
    predicted_fields = classify_texts(ocr_strings, model, tokenizer, maxlen)

    # Pair each snippet with the label predicted for it.
    return [(snippet, label) for snippet, label in zip(ocr_strings, predicted_fields)]

# Demo: run the full OCR + classification pipeline on one sample image
image_path = '/content/drive/MyDrive/Created_Dataset/SS.png'
results = process_image(
    image_path,
    model=text_classifier_model,
    tokenizer=text_tokenizer,
    maxlen=maxlen,
)

# Show the (text, field) pairs
# NOTE(review): in the recorded output every string is labelled
# 'Student_Name' - confirm the classifier is actually discriminating.
print("Results:", results)




Epoch 1/10
[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 486ms/step - accuracy: 0.1692 - loss: 2.0742 - val_accuracy: 0.3000 - val_loss: 2.0154
Epoch 2/10
[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 255ms/step - accuracy: 0.3900 - loss: 1.9793 - val_accuracy: 0.3000 - val_loss: 1.8188
Epoch 3/10
[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 249ms/step - accuracy: 0.4110 - loss: 1.7115 - val_accuracy: 0.3000 - val_loss: 1.5252
Epoch 4/10
[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 258ms/step - accuracy: 0.4017 - loss: 1.3764 - val_accuracy: 0.2000 - val_loss: 1.5562
Epoch 5/10
[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 250ms/step - accuracy: 0.3598 - loss: 1.2769 - val_accuracy: 0.2000 - val_loss: 1.5448
Epoch 6/10
[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 148ms/step - accuracy: 0.2475 - loss: 1.2716 - val_accuracy: 0.3000 - val_loss: 1.5478
Epoch 7/10
[1m3/3[0m [32m━━━━━━━━━━━━



Progress: |██████████████████████████████████████████████████| 100.0% Complete



Progress: |██████████████████████████████████████████████████| 100.0% Complete

  net.load_state_dict(copyStateDict(torch.load(trained_model, map_location=device)))
  state_dict = torch.load(model_path, map_location=device)


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 205ms/step
Results: [('FAUGET', 'Student_Name'), ('HiGh', 'Student_Name'), ('SCHOOL', 'Student_Name'), ('STUDENT CARD', 'Student_Name'), ('Student Name', 'Student_Name'), ('Kyrie Petrakis', 'Student_Name'), ('Student ID', 'Student_Name'), ('123-456-7890', 'Student_Name'), ('D.O.B', 'Student_Name'), ('12/5/2000', 'Student_Name'), ('Home', 'Student_Name'), ('123 Anywhere St.,', 'Student_Name'), ('Address', 'Student_Name'), ('City', 'Student_Name'), ('In17 &', 'Student_Name'), ('Princlpal Name', 'Student_Name'), ('Any', 'Student_Name')]
