In [7]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from keras.utils import to_categorical

df = pd.read_csv('alphabets_28x28.csv', nrows=50000)
df = df.dropna()
sentiment_data = pd.read_csv('sentiment_analysis_dataset.csv')

X = df.iloc[:, 1:].values.astype(np.float32)  
y = df.iloc[:, 0].values.astype(str)     

X = X.reshape(X.shape[0], 28, 28, 1)

le = LabelEncoder()
y = le.fit_transform(y)
y = to_categorical(y, num_classes=26)

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

from keras.models import Sequential
from keras.layers import Dense, Flatten, Conv2D, MaxPooling2D

ocr_model = Sequential([
    Conv2D(32, kernel_size=(3, 3), activation='relu', input_shape=(28, 28, 1)),
    MaxPooling2D(pool_size=(2, 2)),
    Conv2D(64, kernel_size=(3, 3), activation='relu'),
    MaxPooling2D(pool_size=(2, 2)),
    Flatten(),
    Dense(128, activation='relu'),
    Dense(26, activation='softmax')
])

ocr_model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

# Train the OCR model
ocr_model.fit(X_train, y_train, validation_data=(X_test, y_test), epochs=10, batch_size=128)


#Sentiments
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.utils import to_categorical

sentiment_data = pd.read_csv('sentiment_analysis_dataset.csv')

sentences = sentiment_data['line'].values
sentiments = sentiment_data['sentiment'].values

le_sentiment = LabelEncoder()
sentiments = le_sentiment.fit_transform(sentiments)
sentiments = to_categorical(sentiments, num_classes=3)

tokenizer = Tokenizer(num_words=5000)
tokenizer.fit_on_texts(sentences)

X_tokens = tokenizer.texts_to_sequences(sentences)

maxlen = 100
X_pad = pad_sequences(X_tokens, padding='post', maxlen=maxlen)

X_train_pad, X_test_pad, sentiments_train, sentiments_test = train_test_split(X_pad, sentiments, test_size=0.2, random_state=42)

#------------------------------------------------------------------------------------------------------------------------------

from keras.models import Sequential
from keras.layers import Embedding, LSTM, Dense

sentiment_model = Sequential([
    Embedding(input_dim=5000, output_dim=100, input_length=100),
    LSTM(100, return_sequences=False),
    Dense(3, activation='softmax')
])

sentiment_model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

sentiment_model.fit(X_train_pad, sentiments_train, validation_data=(X_test_pad, sentiments_test), epochs=10, batch_size=128)

#---------------------------------------------------------------------------------------------------------------------------
import cv2
import numpy as np
from keras.models import load_model
from sklearn.preprocessing import LabelEncoder
import pandas as pd

# Function to perform OCR on an image
def ocr_image(image_path):
    img = cv2.imread(image_path, cv2.IMREAD_GRAYSCALE)
    img = cv2.resize(img, (28, 28))
    img = img / 255.0
    img = img.reshape(1, 28, 28, 1)
    prediction = ocr_model.predict(img)
    label = le.inverse_transform([np.argmax(prediction)])
    return label[0]

# Test OCR on a sample image
ocr_text = ocr_image('target_images/line_4.png')
print(f'OCR Result: {ocr_text}')

#-----------------------------------------------------------------------------------------------------------------------------
from textblob import TextBlob

# Function to perform sentiment analysis
def sentiment_analysis(text):
    blob = TextBlob(text)
    sentiment = blob.sentiment.polarity
    if sentiment > 0:
        return 'positive'
    elif sentiment < 0:
        return 'negative'
    else:
        return 'neutral'

# Test sentiment analysis on OCR output
ocr_text = 'Sample text for testing'
sentiment = sentiment_analysis(ocr_text)
print(f'Sentiment: {sentiment}')

import os
import pandas as pd
from keras.models import load_model
import cv2
from textblob import TextBlob
import numpy as np
from sklearn.preprocessing import LabelEncoder

# Directory containing target images
target_images_dir = 'target_images'

# Read target labels
target_labels = pd.read_csv('target_labels.csv')

# Loop through each image
results = []
for image_name in os.listdir(target_images_dir):
    image_path = os.path.join(target_images_dir, image_name)
    ocr_text = ocr_image(image_path)
    print(ocr_text)
    sentiment = sentiment_analysis(ocr_text)
    results.append({'image': image_name, 'ocr_text': ocr_text, 'sentiment': sentiment})

results_df = pd.DataFrame(results)
results_df.to_csv('results.csv', index=False)

  df = pd.read_csv('alphabets_28x28.csv', nrows=50000)


Epoch 1/10


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m312/312[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m12s[0m 38ms/step - accuracy: 0.8782 - loss: 3.7444 - val_accuracy: 0.9882 - val_loss: 0.0320
Epoch 2/10
[1m312/312[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m11s[0m 36ms/step - accuracy: 0.9933 - loss: 0.0209 - val_accuracy: 0.9952 - val_loss: 0.0146
Epoch 3/10
[1m312/312[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m11s[0m 36ms/step - accuracy: 0.9972 - loss: 0.0094 - val_accuracy: 0.9972 - val_loss: 0.0115
Epoch 4/10
[1m312/312[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m11s[0m 36ms/step - accuracy: 0.9983 - loss: 0.0041 - val_accuracy: 0.9954 - val_loss: 0.0138
Epoch 5/10
[1m312/312[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m11s[0m 36ms/step - accuracy: 0.9987 - loss: 0.0040 - val_accuracy: 0.9964 - val_loss: 0.0132
Epoch 6/10
[1m312/312[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m11s[0m 36ms/step - accuracy: 0.9987 - loss: 0.0036 - val_accuracy: 0.9946 - val_loss: 0.0167
Epoch 7/10
[1m312/312[0m 



[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 1s/step - accuracy: 0.3333 - loss: 1.0987 - val_accuracy: 0.3333 - val_loss: 1.0989
Epoch 2/10
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 114ms/step - accuracy: 0.3333 - loss: 1.0989 - val_accuracy: 0.3333 - val_loss: 1.0987
Epoch 3/10
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 121ms/step - accuracy: 0.3333 - loss: 1.0987 - val_accuracy: 0.3333 - val_loss: 1.0987
Epoch 4/10
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 130ms/step - accuracy: 0.3333 - loss: 1.0987 - val_accuracy: 0.3333 - val_loss: 1.0987
Epoch 5/10
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 129ms/step - accuracy: 0.3333 - loss: 1.0987 - val_accuracy: 0.3333 - val_loss: 1.0987
Epoch 6/10
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 125ms/step - accuracy: 0.3333 - loss: 1.0987 - val_accuracy: 0.3333 - val_loss: 1.0986
Epoch 7/10
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[

In [None]:
import cv2
import numpy as np

def segment_characters(image):
    # Convert the image to grayscale and apply binary thresholding
    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    _, binary = cv2.threshold(gray, 128, 255, cv2.THRESH_BINARY_INV)

    # Find contours
    contours, _ = cv2.findContours(binary, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
    
    # Sort contours from left to right
    contours = sorted(contours, key=lambda ctr: cv2.boundingRect(ctr)[0])
    
    character_images = []
    for contour in contours:
        x, y, w, h = cv2.boundingRect(contour)
        char_img = binary[y:y+h, x:x+w]
        char_img = cv2.resize(char_img, (28, 28))
        character_images.append(char_img)
    
    return character_images

# Test segmentation
image_path = 'target_images/line_5.png'
image = cv2.imread(image_path)
character_images = segment_characters(image)

In [None]:
def ocr_character(character_image):
    character_image = character_image / 255.0
    character_image = character_image.reshape(1, 28, 28, 1)
    prediction = ocr_model.predict(character_image)
    label = le.inverse_transform([np.argmax(prediction)])
    return label[0]

# Test character recognition
recognized_text = ''.join([ocr_character(img) for img in character_images])
print(f'Recognized Text: {recognized_text}')
print(f'Recognized Sentence: {recognized_text}')

In [None]:
import cv2
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
from keras.models import load_model
from sklearn.preprocessing import LabelEncoder

alphabet_data = pd.read_csv('alphabets_28x28.csv')
alphabet_data = alphabet_data.dropna()
le = LabelEncoder()
le.fit(alphabet_data.iloc[:, 0].values)

def preprocess_image(image):
    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    blurred = cv2.GaussianBlur(gray, (5, 5), 0)
    _, binary = cv2.threshold(blurred, 0, 255, cv2.THRESH_BINARY_INV + cv2.THRESH_OTSU)
    return binary

def segment_characters(image):
    binary = preprocess_image(image)
    contours, _ = cv2.findContours(binary, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
    contours = sorted(contours, key=lambda ctr: cv2.boundingRect(ctr)[0])
    character_images = []
    for contour in contours:
        x, y, w, h = cv2.boundingRect(contour)
        if w > 1 and h > 1:  # Avoid very small contours
            char_img = binary[y:y+h, x:x+w]
            char_img = cv2.resize(char_img, (28, 28))
            character_images.append(char_img)
    return character_images

def ocr_character(character_image):
    character_image = character_image / 255.0
    character_image = character_image.reshape(1, 28, 28, 1)
    prediction = ocr_model.predict(character_image)
    label = le.inverse_transform([np.argmax(prediction)])
    return label[0]

def display_segmented_characters(character_images):
    plt.figure(figsize=(10, 2))
    for i, char_img in enumerate(character_images):
        plt.subplot(1, len(character_images), i + 1)
        plt.imshow(char_img, cmap='gray')
        plt.axis('off')
    plt.show()

def ocr_image(image_path):
    image = cv2.imread(image_path)
    character_images = segment_characters(image)
    display_segmented_characters(character_images)  # Debug: Display segmented characters
    recognized_text = ''.join([ocr_character(img) for img in character_images])
    return recognized_text

ocr_text = ocr_image('target_images/line_4.png')
print(f'OCR Result: {ocr_text}')

In [28]:
import cv2
import numpy as np
import pandas as pd
from keras.models import load_model
from sklearn.preprocessing import LabelEncoder

# Function to preprocess the image chunk
def preprocess_chunk(chunk):
    chunk = chunk / 255.0
    chunk = chunk.reshape(1, 28, 28, 1)
    return chunk

# Function to perform OCR on an image chunk
def ocr_chunk(chunk):
    chunk = preprocess_chunk(chunk)
    prediction = ocr_model.predict(chunk)
    predicted_label = np.argmax(prediction)
    if prediction[0][predicted_label] > 0.5:  # Confidence threshold
        label = le.inverse_transform([predicted_label])
        return label[0]
    else:
        return '-'

# Function to divide the image into 28x28 chunks and perform OCR
def ocr_image(image_path):
    image = cv2.imread(image_path, cv2.IMREAD_GRAYSCALE)
    h, w = image.shape
    ocr_result = []

    for y in range(0, h, 28):
        line = []
        for x in range(0, w, 28):
            chunk = image[y:y+28, x:x+28]
            if chunk.shape[0] == 28 and chunk.shape[1] == 28:
                char = ocr_chunk(chunk)
                line.append(char)
            else:
                line.append('-')
        ocr_result.append(''.join(line))

    return '\n'.join(ocr_result)

# Test OCR on a sample image
ocr_text = ocr_image('target_images/line_2.png')
print(f'OCR Result:\n{ocr_text}')


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 24ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 25ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 25ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 21ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 19ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 21ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 22ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 23ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 22ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 22ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 21ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 23ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 21ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 23