In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from tensorflow.keras.utils import to_categorical

# Path to the CSV file
csv_path = 'alphabets_28x28.csv'

# Load the dataset.
# low_memory=False makes pandas infer each column's dtype from the whole file
# instead of chunk by chunk, which is what pandas recommends to avoid the
# mixed-type DtypeWarning this call otherwise emits on inconsistent columns.
data = pd.read_csv(csv_path, low_memory=False)

# Assuming the dataset has a column 'label' for the character and the rest are pixel values
labels = data['label'].values
# Kept as a DataFrame (not an array) because the row filter below uses DataFrame.apply
images = data.drop(columns=['label'])

# Function to check if all values in a row are usable numeric pixel values
def is_numeric(row):
    """Return True when every value in ``row`` casts to a finite ``np.float32``.

    Args:
        row: A pandas Series (or any object exposing ``astype``) holding the
            pixel values of one sample.

    Returns:
        bool: ``False`` if the cast fails or if any value is NaN/inf after the
        cast (e.g. an empty CSV cell); ``True`` otherwise.
    """
    try:
        values = np.asarray(row.astype(np.float32))
    except (ValueError, TypeError):
        # ValueError: unparsable strings; TypeError: objects float() cannot handle.
        return False
    # Missing or infinite pixels would otherwise end up in the training tensor.
    return bool(np.isfinite(values).all())

# Keep only the rows whose pixel columns pass the is_numeric check
numeric_mask = images.apply(is_numeric, axis=1)
data = data[numeric_mask]

# Re-derive labels and raw pixel values from the filtered frame
labels = data['label'].values
images = data.drop(columns=['label']).values

# Cast to float32, fold every flat row into a 28x28 image, then divide by 255
images = images.astype(np.float32).reshape(-1, 28, 28) / 255.0

# Assign each distinct label an integer index, following sorted label order
label_mapping = {
    char: idx
    for idx, char in enumerate(sorted(set(labels)))
}
integer_labels = np.array(list(map(label_mapping.get, labels)))

# One-hot encode the integer labels
labels_categorical = to_categorical(
    integer_labels,
    num_classes=len(label_mapping),
)

# Hold out 20% of the samples for validation (fixed seed)
X_train, X_val, y_train, y_val = train_test_split(
    images,
    labels_categorical,
    test_size=0.2,
    random_state=42,
)

# Append a trailing channel axis (grayscale)
X_train = X_train[..., np.newaxis]
X_val = X_val[..., np.newaxis]


# Building the OCR model
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense

def build_ocr_model(input_shape, num_classes=None):
    """Build and compile the convolutional character classifier.

    The network is two Conv2D + MaxPooling2D stages, a Flatten, a 128-unit
    ReLU Dense layer and a softmax output layer. It is compiled with the Adam
    optimizer, categorical cross-entropy loss and the accuracy metric.

    Args:
        input_shape: Shape of one input sample, passed to the first Conv2D
            layer (the notebook uses ``(28, 28, 1)``).
        num_classes: Number of output units. When omitted, the size of the
            notebook-level ``label_mapping`` is used, which is the original
            behaviour.

    Returns:
        The compiled Keras ``Sequential`` model.
    """
    if num_classes is None:
        # Number of classes should match number of unique labels
        num_classes = len(label_mapping)

    model = Sequential([
        Conv2D(32, (3, 3), activation='relu', input_shape=input_shape),
        MaxPooling2D((2, 2)),
        Conv2D(64, (3, 3), activation='relu'),
        MaxPooling2D((2, 2)),
        Flatten(),
        Dense(128, activation='relu'),
        Dense(num_classes, activation='softmax')
    ])

    model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])
    return model

# Instantiate the network for single-channel 28x28 inputs
input_shape = (28, 28, 1)
ocr_model = build_ocr_model(input_shape)

# Fit on the training split; the validation split is passed as validation_data
ocr_model.fit(
    X_train,
    y_train,
    validation_data=(X_val, y_val),
    batch_size=32,
    epochs=10,
)

# Print the layer-by-layer summary
ocr_model.summary()


  data = pd.read_csv(csv_path)


Epoch 1/10

In [None]:
import os
import cv2

def preprocess_image(image_path):
    """Load an image as grayscale, divide it by 255 and resize it to 28x28.

    Args:
        image_path: Path of the image file to load.

    Returns:
        The resized image array produced by ``cv2.resize`` (28x28).

    Raises:
        FileNotFoundError: If ``image_path`` is not an existing file.
        ValueError: If the file exists but OpenCV could not decode it.
    """
    # Load the image
    image = cv2.imread(image_path, cv2.IMREAD_GRAYSCALE)

    # cv2.imread signals failure by returning None instead of raising; without
    # this check the division below fails with an unhelpful TypeError.
    if image is None:
        if not os.path.isfile(image_path):
            raise FileNotFoundError(f"Image file not found: {image_path!r}")
        raise ValueError(f"Could not decode image: {image_path!r}")

    # Normalize and resize the image
    normalized_image = image / 255.0
    resized_image = cv2.resize(normalized_image, (28, 28))

    return resized_image

def extract_text_from_images(model, images_folder, label_mapping=None):
    """Predict one character for every image file in a folder.

    Files are processed in sorted filename order so the result is reproducible
    (``os.listdir`` alone returns entries in arbitrary order). Entries that
    are not regular files, such as sub-directories, are skipped.

    Args:
        model: Object exposing ``predict(batch)``; it receives a batch of
            shape ``(1, height, width, 1)`` for each image.
        images_folder: Directory containing the image files.
        label_mapping: Optional ``{label: class_index}`` dict, in the same
            form as the one built when encoding the training labels. When
            given, predictions are decoded through it. When omitted, class
            ``i`` is decoded as ``chr(i + 65)``, which assumes the classes
            are 'A'..'Z' in order.

    Returns:
        list[str]: One predicted character per processed file.
    """
    index_to_char = None
    if label_mapping is not None:
        index_to_char = {idx: char for char, idx in label_mapping.items()}

    extracted_texts = []

    for image_file in sorted(os.listdir(images_folder)):
        image_path = os.path.join(images_folder, image_file)
        if not os.path.isfile(image_path):
            continue  # sub-directories cannot be read as images

        preprocessed_image = preprocess_image(image_path)
        preprocessed_image = np.expand_dims(preprocessed_image, axis=-1)  # Add channel dimension
        preprocessed_image = np.expand_dims(preprocessed_image, axis=0)  # Add batch dimension

        prediction = model.predict(preprocessed_image)
        predicted_class = int(np.argmax(prediction))

        if index_to_char is None:
            predicted_char = chr(predicted_class + 65)  # Fallback: assumes classes are A-Z in order
        else:
            predicted_char = str(index_to_char[predicted_class])

        extracted_texts.append(predicted_char)

    return extracted_texts

# Run the trained OCR model over every image in the target folder.
# NOTE(review): extract_text_from_images decodes a class index as
# chr(index + 65) by default — confirm that matches the training label order.
target_images_folder = 'target_images'
extracted_texts = extract_text_from_images(
    model=ocr_model,
    images_folder=target_images_folder,
)
print(extracted_texts)


In [None]:
import pandas as pd
from sklearn.model_selection import train_test_split
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences

# Read the sentiment analysis dataset into a DataFrame
# (the cells below read its 'line' and 'sentiment' columns)
sentiment_df = pd.read_csv(
    'sentiment_analysis_dataset.csv',
)

# Text preprocessing
def preprocess_text(text):
    """Return ``text`` unchanged.

    Placeholder hook: tokenization, stop-word removal and similar cleaning
    steps can be added here. At present no transformation is applied.
    """
    return text

# Run every text line through the preprocessing hook
sentiment_df['line'] = sentiment_df['line'].map(preprocess_text)

# Hold out 20% of the rows for validation (fixed seed)
X_train, X_val, y_train, y_val = train_test_split(
    sentiment_df['line'],
    sentiment_df['sentiment'],
    test_size=0.2,
    random_state=42,
)

# Fit the tokenizer on the training texts only
tokenizer = Tokenizer(num_words=5000)
tokenizer.fit_on_texts(X_train)

# Turn both splits into integer sequences
X_train_seq, X_val_seq = (
    tokenizer.texts_to_sequences(split) for split in (X_train, X_val)
)

# Pad every sequence to a common length
maxlen = 100
X_train_pad = pad_sequences(X_train_seq, maxlen=maxlen)
X_val_pad = pad_sequences(X_val_seq, maxlen=maxlen)

# Binarize the sentiment labels: 'Positive' becomes 1, anything else 0
y_train = y_train.eq('Positive').astype('int64')
y_val = y_val.eq('Positive').astype('int64')


In [None]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Embedding, LSTM, Dense, Dropout

# Building the model
def build_sentiment_model(input_length, vocab_size):
    """Assemble and compile the LSTM sentiment classifier.

    Args:
        input_length: Value forwarded to the Embedding layer's
            ``input_length`` argument.
        vocab_size: Value forwarded to the Embedding layer's ``input_dim``
            argument.

    Returns:
        A ``Sequential`` model (Embedding -> LSTM -> sigmoid Dense) compiled
        with the Adam optimizer, binary cross-entropy loss and the accuracy
        metric.
    """
    model = Sequential()
    model.add(Embedding(input_dim=vocab_size, output_dim=128, input_length=input_length))
    model.add(LSTM(128, dropout=0.2, recurrent_dropout=0.2))
    model.add(Dense(1, activation='sigmoid'))

    model.compile(
        optimizer='adam',
        loss='binary_crossentropy',
        metrics=['accuracy'],
    )
    return model

# Embedding size: one slot per word in the tokenizer's index, plus one
vocab_size = len(tokenizer.word_index) + 1
input_length = maxlen
sentiment_model = build_sentiment_model(
    input_length=input_length,
    vocab_size=vocab_size,
)

# Print the layer-by-layer summary
sentiment_model.summary()


# Fit on the padded training sequences; the validation split is passed as validation_data
sentiment_model.fit(
    X_train_pad,
    y_train,
    validation_data=(X_val_pad, y_val),
    batch_size=32,
    epochs=5,
)


In [None]:
# Preprocess extracted texts
def preprocess_extracted_texts(texts):
    """Tokenize ``texts`` and pad the result for the sentiment model.

    Uses the notebook-level ``tokenizer`` and ``maxlen``.

    Args:
        texts: Iterable of strings to encode.

    Returns:
        The padded sequences returned by ``pad_sequences``.
    """
    return pad_sequences(tokenizer.texts_to_sequences(texts), maxlen=maxlen)

# Score the extracted texts with the sentiment model
extracted_texts_preprocessed = preprocess_extracted_texts(extracted_texts)
predicted_sentiments = sentiment_model.predict(extracted_texts_preprocessed)

# Bucket each score: above 0.7 -> "Happy", above 0.3 -> "Neutral", otherwise "Angry"
sentiment_labels = [
    "Happy" if score > 0.7 else ("Neutral" if score > 0.3 else "Angry")
    for score in predicted_sentiments
]

print(sentiment_labels)
