In [1]:
# importing necessary models for the project
import os
import pickle
import numpy as np
from tqdm.notebook import tqdm

from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import CountVectorizer, TfidfTransformer
from sklearn.preprocessing import LabelEncoder
from torch.utils.data import Dataset, DataLoader

import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms
from PIL import Image
import pandas as pd

In [2]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [3]:
! cp '/content/drive/My Drive/Classroom/alphabet_data.zip' .

In [4]:
! unzip '/content/alphabet_data.zip'

[1;30;43mStreaming output truncated to the last 5000 lines.[0m
  inflating: alphabet_images/image_9550.png  
 extracting: alphabet_images/image_95500.png  
  inflating: alphabet_images/image_95501.png  
  inflating: alphabet_images/image_95502.png  
  inflating: alphabet_images/image_95503.png  
  inflating: alphabet_images/image_95504.png  
  inflating: alphabet_images/image_95505.png  
 extracting: alphabet_images/image_95506.png  
  inflating: alphabet_images/image_95507.png  
  inflating: alphabet_images/image_95508.png  
 extracting: alphabet_images/image_95509.png  
  inflating: alphabet_images/image_9551.png  
  inflating: alphabet_images/image_95510.png  
  inflating: alphabet_images/image_95511.png  
  inflating: alphabet_images/image_95512.png  
 extracting: alphabet_images/image_95513.png  
 extracting: alphabet_images/image_95514.png  
  inflating: alphabet_images/image_95515.png  
  inflating: alphabet_images/image_95516.png  
 extracting: alphabet_images/image_95517.png

In [5]:
dataset_path = '/content/alphabet_images'


In [6]:
import os
print(os.listdir(dataset_path))


['image_341567.png', 'image_124202.png', 'image_106546.png', 'image_114190.png', 'image_211807.png', 'image_78153.png', 'image_77375.png', 'image_328494.png', 'image_149552.png', 'image_300042.png', 'image_117447.png', 'image_22895.png', 'image_343821.png', 'image_91669.png', 'image_287719.png', 'image_9135.png', 'image_221079.png', 'image_125195.png', 'image_140846.png', 'image_231674.png', 'image_299667.png', 'image_204113.png', 'image_311249.png', 'image_349351.png', 'image_18773.png', 'image_177900.png', 'image_294659.png', 'image_31244.png', 'image_145693.png', 'image_250222.png', 'image_61664.png', 'image_57932.png', 'image_21665.png', 'image_165970.png', 'image_211338.png', 'image_130778.png', 'image_342033.png', 'image_24398.png', 'image_342631.png', 'image_275470.png', 'image_218746.png', 'image_67063.png', 'image_347188.png', 'image_189042.png', 'image_37767.png', 'image_289928.png', 'image_136799.png', 'image_246903.png', 'image_89980.png', 'image_1196.png', 'image_127800.pn

In [None]:
import pandas as pd

# Loading the CSV file
df = pd.read_csv('alphabet_labels.csv')
print(df.head())


          file label
0  image_1.png     A
1  image_2.png     A
2  image_3.png     A
3  image_4.png     A
4  image_5.png     A


In [21]:


# processing on the dataset
class HandwrittenTextDataset(Dataset):
    def __init__(self, csv_file, root_dir, transform=None):
        self.labels_frame = pd.read_csv(csv_file)
        self.root_dir = root_dir
        self.transform = transform

        # Map string labels to numeric labels
        self.label_map = {label: idx for idx, label in enumerate(sorted(self.labels_frame['label'].unique()))}

    def __len__(self):
        return len(self.labels_frame)

    def __getitem__(self, idx):
        img_name = os.path.join(self.root_dir, self.labels_frame.iloc[idx, 0])
        image = Image.open(img_name).convert('L')
        label_str = self.labels_frame.iloc[idx, 1]

        # Convert label to numeric tensor
        label_numeric = self.label_map[label_str]
        label_tensor = torch.tensor(label_numeric, dtype=torch.long)

        if self.transform:
            image = self.transform(image)

        return image, label_tensor

# # Hyperparameters for the model
num_classes = 26
num_epochs = 5
learning_rate = 0.001
batch_size = 32

# # check wheather gpu is available or not
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
if(torch.cuda.is_available):
    print("gpu is available")


# # preprocessing the data
transform = transforms.Compose([
    transforms.Resize((28, 28)),
    transforms.RandomRotation(10),
    transforms.ToTensor(),
])

# # Create dataset and dataloader for the model
dataset = HandwrittenTextDataset(csv_file='alphabet_labels.csv', root_dir='alphabet_images', transform=transform)
dataloader = DataLoader(dataset, batch_size=batch_size, shuffle=True)


# # Save the trained model
# torch.save(model.state_dict(), 'handwritten_text_recognition_model.pth')
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim

class CRNN(nn.Module):
    def __init__(self, num_classes):
        super(CRNN, self).__init__()
        self.conv1 = nn.Conv2d(1, 32, kernel_size=3, stride=1, padding=1)
        self.pool = nn.MaxPool2d(kernel_size=2, stride=2, padding=0)
        self.conv2 = nn.Conv2d(32, 64, kernel_size=3, stride=1, padding=1)
        self.lstm = nn.LSTM(input_size=64 * 7, hidden_size=256, num_layers=2, batch_first=True)  # Adjust input_size here
        self.fc = nn.Linear(256, num_classes)

    def forward(self, x):
        x = self.pool(F.relu(self.conv1(x)))  # Apply convolution and pooling
        x = self.pool(F.relu(self.conv2(x)))  # Apply convolution and pooling
        x = x.permute(0, 3, 1, 2)  # Permute dimensions to (batch_size, channels, height, width)
        x = x.view(x.size(0), x.size(1), -1)  # Reshape to (batch_size, channels, features)
        lstm_out, _ = self.lstm(x)  # Pass through LSTM layer
        x = self.fc(lstm_out[:, -1, :])  # Take the last time step output and pass through FC layer
        return x

# Assuming the rest of your script follows with dataset handling, training loop, etc.

# Hyperparameters for training
num_classes = 26
num_epochs = 5
learning_rate = 0.001
batch_size = 32

# Assuming dataset and DataLoader setup remains the same as before

# Initialize model, criterion, and optimizer
model = CRNN(num_classes=num_classes).to(device)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=learning_rate)

# Training loop
for epoch in range(num_epochs):
    model.train()
    running_loss = 0.0
    for batch_idx, (images, labels) in enumerate(dataloader):
        images = images.to(device)
        labels = labels.to(device)

        optimizer.zero_grad()
        outputs = model(images)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        running_loss += loss.item()

    print(f'Epoch [{epoch+1}/{num_epochs}], Loss: {running_loss / len(dataloader)}')

# Save the trained model
torch.save(model.state_dict(), 'handwritten_text_recognition_model.pth')



gpu is available
Epoch [1/5], Loss: 0.13871318139810426
Epoch [2/5], Loss: 0.05418105475017507
Epoch [3/5], Loss: 0.04263069401275317
Epoch [4/5], Loss: 0.036495526099404374
Epoch [5/5], Loss: 0.03236440780724088


In [22]:


# Load dataset for the sentiment analysis
df = pd.read_csv('sentiment_analysis_dataset.csv')

# Preprocess the data from the dataset
X = df['line']
y = df['sentiment']

# Encode labels
label_encoder = LabelEncoder()
y = label_encoder.fit_transform(y)

# Split data into training and test sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Convert text data to numerical data using TF-IDF
vectorizer = CountVectorizer()
X_train_counts = vectorizer.fit_transform(X_train)
tfidf_transformer = TfidfTransformer()
X_train_tfidf = tfidf_transformer.fit_transform(X_train_counts)

X_test_counts = vectorizer.transform(X_test)
X_test_tfidf = tfidf_transformer.transform(X_test_counts)

# Convert to PyTorch tensors
X_train_tfidf = torch.tensor(X_train_tfidf.toarray(), dtype=torch.float32)
X_test_tfidf = torch.tensor(X_test_tfidf.toarray(), dtype=torch.float32)
y_train = torch.tensor(y_train, dtype=torch.long)
y_test = torch.tensor(y_test, dtype=torch.long)


In [23]:
import torch
import torch.nn as nn
import torch.optim as optim

class NaiveBayesClassifier(nn.Module):
    def __init__(self, num_classes, num_features):
        super(NaiveBayesClassifier, self).__init__()
        self.num_classes = num_classes
        self.num_features = num_features

        # Parameters for Naive Bayes: class priors and conditional probabilities
        self.class_priors = nn.Parameter(torch.zeros(num_classes), requires_grad=True)
        self.cond_probs = nn.Parameter(torch.zeros(num_classes, num_features), requires_grad=True)

    def forward(self, x):
        # Calculate log-probabilities using class priors and conditional probabilities
        # x.shape should be (batch_size, num_features)
        log_probs = torch.matmul(x, self.cond_probs.t()) + self.class_priors

        return log_probs

    def predict(self, x):
        # Get class predictions by selecting the class with highest log-probability
        log_probs = self.forward(x)
        _, predicted_classes = torch.max(log_probs, 1)

        return predicted_classes

# Assuming X_train_tfidf and X_test_tfidf are already PyTorch tensors
num_classes = len(torch.unique(y_train))  # Number of unique classes
num_features = X_train_tfidf.shape[1]     # Number of features (terms in TF-IDF)

# Instantiate the Naive Bayes classifier
model = NaiveBayesClassifier(num_classes, num_features)

# Convert y_train to one-hot encoded tensor
y_train_one_hot = torch.zeros(len(y_train), num_classes)
y_train_one_hot.scatter_(1, y_train.unsqueeze(1), 1)

# Define loss function and optimizer
criterion = nn.CrossEntropyLoss()  # For computing the loss between predicted and target labels
optimizer = optim.Adam(model.parameters(), lr=0.001)  # Adam optimizer

# Training loop
num_epochs = 10
for epoch in range(num_epochs):
    # Forward pass
    outputs = model(X_train_tfidf)

    # Compute loss
    loss = criterion(outputs, y_train)

    # Backward pass and optimization
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()

    # Print progress
    if (epoch+1) % 1 == 0:
        print(f'Epoch [{epoch+1}/{num_epochs}], Loss: {loss.item():.4f}')

# Evaluation
with torch.no_grad():
    model.eval()
    y_pred = model.predict(X_test_tfidf)

    # Convert y_test to numpy array for evaluation metrics
    y_test_np = y_test.numpy()
    y_pred_np = y_pred.numpy()

    # Calculate accuracy
    accuracy = (y_pred_np == y_test_np).mean()
    print(f'Accuracy: {accuracy:.4f}')


Epoch [1/10], Loss: 1.0986
Epoch [2/10], Loss: 1.0943
Epoch [3/10], Loss: 1.0899
Epoch [4/10], Loss: 1.0856
Epoch [5/10], Loss: 1.0813
Epoch [6/10], Loss: 1.0770
Epoch [7/10], Loss: 1.0727
Epoch [8/10], Loss: 1.0684
Epoch [9/10], Loss: 1.0641
Epoch [10/10], Loss: 1.0598
Accuracy: 0.8333


In [None]:
class SentimentAnalysisModel(nn.Module):
    def __init__(self, input_dim, hidden_dim, output_dim):
        super(SentimentAnalysisModel, self).__init__()
        self.fc1 = nn.Linear(input_dim, hidden_dim)
        self.relu = nn.ReLU()
        self.fc2 = nn.Linear(hidden_dim, output_dim)

    def forward(self, x):
        out = self.fc1(x)
        out = self.relu(out)
        out = self.fc2(out)
        return out

input_dim = X_train_tfidf.shape[1]
hidden_dim = 128
output_dim = len(label_encoder.classes_)
model = SentimentAnalysisModel(input_dim, hidden_dim, output_dim)


In [None]:
# Training parameters
learning_rate = 0.001
num_epochs = 10
batch_size = 32

# DataLoader
train_data = torch.utils.data.TensorDataset(X_train_tfidf, y_train)
train_loader = torch.utils.data.DataLoader(dataset=train_data, batch_size=batch_size, shuffle=True)

# Loss and optimizer
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=learning_rate)

# Training loop
for epoch in range(num_epochs):
    for i, (lines, labels) in enumerate(train_loader):
        outputs = model(lines)
        loss = criterion(outputs, labels)

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

    print(f'Epoch [{epoch+1}/{num_epochs}], Loss: {loss.item():.4f}')

#  Save the trained model
torch.save(model.state_dict(), 'sentiment_analysis_model.pth')

# Save the vectorizer, tfidf_transformer, and label_encoder using joblib
import joblib
joblib.dump(vectorizer, 'vectorizer.pkl')
joblib.dump(tfidf_transformer, 'tfidf_transformer.pkl')
joblib.dump(label_encoder, 'label_encoder.pkl')


Epoch [1/10], Loss: 1.0977
Epoch [2/10], Loss: 1.0903
Epoch [3/10], Loss: 1.0830
Epoch [4/10], Loss: 1.0758
Epoch [5/10], Loss: 1.0687
Epoch [6/10], Loss: 1.0615
Epoch [7/10], Loss: 1.0541
Epoch [8/10], Loss: 1.0465
Epoch [9/10], Loss: 1.0386
Epoch [10/10], Loss: 1.0305


['label_encoder.pkl']

In [24]:
# Evaluate the model
model.eval()
with torch.no_grad():
    outputs = model(X_test_tfidf)
    _, predicted = torch.max(outputs.data, 1)
    accuracy = (predicted == y_test).sum().item() / y_test.size(0)

print(f'Test Accuracy: {accuracy * 100:.2f}%')


Test Accuracy: 83.33%


In [25]:
def predict_sentiment(text, model, vectorizer, tfidf_transformer, label_encoder):
    text_counts = vectorizer.transform([text])
    text_tfidf = tfidf_transformer.transform(text_counts)
    text_tfidf = torch.tensor(text_tfidf.toarray(), dtype=torch.float32)

    model.eval()
    with torch.no_grad():
        outputs = model(text_tfidf)
        _, predicted = torch.max(outputs.data, 1)

    sentiment = label_encoder.inverse_transform(predicted.numpy())
    return sentiment[0]

# Example usage with recognized text from HTR model
recognized_text = "I am happy"
predicted_sentiment = predict_sentiment(recognized_text, model, vectorizer, tfidf_transformer, label_encoder)

print(f'Recognized Text: {recognized_text}')
print(f'Predicted Sentiment: {predicted_sentiment}')


Recognized Text: I am happy
Predicted Sentiment: Happy


In [26]:


# Load the true labels
labels_df = pd.read_csv('target_labels.csv')

# Transform for image preprocessing
transform = transforms.Compose([
    transforms.Resize((28, 28)),  # Adjust size as needed
    transforms.ToTensor(),
])

# Directory containing the test images
image_dir = 'target_images'


In [31]:
def predict_sentiment(text, model, vectorizer, tfidf_transformer, label_encoder):
    text_counts = vectorizer.transform([text])
    text_tfidf = tfidf_transformer.transform(text_counts)
    text_tfidf = torch.tensor(text_tfidf.toarray(), dtype=torch.float32)

    model.eval()
    with torch.no_grad():
        outputs = model(text_tfidf)
        _, predicted = torch.max(outputs, 1)

    sentiment = label_encoder.inverse_transform(predicted.numpy())
    return sentiment[0]


In [44]:


def predict_text_from_image(image_path, model, transform, device):
    image = Image.open(image_path).convert('L')  # Load and convert image to grayscale
    image = transform(image).unsqueeze(0).to(device)  # Apply transformation and move to device

    with torch.no_grad():
        outputs = model(image)  # Forward pass through the model

        # Ensure outputs has the expected shape (batch_size, sequence_length, num_classes)
        if outputs.dim() == 3:
            _, predicted_sequence = torch.max(outputs, dim=2)  # Get the predicted class indices along the sequence dimension
        elif outputs.dim() == 2:
            _, predicted_sequence = torch.max(outputs.unsqueeze(0), dim=1)  # Handle 2D case by adding batch dimension
        else:
            raise ValueError(f"Unexpected outputs dimension: {outputs.dim()}")

    # Convert numeric labels to characters or words
    predicted_sentence = ''.join([chr(pred.item() + ord('a')) for pred in predicted_sequence.squeeze()])

    return predicted_sentence.strip()



# Load your handwritten text recognition model (CRNN)
num_classes = 26  # Assuming 26 classes for alphabet recognition
htr_model = CRNN(num_classes=num_classes).to(device)
htr_model.load_state_dict(torch.load('handwritten_text_recognition_model.pth'))
htr_model.eval()

# Evaluate on test images
correct = 0
total = 0

for idx, row in labels_df.iterrows():
    image_name = row['file']
    actual_sentiment = row['sentiment']

    image_path = os.path.join(image_dir, image_name)

    # Recognize text from image
    recognized_sentence = predict_text_from_image(image_path, htr_model, transform, device)

    # Predict sentiment using Naive Bayes classifier
    X_text = vectorizer.transform([recognized_sentence])
    X_text_tfidf = tfidf_transformer.transform(X_text)

    # Convert X_text_tfidf to PyTorch tensor
    X_text_tfidf = torch.tensor(X_text_tfidf.toarray(), dtype=torch.float32)

    predicted_sentiment = label_encoder.inverse_transform(model.predict(X_text_tfidf))[0]

    print(f'Image: {image_name}')
    print(f'Recognized Sentence: {recognized_sentence}')
    print(f'Actual Sentiment: {actual_sentiment}')
    print(f'Predicted Sentiment: {predicted_sentiment}')
    print('---')

    if predicted_sentiment == actual_sentiment:
        correct += 1
    total += 1

accuracy = correct / total
print(f'Test Accuracy: {accuracy * 100:.2f}%')


Image: line_1.png
Recognized Sentence: aaaaaaaaaaaaaaaaaaaaaaaaaa
Actual Sentiment: Angry
Predicted Sentiment: Neutral
---
Image: line_2.png
Recognized Sentence: aaaaaaaaaaaaaaaaaaaaaaaaaa
Actual Sentiment: Angry
Predicted Sentiment: Neutral
---
Image: line_3.png
Recognized Sentence: aaaaaaaaaaaaaaaaaaaaaaaaaa
Actual Sentiment: Happy
Predicted Sentiment: Neutral
---
Image: line_4.png
Recognized Sentence: aaaaaaaaaaaaaaaaaaaaaaaaaa
Actual Sentiment: Happy
Predicted Sentiment: Neutral
---
Image: line_5.png
Recognized Sentence: aaaaaaaaaaaaaaaaaaaaaaaaaa
Actual Sentiment: Neutral
Predicted Sentiment: Neutral
---
Image: line_6.png
Recognized Sentence: aaaaaaaaaaaaaaaaaaaaaaaaaa
Actual Sentiment: Neutral
Predicted Sentiment: Neutral
---
Test Accuracy: 33.33%
