In [None]:
from tqdm import tqdm
import torch
from torchvision import datasets, transforms
from torch.utils.data import DataLoader, random_split
import matplotlib.pyplot as plt
import os
import torch.optim as optim

In [None]:

# Define the path to the data folder (ImageFolder reads one sub-directory per class)
data_dir = '/kaggle/input/small-project-dataset/extracted_images_small'

# Seed for the dataset split, so the train/test/eval partition is identical
# after every kernel restart and reported accuracies are comparable across runs.
split_seed = 42

# Define transformations to be applied to the images
transform = transforms.Compose([
    transforms.Grayscale(),          # Convert images to grayscale
    transforms.Resize((28, 28)),     # Resize images to 28x28
    transforms.ToTensor(),           # Convert images to PyTorch tensors
])

# Create an ImageFolder dataset
dataset = datasets.ImageFolder(root=data_dir, transform=transform)

# Get the class labels
class_labels = dataset.classes

# 60% train / 20% test / remainder eval; the remainder absorbs rounding so the
# three sizes always add up to len(dataset).
train_size = int(0.6 * len(dataset))
test_size = int(0.2 * len(dataset))
eval_size = len(dataset) - train_size - test_size

# Split the dataset into training, testing, and evaluation sets.
# A dedicated, seeded generator makes the split reproducible without touching
# the global RNG state.
train_dataset, test_dataset, eval_dataset = random_split(
    dataset,
    [train_size, test_size, eval_size],
    generator=torch.Generator().manual_seed(split_seed),
)

# Only the training loader shuffles; held-out loaders keep a fixed order.
train_loader = DataLoader(train_dataset, batch_size=64, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=64, shuffle=False)
eval_loader = DataLoader(eval_dataset, batch_size=64, shuffle=False)





In [None]:
# Number of samples assigned to the training split
len(train_dataset)

In [None]:
# Show one example image per class in a grid with 8 rows.
col = int(len(class_labels)/8+1)
# squeeze=False keeps `axes` two-dimensional even when only one column is needed,
# so the [row, column] indexing below always works.
fig, axes = plt.subplots(8, col, figsize=(15, 20), squeeze=False)

# Hide every panel up front so grid cells that never receive a sample stay blank.
for ax in axes.flat:
    ax.axis('off')

labels = []  # class indices that already have a panel
for image, class_index in tqdm(train_dataset, total=len(train_dataset)):
    if class_index in labels:
        continue

    ax = axes[class_index // col, class_index % col]
    ax.imshow(image.permute(1, 2, 0))  # Convert tensor to image format (HWC)
    ax.set_title(class_labels[class_index])
    labels.append(class_index)

    # Stop as soon as every class is drawn instead of loading (and transforming)
    # the rest of the training set for nothing.
    if len(labels) == len(class_labels):
        break

plt.tight_layout()
plt.show()


In [None]:
import torch
import torch.nn as nn
import torch.nn.functional as F

class SymbolDetectorCNN(nn.Module):
    """Small CNN classifier for single-channel 28x28 symbol images.

    Layer shapes for a 28x28 input:
    conv 5x5 -> 24x24, pool -> 12x12, conv 3x3 -> 10x10, pool -> 5x5,
    giving 15 * 5 * 5 flattened features for the fully connected head.

    Args:
        num_classes: Number of output classes (size of the final layer).
            Defaults to 82, the value the network was originally built with.
    """

    def __init__(self, num_classes=82):
        super(SymbolDetectorCNN, self).__init__()
        self.conv1 = nn.Conv2d(1, 30, 5)  # input channels, output channels, kernel size
        self.pool = nn.MaxPool2d(2, 2)     # kernel size, stride
        self.conv2 = nn.Conv2d(30, 15, 3)  # input channels, output channels, kernel size
        self.fc1 = nn.Linear(15 * 5 * 5, 128)  # input size, output size
        self.fc2 = nn.Linear(128, num_classes)  # input size, output size
        self.dropout = nn.Dropout(0.2)

    def forward(self, x):
        """Compute class scores for a batch of images.

        Args:
            x: Image tensor with one channel and 28x28 spatial size.

        Returns:
            Tensor of shape (batch, num_classes) holding raw, unnormalised
            logits. No softmax is applied here: nn.CrossEntropyLoss applies
            log-softmax internally, so feeding it probabilities would squash
            the gradients. Use ``F.softmax(logits, dim=1)`` when probabilities
            are needed; the arg-max prediction is the same either way.
        """
        x = F.relu(self.conv1(x))
        x = self.pool(x)
        x = F.relu(self.conv2(x))
        x = self.pool(x)
        x = self.dropout(x.view(-1, 15 * 5 * 5))  # flatten
        x = F.relu(self.fc1(x))
        return self.fc2(x)


# Instantiate the CNN
model = SymbolDetectorCNN()

# Print the model architecture
print(model)


In [None]:
import os
import cv2
import matplotlib.pyplot as plt

# Preview symbol bounding boxes found by contour detection on a few equation images.

# Path to the folder containing images
folder_path = '/kaggle/input/equation-crohme-png/CROHME_training_png'

# Upper bound on the number of images actually displayed
max_previews = 22
shown = 0

# Sorted so the same files are previewed on every run; os.listdir returns
# entries in arbitrary order.
for filename in sorted(os.listdir(folder_path)):
    if not filename.endswith('.png'):
        continue

    # Load the image
    image_path = os.path.join(folder_path, filename)
    image = cv2.imread(image_path)
    if image is None:
        # cv2.imread signals an unreadable/corrupt file by returning None
        # instead of raising, which would otherwise crash cvtColor below.
        print(f"Skipping unreadable image: {image_path}")
        continue

    # Convert the image to grayscale
    gray_image = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)

    # Apply thresholding to binarize the image (inverted, Otsu-selected threshold)
    _, binary_image = cv2.threshold(gray_image, 0, 255, cv2.THRESH_BINARY_INV + cv2.THRESH_OTSU)

    # Find contours in the binary image
    contours, _ = cv2.findContours(binary_image, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)

    # Extract bounding boxes around the contours
    character_boxes = []
    for contour in contours:
        x, y, w, h = cv2.boundingRect(contour)
        # Filter out very small contours (noise)
        if w > 10 and h > 10:
            character_boxes.append((x, y, w, h))

    # Draw bounding boxes on a copy so the loaded image stays untouched
    output_image = image.copy()
    for (x, y, w, h) in character_boxes:
        cv2.rectangle(output_image, (x, y), (x + w, y + h), (0, 255, 0), 2)

    # Show the original image with bounding boxes
    plt.imshow(cv2.cvtColor(output_image, cv2.COLOR_BGR2RGB))
    plt.axis('off')
    plt.title(filename)  # Show filename as the title
    plt.show()

    # Count displayed images rather than directory entries, so skipped files
    # do not eat into the preview budget.
    shown += 1
    if shown >= max_previews:
        break


In [None]:
import os
import cv2
import numpy as np
import matplotlib.pyplot as plt

# Preview row/column projection profiles of binarized equation images.

# Path to the folder containing images
folder_path = '/kaggle/input/equation-crohme-png/CROHME_training_png'

# Upper bound on the number of images actually displayed
max_previews = 12
shown = 0

# Sorted so the same files are previewed on every run; os.listdir returns
# entries in arbitrary order.
for filename in sorted(os.listdir(folder_path)):
    if not filename.endswith('.png'):
        continue

    # Load the image
    image_path = os.path.join(folder_path, filename)
    image = cv2.imread(image_path)
    if image is None:
        # cv2.imread signals an unreadable/corrupt file by returning None
        # instead of raising, which would otherwise crash cvtColor below.
        print(f"Skipping unreadable image: {image_path}")
        continue

    # Convert the image to grayscale
    gray_image = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)

    # Apply thresholding to binarize the image (inverted, Otsu-selected threshold)
    _, binary_image = cv2.threshold(gray_image, 0, 255, cv2.THRESH_BINARY_INV + cv2.THRESH_OTSU)

    # Foreground pixels are 255 after thresholding, so count non-zero entries
    # instead of summing intensities; the axes below are labelled "Pixel Count".
    # Horizontal projection: foreground pixels per row
    horizontal_projection = np.count_nonzero(binary_image, axis=1)

    # Vertical projection: foreground pixels per column
    vertical_projection = np.count_nonzero(binary_image, axis=0)

    fig, (ax_image, ax_rows, ax_cols) = plt.subplots(1, 3, figsize=(12, 6))

    ax_image.imshow(binary_image, cmap='gray')
    ax_image.set_title('Binary Image')
    ax_image.axis('off')

    # Plot horizontal projection
    ax_rows.plot(horizontal_projection, range(binary_image.shape[0]), color='black')
    ax_rows.set_title('Horizontal Projection')
    ax_rows.set_xlabel('Pixel Count')
    ax_rows.set_ylabel('Row')
    ax_rows.invert_yaxis()  # Invert y-axis to match image coordinates

    # Plot vertical projection
    ax_cols.plot(range(binary_image.shape[1]), vertical_projection, color='black')
    ax_cols.set_title('Vertical Projection')
    ax_cols.set_xlabel('Column')
    ax_cols.set_ylabel('Pixel Count')

    fig.suptitle(filename)  # Show filename as the title
    fig.tight_layout()
    plt.show()

    # Count displayed images rather than directory entries, so skipped files
    # do not eat into the preview budget.
    shown += 1
    if shown >= max_previews:
        break


In [None]:
# Integer class index (label) of the first sample in the full dataset
dataset[0][1]

In [None]:
# Smoke test: push one sample through a freshly initialised network.
probe_model = SymbolDetectorCNN()
probe_model.eval()  # disable dropout so the output is deterministic for these weights

with torch.no_grad():  # inference only, no autograd graph needed
    # Call the module itself (not .forward) so nn.Module hooks run, and add the
    # batch dimension explicitly: (1, 28, 28) -> (1, 1, 28, 28).
    probe_output = probe_model(dataset[0][0].unsqueeze(0))

probe_output

In [None]:

# Baseline training run: Adam + cross-entropy, reporting test accuracy after each epoch.
# Use the GPU when one is visible, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = SymbolDetectorCNN().to(device)

# Loss and optimiser
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

num_epochs = 100
for epoch in range(num_epochs):
    # ---- optimisation pass over the training split ----
    model.train()
    running_loss = 0.0
    progress = tqdm(train_loader, desc=f'Epoch {epoch+1}/{num_epochs}')
    for images, labels in progress:
        images = images.to(device)
        labels = labels.to(device)

        optimizer.zero_grad()
        outputs = model(images)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        # criterion returns the batch mean; weight it by the batch size so the
        # epoch figure is a per-sample average even with a short final batch.
        running_loss += loss.item() * images.shape[0]

    epoch_loss = running_loss / len(train_loader.dataset)
    print(f"Epoch {epoch+1}/{num_epochs}, Loss: {epoch_loss:.4f}")

    # ---- accuracy on the test split ----
    model.eval()
    correct, total = 0, 0
    with torch.no_grad():
        for images, labels in test_loader:
            images = images.to(device)
            labels = labels.to(device)
            outputs = model(images)
            predicted = torch.max(outputs, 1).indices
            total += labels.shape[0]
            correct += (predicted == labels).sum().item()

    accuracy = correct / total
    print(f"Test Accuracy: {accuracy:.4f}")

In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.optim.lr_scheduler import ReduceLROnPlateau
from tqdm import tqdm

# Training run with LR scheduling and best-model checkpointing.
# Relies on train_loader, test_loader and SymbolDetectorCNN from earlier cells.

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = SymbolDetectorCNN().to(device)

# Define the loss function and optimizer
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

# Learning rate scheduler: cut the LR by 10x after 5 epochs without improvement
# of the monitored loss. The deprecated `verbose` flag is not passed; the
# current LR is printed at the start of every epoch below instead.
scheduler = ReduceLROnPlateau(optimizer, mode='min', factor=0.1, patience=5)

# Training loop
num_epochs = 100
best_accuracy = 0.0  # To track the best accuracy achieved during training
for epoch in range(num_epochs):
    print(f"Learning rate before epoch {epoch+1}: {optimizer.param_groups[0]['lr']}")
    model.train()  # Set the model to training mode
    running_loss = 0.0
    for images, labels in tqdm(train_loader, desc=f'Epoch {epoch+1}/{num_epochs}'):
        images, labels = images.to(device), labels.to(device)

        optimizer.zero_grad()
        outputs = model(images)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        # criterion returns the batch mean; weight by batch size for a
        # per-sample epoch average.
        running_loss += loss.item() * images.size(0)

    epoch_loss = running_loss / len(train_loader.dataset)
    print(f"Epoch {epoch+1}/{num_epochs}, Loss: {epoch_loss:.4f}")

    # Evaluation on the held-out test split
    model.eval()  # Set the model to evaluation mode
    correct = 0
    total = 0
    held_out_loss_sum = 0.0
    with torch.no_grad():
        for images, labels in test_loader:
            images, labels = images.to(device), labels.to(device)
            outputs = model(images)
            held_out_loss_sum += criterion(outputs, labels).item() * images.size(0)
            _, predicted = torch.max(outputs, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()

    accuracy = correct / total
    test_loss = held_out_loss_sum / total
    print(f"Test Accuracy: {accuracy:.4f}")
    print(f"Test Loss: {test_loss:.4f}")

    # Adjust the learning rate on held-out performance (not the training loss),
    # so the schedule reacts to the same data the checkpoint criterion uses.
    scheduler.step(test_loss)

    # Save the model if it achieves the best accuracy so far
    if accuracy > best_accuracy:
        best_accuracy = accuracy
        torch.save(model.state_dict(), 'best_model.pth')
