In [11]:
import os
import pandas as pd

# Paths
img_folder = 'crohme/train/img'
caption_file = 'crohme/train/caption.txt'

# Collect all .bmp filenames from the img folder
img_files = set([file for file in os.listdir(img_folder) if file.endswith('.bmp')])

# Collect image-label mappings from caption.txt
data = {}
with open(caption_file, 'r', encoding='utf-8') as file:
    for line in file:
        parts = line.strip().split(' ', 1)
        if len(parts) == 2:
            img_name, label = parts
            data[img_name] = label

# Ensure every .bmp file is included in the dataset
final_data = []
for img in img_files:
    label = data.get(img, "__MISSING__")  # Assign '__MISSING__' if no label is found
    final_data.append({
        'image_path': os.path.join(img_folder, img),
        'label': label
    })

# Save to CSV
df = pd.DataFrame(final_data)
df.to_csv('crohme_labels.csv', index=False)

print(f"Dataset successfully created with {len(df)} entries.")


Dataset successfully created with 8834 entries.


In [12]:
import os
import pandas as pd

# Paths
img_folder = 'crohme/train/img'
csv_file = 'crohme_labels.csv'

# Collect all .bmp filenames from the img folder
img_files = set([file for file in os.listdir(img_folder) if file.endswith('.bmp')])

# Collect image names from the CSV
csv_data = pd.read_csv(csv_file)
csv_images = set(csv_data['image_path'].apply(lambda x: os.path.basename(x)))

# Identify the missing file
missing_file = img_files - csv_images

print(f"Missing file: {missing_file}")


Missing file: set()


In [13]:
import pandas as pd

# Load the CSV
csv_data = pd.read_csv('crohme_labels.csv')

# Check for duplicates
duplicates = csv_data['image_path'].duplicated().sum()

if duplicates:
    print(f"Duplicate entries found: {duplicates}")
    print(csv_data[csv_data['image_path'].duplicated(keep=False)])
else:
    print("No duplicates found in the CSV.")


No duplicates found in the CSV.


In [14]:
import cv2
import numpy as np
import os
import pandas as pd

# Load the dataset
data = pd.read_csv('crohme_labels.csv')

# Folder for processed images
processed_folder = 'crohme/processed_images'
os.makedirs(processed_folder, exist_ok=True)

# Preprocess images
def preprocess_image(image_path):
    img = cv2.imread(image_path, cv2.IMREAD_GRAYSCALE)  # Convert to grayscale
    
    # Resize to 128x128
    img_resized = cv2.resize(img, (128, 128))
    
    # Gaussian Blur for noise reduction
    img_blurred = cv2.GaussianBlur(img_resized, (3, 3), 0)
    
    # Normalize pixel values
    img_normalized = img_blurred / 255.0
    
    return img_normalized

# Process and save images
for idx, row in data.iterrows():
    img = preprocess_image(row['image_path'])
    img_name = os.path.basename(row['image_path'])
    cv2.imwrite(os.path.join(processed_folder, img_name), (img * 255).astype(np.uint8))

print("Image preprocessing completed successfully!")


Image preprocessing completed successfully!


In [16]:
pip install imgaug


Collecting imgaug
  Downloading imgaug-0.4.0-py2.py3-none-any.whl.metadata (1.8 kB)
Downloading imgaug-0.4.0-py2.py3-none-any.whl (948 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m948.0/948.0 kB[0m [31m1.2 MB/s[0m eta [36m0:00:00[0ma [36m0:00:01[0m
[?25hInstalling collected packages: imgaug
Successfully installed imgaug-0.4.0

[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip is available: [0m[31;49m24.3.1[0m[39;49m -> [0m[32;49m25.0.1[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m To update, run: [0m[32;49mpip install --upgrade pip[0m
Note: you may need to restart the kernel to use updated packages.


In [17]:
import imgaug.augmenters as iaa

# Augmentation pipeline
augmenter = iaa.Sequential([
    iaa.Affine(rotate=(-20, 20), scale=(0.85, 1.15), translate_percent=(-0.1, 0.1)),
    iaa.ElasticTransformation(alpha=1.0, sigma=0.5)
])

# Folder for augmented images
augmented_folder = 'crohme/augmented_images'
os.makedirs(augmented_folder, exist_ok=True)

# Augment images
for file in os.listdir(processed_folder):
    img = cv2.imread(os.path.join(processed_folder, file), cv2.IMREAD_GRAYSCALE)
    augmented_img = augmenter(image=img)
    cv2.imwrite(os.path.join(augmented_folder, file), augmented_img)

print("Data augmentation completed successfully!")


Data augmentation completed successfully!


In [18]:
from sklearn.model_selection import train_test_split
import shutil

# Load dataset
data = pd.read_csv('crohme_labels.csv')

# Split data
train_data, test_data = train_test_split(data, test_size=0.2, random_state=42)

# Create split folders
os.makedirs('crohme/train_data', exist_ok=True)
os.makedirs('crohme/test_data', exist_ok=True)

# Copy images to respective folders
for idx, row in train_data.iterrows():
    shutil.copy(row['image_path'], 'crohme/train_data')

for idx, row in test_data.iterrows():
    shutil.copy(row['image_path'], 'crohme/test_data')

print("Data split completed successfully!")


Data split completed successfully!


In [37]:
!pip uninstall torch torchvision -y
!pip cache purge
!pip install torch==2.2.0 torchvision==0.17.0


Found existing installation: torch 2.2.0
Uninstalling torch-2.2.0:
  Successfully uninstalled torch-2.2.0
Found existing installation: torchvision 0.17.0
Uninstalling torchvision-0.17.0:
  Successfully uninstalled torchvision-0.17.0
Files removed: 1813
Collecting torch==2.2.0
  Downloading torch-2.2.0-cp311-none-macosx_11_0_arm64.whl.metadata (25 kB)
Collecting torchvision==0.17.0
  Downloading torchvision-0.17.0-cp311-cp311-macosx_11_0_arm64.whl.metadata (6.6 kB)
Downloading torch-2.2.0-cp311-none-macosx_11_0_arm64.whl (59.4 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m59.4/59.4 MB[0m [31m2.1 MB/s[0m eta [36m0:00:00[0m00:01[0m00:01[0m
[?25hDownloading torchvision-0.17.0-cp311-cp311-macosx_11_0_arm64.whl (1.6 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.6/1.6 MB[0m [31m2.6 MB/s[0m eta [36m0:00:00[0ma [36m0:00:01[0m
[?25hInstalling collected packages: torch, torchvision
Successfully installed torch-2.2.0 torchvision-0.17.0

[1

In [38]:
import torch
import torchvision
print("Torch version:", torch.__version__)
print("Torchvision version:", torchvision.__version__)


Unexpected exception formatting exception. Falling back to standard exception


Traceback (most recent call last):
  File "/opt/anaconda3/lib/python3.11/site-packages/IPython/core/interactiveshell.py", line 3553, in run_code
    exec(code_obj, self.user_global_ns, self.user_ns)
  File "/var/folders/55/mhy5dtxs1g11qqd33n2hv53h0000gn/T/ipykernel_3303/3750727675.py", line 2, in <module>
    import torchvision
  File "/opt/anaconda3/lib/python3.11/site-packages/torchvision/__init__.py", line 6, in <module>
    from torchvision import _meta_registrations, datasets, io, models, ops, transforms, utils
  File "/opt/anaconda3/lib/python3.11/site-packages/torchvision/_meta_registrations.py", line 25, in <module>
    @register_meta("roi_align")
     ^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/opt/anaconda3/lib/python3.11/site-packages/torchvision/_meta_registrations.py", line 18, in wrapper
    if torchvision.extension._has_ops():
       ^^^^^^^^^^^^^^^^^^^^^
AttributeError: partially initialized module 'torchvision' has no attribute 'extension' (most likely due to a circular import

In [1]:
import torch
import torchvision
print("PyTorch Version:", torch.__version__)
print("Torchvision Version:", torchvision.__version__)

PyTorch Version: 2.2.0
Torchvision Version: 0.17.0


In [25]:
import torch
import torch.nn as nn
from torchvision import models
from transformers import SwinTransformerModel

# Swin Transformer Encoder
class SwinEncoder(nn.Module):
    def __init__(self):
        super(SwinEncoder, self).__init__()
        self.swin = SwinTransformerModel.from_pretrained("microsoft/swin-tiny-patch4-window7-224")

    def forward(self, x):
        features = self.swin(x).last_hidden_state
        return features

# Transformer Decoder
class TransformerDecoder(nn.Module):
    def __init__(self, vocab_size, hidden_dim):
        super(TransformerDecoder, self).__init__()
        self.embedding = nn.Embedding(vocab_size, hidden_dim)
        self.transformer_decoder = nn.TransformerDecoderLayer(d_model=hidden_dim, nhead=8)
        self.fc = nn.Linear(hidden_dim, vocab_size)

    def forward(self, tgt, memory):
        embedded = self.embedding(tgt)
        output = self.transformer_decoder(embedded, memory)
        return self.fc(output)

# Full Model (Encoder + Decoder)
class MathExpressionModel(nn.Module):
    def __init__(self, vocab_size, hidden_dim=512):
        super(MathExpressionModel, self).__init__()
        self.encoder = SwinEncoder()
        self.decoder = TransformerDecoder(vocab_size, hidden_dim)

    def forward(self, x, tgt):
        encoded_features = self.encoder(x)
        return self.decoder(tgt, encoded_features)


Unexpected exception formatting exception. Falling back to standard exception


Traceback (most recent call last):
  File "/opt/anaconda3/lib/python3.11/site-packages/IPython/core/interactiveshell.py", line 3553, in run_code
    exec(code_obj, self.user_global_ns, self.user_ns)
  File "/var/folders/55/mhy5dtxs1g11qqd33n2hv53h0000gn/T/ipykernel_3303/42382682.py", line 3, in <module>
    from torchvision import models
  File "/opt/anaconda3/lib/python3.11/site-packages/torchvision/__init__.py", line 6, in <module>
    from torchvision import _meta_registrations, datasets, io, models, ops, transforms, utils
  File "/opt/anaconda3/lib/python3.11/site-packages/torchvision/_meta_registrations.py", line 25, in <module>
    @register_meta("roi_align")
     ^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/opt/anaconda3/lib/python3.11/site-packages/torchvision/_meta_registrations.py", line 18, in wrapper
    if torchvision.extension._has_ops():
       ^^^^^^^^^^^^^^^^^^^^^
AttributeError: partially initialized module 'torchvision' has no attribute 'extension' (most likely due to a circu

In [27]:
pip uninstall torch torchvision -y

Found existing installation: torch 2.2.0
Uninstalling torch-2.2.0:
  Successfully uninstalled torch-2.2.0
Found existing installation: torchvision 0.17.0
Uninstalling torchvision-0.17.0:
  Successfully uninstalled torchvision-0.17.0
Note: you may need to restart the kernel to use updated packages.


In [28]:
pip install torch==2.2.0 torchvision==0.17.0


Collecting torch==2.2.0
  Using cached torch-2.2.0-cp311-none-macosx_11_0_arm64.whl.metadata (25 kB)
Collecting torchvision==0.17.0
  Using cached torchvision-0.17.0-cp311-cp311-macosx_11_0_arm64.whl.metadata (6.6 kB)
Using cached torch-2.2.0-cp311-none-macosx_11_0_arm64.whl (59.4 MB)
Using cached torchvision-0.17.0-cp311-cp311-macosx_11_0_arm64.whl (1.6 MB)
Installing collected packages: torch, torchvision
Successfully installed torch-2.2.0 torchvision-0.17.0

[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip is available: [0m[31;49m24.3.1[0m[39;49m -> [0m[32;49m25.0.1[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m To update, run: [0m[32;49mpip install --upgrade pip[0m
Note: you may need to restart the kernel to use updated packages.


In [2]:
import torch
import torchvision
print("PyTorch Version:", torch.__version__)
print("Torchvision Version:", torchvision.__version__)


PyTorch Version: 2.2.0
Torchvision Version: 0.17.0


In [30]:
pip uninstall torch torchvision -y


Found existing installation: torch 2.2.0
Uninstalling torch-2.2.0:
  Successfully uninstalled torch-2.2.0
Found existing installation: torchvision 0.17.0
Uninstalling torchvision-0.17.0:
  Successfully uninstalled torchvision-0.17.0
Note: you may need to restart the kernel to use updated packages.


In [31]:
rm -rf ~/.cache/pip


In [32]:
pip install torch==2.2.0 torchvision==0.17.0

Collecting torch==2.2.0
  Using cached torch-2.2.0-cp311-none-macosx_11_0_arm64.whl.metadata (25 kB)
Collecting torchvision==0.17.0
  Using cached torchvision-0.17.0-cp311-cp311-macosx_11_0_arm64.whl.metadata (6.6 kB)
Using cached torch-2.2.0-cp311-none-macosx_11_0_arm64.whl (59.4 MB)
Using cached torchvision-0.17.0-cp311-cp311-macosx_11_0_arm64.whl (1.6 MB)
Installing collected packages: torch, torchvision
Successfully installed torch-2.2.0 torchvision-0.17.0

[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip is available: [0m[31;49m24.3.1[0m[39;49m -> [0m[32;49m25.0.1[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m To update, run: [0m[32;49mpip install --upgrade pip[0m
Note: you may need to restart the kernel to use updated packages.


In [3]:
import torch
import torchvision
print("Torch version:", torch.__version__)
print("Torchvision version:", torchvision.__version__)


Torch version: 2.2.0
Torchvision version: 0.17.0


In [4]:
import torch
import torch.nn as nn
import torchvision.models as models

# Load ResNet50 with pretrained weights
class CustomResNet50(nn.Module):
    def __init__(self, num_classes):
        super(CustomResNet50, self).__init__()
        self.resnet = models.resnet50(pretrained=True)
        self.resnet.fc = nn.Linear(self.resnet.fc.in_features, num_classes)  # Custom FC layer

    def forward(self, x):
        return self.resnet(x)

# Instantiate the model
num_classes = 10  # Change according to your dataset
model = CustomResNet50(num_classes)

# Move model to GPU if available
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)


Downloading: "https://download.pytorch.org/models/resnet50-0676ba61.pth" to /Users/pratyushtiwari/.cache/torch/hub/checkpoints/resnet50-0676ba61.pth
100%|██████████| 97.8M/97.8M [00:37<00:00, 2.73MB/s]


CustomResNet50(
  (resnet): ResNet(
    (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
    (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (relu): ReLU(inplace=True)
    (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
    (layer1): Sequential(
      (0): Bottleneck(
        (conv1): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
        (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (conv3): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (bn3): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (relu): ReLU(inplace=True)
        (downsample): Sequential(
    

In [8]:
import torch.optim as optim

criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)


In [9]:
from torch.utils.data import DataLoader

def train_model(model, train_loader, val_loader, criterion, optimizer, num_epochs=10):
    model.train()

    for epoch in range(num_epochs):
        total_loss = 0.0
        correct = 0
        total = 0
        
        for images, labels in train_loader:
            images, labels = images.to(device), labels.to(device)

            # Forward pass
            outputs = model(images)
            loss = criterion(outputs, labels)

            # Backward pass and optimization
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            # Track performance
            total_loss += loss.item()
            _, predicted = torch.max(outputs, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()

        avg_loss = total_loss / len(train_loader)
        accuracy = 100 * correct / total
        print(f"Epoch [{epoch+1}/{num_epochs}], Loss: {avg_loss:.4f}, Accuracy: {accuracy:.2f}%")


In [7]:
def evaluate_model(model, test_loader):
    model.eval()
    correct = 0
    total = 0

    with torch.no_grad():
        for images, labels in test_loader:
            images, labels = images.to(device), labels.to(device)
            outputs = model(images)
            _, predicted = torch.max(outputs, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()

    accuracy = 100 * correct / total
    print(f'Test Accuracy: {accuracy:.2f}%')


In [11]:
from torch.utils.data import DataLoader, TensorDataset
import torchvision.transforms as transforms

# Sample Transformations (Adjust as per your dataset)
transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor()
])

# Example Dataset (Ensure your dataset object is correct)
train_dataset = TensorDataset(torch.randn(100, 3, 224, 224), torch.randint(0, 10, (100,)))

# Creating DataLoader
train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)


In [12]:
for images, labels in train_loader:
    print(f"Batch size: {images.size()}, Labels: {labels}")
    break


Batch size: torch.Size([32, 3, 224, 224]), Labels: tensor([9, 0, 3, 0, 9, 1, 4, 3, 7, 6, 7, 4, 6, 1, 6, 3, 3, 1, 8, 2, 2, 3, 8, 4,
        6, 9, 8, 0, 0, 8, 3, 5])


In [20]:
import os
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms
from PIL import Image

# Custom Dataset for .bmp files
class BMPDataset(Dataset):
    def __init__(self, root_dir, transform=None):
        self.root_dir = root_dir
        self.transform = transform
        self.image_paths = [
            os.path.join(root_dir, file) for file in os.listdir(root_dir) if file.endswith('.bmp')
        ]

    def __len__(self):
        return len(self.image_paths)

    def __getitem__(self, idx):
        img_path = self.image_paths[idx]
        image = Image.open(img_path).convert('RGB')

        if self.transform:
            image = self.transform(image)
        
        # Assuming no labels for now; adjust if needed
        return image, 0

# Transformations
transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor()
])

# Dataset and DataLoader
train_dataset = BMPDataset(root_dir='/Users/pratyushtiwari/Desktop/IEEE_Hackathon/crohme/train_data', 
                           transform=transform)


train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)

# Test loader
for images, labels in train_loader:
    print(f"Batch size: {images.size()}, Labels: {labels}")
    break


Batch size: torch.Size([32, 3, 224, 224]), Labels: tensor([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0])


In [21]:
for images, labels in train_loader:
    print(f"Batch size: {images.size()}, Labels: {labels}")
    break  # To avoid printing the entire dataset


Batch size: torch.Size([32, 3, 224, 224]), Labels: tensor([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0])


In [23]:
model = BMPClassifier()

# Dummy data with expected dimensions
dummy_input = torch.randn(32, 3, 224, 224)  # Batch size 32
output = model(dummy_input)
print(f"Model Output Shape: {output.shape}")


Model Output Shape: torch.Size([32, 10])


In [24]:
model.train()  # Ensure model is in training mode
for images, labels in train_loader:
    images, labels = images.float(), labels.long()  # Ensure correct data types

    optimizer.zero_grad()
    outputs = model(images)
    
    print(f"Output shape: {outputs.shape}")  # Debugging
    print(f"Labels shape: {labels.shape}")   # Debugging

    loss = criterion(outputs, labels)
    loss.backward()
    optimizer.step()

    print(f"Loss: {loss.item():.4f}")
    break  # For testing only one batch


Output shape: torch.Size([32, 10])
Labels shape: torch.Size([32])
Loss: 2.2700


In [25]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader
from tqdm import tqdm  # Progress bar for better visualization

# Loss function and optimizer
criterion = nn.CrossEntropyLoss()  # Suitable for multi-class classification
optimizer = optim.Adam(model.parameters(), lr=0.001)

# Training loop
def train(model, train_loader, criterion, optimizer, num_epochs=10):
    model.train()  # Set model to training mode
    for epoch in range(num_epochs):
        total_loss = 0.0
        correct = 0
        total = 0
        
        for images, labels in tqdm(train_loader, desc=f"Epoch {epoch+1}/{num_epochs}"):
            images, labels = images.to(device), labels.to(device)  # Move to GPU if available
            
            optimizer.zero_grad()  # Zero the gradients
            outputs = model(images)
            
            loss = criterion(outputs, labels)  # Compute loss
            loss.backward()  # Backpropagation
            optimizer.step()  # Update weights
            
            # Tracking loss and accuracy
            total_loss += loss.item()
            _, predicted = torch.max(outputs, 1)
            correct += (predicted == labels).sum().item()
            total += labels.size(0)
        
        avg_loss = total_loss / len(train_loader)
        accuracy = 100 * correct / total
        
        print(f"Epoch {epoch+1}/{num_epochs} | Loss: {avg_loss:.4f} | Accuracy: {accuracy:.2f}%")

# Move model to GPU if available
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = BMPClassifier().to(device)

# Train the model
train(model, train_loader, criterion, optimizer, num_epochs=10)


Epoch 1/10: 100%|██████████| 221/221 [01:20<00:00,  2.75it/s]


Epoch 1/10 | Loss: 2.2921 | Accuracy: 3.48%


Epoch 2/10: 100%|██████████| 221/221 [01:18<00:00,  2.80it/s]


Epoch 2/10 | Loss: 2.2921 | Accuracy: 3.48%


Epoch 3/10: 100%|██████████| 221/221 [01:19<00:00,  2.78it/s]


Epoch 3/10 | Loss: 2.2921 | Accuracy: 3.48%


Epoch 4/10: 100%|██████████| 221/221 [01:20<00:00,  2.75it/s]


Epoch 4/10 | Loss: 2.2921 | Accuracy: 3.48%


Epoch 5/10: 100%|██████████| 221/221 [01:19<00:00,  2.78it/s]


Epoch 5/10 | Loss: 2.2921 | Accuracy: 3.48%


Epoch 6/10: 100%|██████████| 221/221 [01:22<00:00,  2.66it/s]


Epoch 6/10 | Loss: 2.2921 | Accuracy: 3.48%


Epoch 7/10: 100%|██████████| 221/221 [01:19<00:00,  2.77it/s]


Epoch 7/10 | Loss: 2.2921 | Accuracy: 3.48%


Epoch 8/10: 100%|██████████| 221/221 [01:21<00:00,  2.71it/s]


Epoch 8/10 | Loss: 2.2921 | Accuracy: 3.48%


Epoch 9/10: 100%|██████████| 221/221 [01:19<00:00,  2.79it/s]


Epoch 9/10 | Loss: 2.2921 | Accuracy: 3.48%


Epoch 10/10: 100%|██████████| 221/221 [01:19<00:00,  2.78it/s]

Epoch 10/10 | Loss: 2.2921 | Accuracy: 3.48%





In [26]:
def evaluate(model, test_loader):
    model.eval()  # Set model to evaluation mode
    correct = 0
    total = 0

    with torch.no_grad():
        for images, labels in test_loader:
            images, labels = images.to(device), labels.to(device)
            outputs = model(images)
            
            _, predicted = torch.max(outputs, 1)
            correct += (predicted == labels).sum().item()
            total += labels.size(0)

    accuracy = 100 * correct / total
    print(f"Test Accuracy: {accuracy:.2f}%")

# Evaluate the model
evaluate(model, test_loader)


NameError: name 'test_loader' is not defined

In [29]:
from torch.utils.data import Dataset, DataLoader
from PIL import Image
import os

# Custom dataset for BMP files
class BMPDataset(Dataset):
    def __init__(self, root_dir, transform=None):
        self.root_dir = root_dir
        self.transform = transform
        self.images = [f for f in os.listdir(root_dir) if f.endswith('.bmp')]

    def __len__(self):
        return len(self.images)

    def __getitem__(self, idx):
        img_path = os.path.join(self.root_dir, self.images[idx])
        image = Image.open(img_path).convert("RGB")
        
        if self.transform:
            image = self.transform(image)
        
        # Dummy label (since no class folders exist)
        label = 0  
        return image, label

# Define transformations
transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor()
])

# Initialize dataset and DataLoader
test_dataset = BMPDataset(root_dir='/Users/pratyushtiwari/Desktop/IEEE_Hackathon/crohme/test_data', 
                          transform=transform)
test_loader = DataLoader(test_dataset, batch_size=32, shuffle=False)

In [30]:
def evaluate(model, test_loader):
    model.eval()  # Set model to evaluation mode
    correct = 0
    total = 0

    with torch.no_grad():
        for images, labels in test_loader:
            images, labels = images.to(device), labels.to(device)
            outputs = model(images)
            
            _, predicted = torch.max(outputs, 1)
            correct += (predicted == labels).sum().item()
            total += labels.size(0)

    accuracy = 100 * correct / total
    print(f"Test Accuracy: {accuracy:.2f}%")

# Evaluate the model
evaluate(model, test_loader)


Test Accuracy: 3.11%


In [31]:
for images, labels in train_loader:
    print(f"Image Shape: {images.shape}, Labels: {labels}")
    break


Image Shape: torch.Size([32, 3, 224, 224]), Labels: tensor([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0])


In [33]:
dummy_input = torch.randn(1, 3, 224, 224)  # Batch size 1 for testing
output = model(dummy_input)
print("Output Shape:", output.shape)


Output Shape: torch.Size([1, 10])


In [41]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms
from PIL import Image
import os

# Custom Dataset for BMP images
class BMPDataset(Dataset):
    def __init__(self, root_dir, transform=None):
        self.root_dir = root_dir
        self.transform = transform
        self.files = [f for f in os.listdir(root_dir) if f.endswith('.bmp')]

    def __len__(self):
        return len(self.files)

    def __getitem__(self, idx):
        img_path = os.path.join(self.root_dir, self.files[idx])
        image = Image.open(img_path).convert('RGB')

        # Improved label extraction logic
        try:
            label = int(self.files[idx].split('_')[0]) % 10  # Ensures label in range [0, 9]
        except ValueError:
            label = idx % 10  # Random fallback in case of invalid filenames

        if self.transform:
            image = self.transform(image)

        return image, label

# Model architecture
class BMPClassifier(nn.Module):
    def __init__(self):
        super(BMPClassifier, self).__init__()
        self.conv1 = nn.Conv2d(3, 32, kernel_size=3, stride=1, padding=1)
        self.bn1 = nn.BatchNorm2d(32)
        self.pool = nn.MaxPool2d(2, 2)
        self.fc1 = nn.Linear(32 * 112 * 112, 128)
        self.dropout = nn.Dropout(0.5)
        self.fc2 = nn.Linear(128, 10)

    def forward(self, x):
        x = self.pool(F.relu(self.bn1(self.conv1(x))))
        x = x.view(-1, 32 * 112 * 112)
        x = F.relu(self.fc1(x))
        x = self.dropout(x)
        x = self.fc2(x)
        return x

# Data transforms
transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor()
])

# Dataset and DataLoader
train_dataset = BMPDataset(root_dir='/Users/pratyushtiwari/Desktop/IEEE_Hackathon/crohme/train_data', 
                           transform=transform)

train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)

test_dataset = BMPDataset(root_dir='/Users/pratyushtiwari/Desktop/IEEE_Hackathon/crohme/test_data',
                          transform=transform)
test_loader = DataLoader(test_dataset, batch_size=32, shuffle=False)

# Model, Loss, and Optimizer
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model = BMPClassifier().to(device)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

# Training Loop
num_epochs = 10
for epoch in range(num_epochs):
    model.train()
    running_loss = 0.0

    for images, labels in train_loader:
        images, labels = images.to(device), labels.to(device)

        optimizer.zero_grad()
        outputs = model(images)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        running_loss += loss.item()

    print(f"Epoch [{epoch+1}/{num_epochs}], Loss: {running_loss/len(train_loader):.4f}")


def evaluate(model, loader):
    model.eval()
    correct = 0
    total = 0

    with torch.no_grad():
        for images, labels in loader:
            images, labels = images.to(device), labels.to(device)
            outputs = model(images)
            _, predicted = torch.max(outputs, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()

    accuracy = 100 * correct / total
    print(f"Test Accuracy: {accuracy:.2f}%")

evaluate(model, test_loader)


Epoch [1/10], Loss: 3.9227
Epoch [2/10], Loss: 2.3086
Epoch [3/10], Loss: 2.3058
Epoch [4/10], Loss: 2.3034
Epoch [5/10], Loss: 2.3082
Epoch [6/10], Loss: 2.3001
Epoch [7/10], Loss: 2.3006
Epoch [8/10], Loss: 2.3027
Epoch [9/10], Loss: 2.3200
Epoch [10/10], Loss: 2.3091
Test Accuracy: 10.19%


In [42]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms
from PIL import Image
import os

# Custom Dataset for BMP images
class BMPDataset(Dataset):
    def __init__(self, root_dir, transform=None):
        self.root_dir = root_dir
        self.transform = transform
        self.files = [f for f in os.listdir(root_dir) if f.endswith('.bmp')]

    def __len__(self):
        return len(self.files)

    def __getitem__(self, idx):
        img_path = os.path.join(self.root_dir, self.files[idx])
        image = Image.open(img_path).convert('RGB')

        # Improved label extraction logic
        try:
            label = int(self.files[idx].split('_')[0]) % 10  # Ensures label in range [0, 9]
        except ValueError:
            label = idx % 10  # Random fallback in case of invalid filenames

        if self.transform:
            image = self.transform(image)

        return image, label

# Model architecture
class BMPClassifier(nn.Module):
    def __init__(self):
        super(BMPClassifier, self).__init__()
        self.conv1 = nn.Conv2d(3, 32, kernel_size=3, stride=1, padding=1)
        self.bn1 = nn.BatchNorm2d(32)
        self.pool = nn.MaxPool2d(2, 2)
        self.fc1 = nn.Linear(32 * 112 * 112, 128)
        self.dropout = nn.Dropout(0.5)
        self.fc2 = nn.Linear(128, 10)

    def forward(self, x):
        x = self.pool(F.relu(self.bn1(self.conv1(x))))
        x = x.view(-1, 32 * 112 * 112)
        x = F.relu(self.fc1(x))
        x = self.dropout(x)
        x = self.fc2(x)
        return x

# Data transforms
transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor()
])

# Dataset and DataLoader
train_dataset = BMPDataset(root_dir='/Users/pratyushtiwari/Desktop/IEEE_Hackathon/crohme/train_data', 
                           transform=transform)

train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)

test_dataset = BMPDataset(root_dir='/Users/pratyushtiwari/Desktop/IEEE_Hackathon/crohme/test_data',
                          transform=transform)
test_loader = DataLoader(test_dataset, batch_size=32, shuffle=False)

# Model, Loss, and Optimizer
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model = BMPClassifier().to(device)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

# Training Loop
num_epochs = 100
for epoch in range(num_epochs):
    model.train()
    running_loss = 0.0

    for images, labels in train_loader:
        images, labels = images.to(device), labels.to(device)

        optimizer.zero_grad()
        outputs = model(images)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        running_loss += loss.item()

    print(f"Epoch [{epoch+1}/{num_epochs}], Loss: {running_loss/len(train_loader):.4f}")


def evaluate(model, loader):
    model.eval()
    correct = 0
    total = 0

    with torch.no_grad():
        for images, labels in loader:
            images, labels = images.to(device), labels.to(device)
            outputs = model(images)
            _, predicted = torch.max(outputs, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()

    accuracy = 100 * correct / total
    print(f"Test Accuracy: {accuracy:.2f}%")

evaluate(model, test_loader)


Epoch [1/100], Loss: 3.8594
Epoch [2/100], Loss: 2.3072
Epoch [3/100], Loss: 2.3157
Epoch [4/100], Loss: 2.3103
Epoch [5/100], Loss: 2.3098
Epoch [6/100], Loss: 2.3171
Epoch [7/100], Loss: 2.3026


KeyboardInterrupt: 