# Case Study 2

**Task 1: Data Extraction and Preprocessing**

In [None]:
import pandas as pd
import torch
from sklearn.model_selection import train_test_split

# Read the training and test CSVs from the Colab working directory.
train_data = pd.read_csv("/content/characters.csv")
test_data = pd.read_csv("/content/characters-test.csv")


def _labels_and_images(frame):
    """Split a frame into ``(labels, images)``.

    Column 0 is taken as the label. All remaining columns are reshaped to
    ``(N, 1, 28, 28)``, cast to float32 and divided by 255.0.
    """
    # presumably pixel intensities in 0..255, so the division scales to 0..1 — TODO confirm
    labels = frame.iloc[:, 0].values
    pixels = frame.iloc[:, 1:].values
    images = pixels.reshape(-1, 1, 28, 28).astype('float32') / 255.0
    return labels, images


train_labels, train_images = _labels_and_images(train_data)
test_labels, test_images = _labels_and_images(test_data)

# Hold out 20% of the training rows for validation; the fixed random_state keeps the split stable.
X_train, X_val, y_train, y_val = train_test_split(
    train_images,
    train_labels,
    test_size=0.2,
    random_state=42,
)


In [None]:
def load_label_mapping(path):
    """Read a label -> character lookup table from a whitespace-separated text file.

    Each non-blank line is expected to hold two integer fields: the class
    label followed by a character code that is passed to ``chr``.

    Args:
        path: Location of the mapping file.

    Returns:
        dict mapping each ``int`` label to a one-character ``str``.

    Raises:
        ValueError: If a non-blank line does not contain exactly two fields,
            or if either field is not an integer.
    """
    mapping = {}
    with open(path, "r") as file:
        for line_number, line in enumerate(file, start=1):
            fields = line.split()
            if not fields:
                # Blank or whitespace-only lines (e.g. a trailing empty line) carry no entry.
                continue
            if len(fields) != 2:
                raise ValueError(
                    f"{path}:{line_number}: expected '<label> <code>', got {line!r}"
                )
            num, ascii_code = fields
            mapping[int(num)] = chr(int(ascii_code))
    return mapping


label_to_char = load_label_mapping("/content/mapping.txt")


In [None]:
import torch.nn as nn
import torch.nn.functional as F

class CNN(nn.Module):
    """Small convolutional classifier for single-channel 28x28 images.

    Layer order: conv(1->32, 3x3) -> ReLU -> max-pool(2) -> conv(32->64, 3x3)
    -> ReLU -> max-pool(2) -> flatten -> linear(128) -> ReLU -> linear(num_classes).
    ``forward`` returns the output of the last linear layer directly (no softmax).

    Args:
        num_classes: Width of the output layer. When ``None`` (the default) it
            falls back to ``len(label_to_char)``, the notebook-level mapping,
            so existing ``CNN()`` calls keep working.
    """

    def __init__(self, num_classes=None):
        super().__init__()
        if num_classes is None:
            # Backward-compatible default: one output per entry in the label mapping.
            # assumes labels are the contiguous ids 0..len-1 — TODO confirm against mapping.txt
            num_classes = len(label_to_char)

        self.conv1 = nn.Conv2d(1, 32, kernel_size=3)
        self.conv2 = nn.Conv2d(32, 64, kernel_size=3)
        self.pool = nn.MaxPool2d(2)

        # Push a dummy 1x1x28x28 tensor through the conv stack to measure the
        # flattened feature size. no_grad keeps this probe out of autograd.
        with torch.no_grad():
            dummy_output = self._extract_features(torch.zeros(1, 1, 28, 28))
        self.flatten_size = dummy_output.numel()

        self.fc1 = nn.Linear(self.flatten_size, 128)
        self.fc2 = nn.Linear(128, num_classes)

    def _extract_features(self, x):
        """Apply both conv -> ReLU -> pool stages and return the feature map."""
        x = self.pool(F.relu(self.conv1(x)))
        x = self.pool(F.relu(self.conv2(x)))
        return x

    def forward(self, x):
        """Return per-class scores of shape ``(batch, num_classes)`` for input ``x``."""
        x = self._extract_features(x)
        x = x.view(x.size(0), -1)  # flatten everything except the batch dimension
        x = F.relu(self.fc1(x))
        return self.fc2(x)

In [None]:
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset

# Seed PyTorch so weight initialisation and batch shuffling repeat across runs
# (same value as the random_state used for the train/validation split).
torch.manual_seed(42)

# Wrap the NumPy splits as tensors. The dtypes are made explicit, as in the
# test-set cell: float32 images for the model, int64 labels for CrossEntropyLoss.
train_dataset = TensorDataset(torch.tensor(X_train).float(), torch.tensor(y_train).long())
val_dataset = TensorDataset(torch.tensor(X_val).float(), torch.tensor(y_val).long())

train_loader = DataLoader(train_dataset, batch_size=64, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=64)

# Define the model, loss, and optimizer
model = CNN()
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

# NOTE(review): neither the model nor the batches are moved to another device in
# this cell, so training runs wherever tensors are created by default.
for epoch in range(5):  # Train for 5 epochs
    model.train()
    for images, labels in train_loader:
        optimizer.zero_grad()
        outputs = model(images)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

    # Validation accuracy after each epoch; no gradients are needed for scoring.
    model.eval()
    correct = 0
    total = 0
    with torch.no_grad():
        for images, labels in val_loader:
            outputs = model(images)
            _, predicted = torch.max(outputs, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()
    print(f"Epoch {epoch+1}, Validation Accuracy: {100 * correct / total:.2f}%")


Epoch 1, Validation Accuracy: 83.34%
Epoch 2, Validation Accuracy: 85.24%
Epoch 3, Validation Accuracy: 86.32%
Epoch 4, Validation Accuracy: 86.66%
Epoch 5, Validation Accuracy: 86.80%


In [None]:
# Persist only the learned parameters (the state_dict) next to the notebook.
checkpoint_path = "cnn_model.pth"
torch.save(model.state_dict(), checkpoint_path)
print("CNN model saved successfully!")


CNN model saved successfully!


In [None]:
from torch.utils.data import DataLoader, TensorDataset

# Turn the test arrays into tensors: float32 images and int64 class ids.
test_images_tensor = torch.tensor(test_images, dtype=torch.float32)
test_labels_tensor = torch.tensor(test_labels, dtype=torch.int64)

# Pair every image with its label.
test_dataset = TensorDataset(test_images_tensor, test_labels_tensor)

# Shuffling is off, so batches follow the order of the rows in the test set.
test_loader = DataLoader(dataset=test_dataset, batch_size=64, shuffle=False)


In [None]:
import torch

# Prefer CUDA when PyTorch reports it as available; otherwise fall back to the CPU.
device_name = "cuda" if torch.cuda.is_available() else "cpu"
device = torch.device(device_name)
print(f"Using device: {device}")


Using device: cuda


In [None]:
# Place the trained CNN on the selected device and switch it to inference mode.
model = model.to(device)
model.eval()

correct, total = 0, 0

# Scoring needs no gradients, so autograd tracking is switched off.
with torch.no_grad():
    for images, labels in test_loader:
        images = images.to(device)
        labels = labels.to(device)

        outputs = model(images)
        # Index of the highest score along dim 1 is the predicted class.
        predicted = outputs.max(dim=1).indices

        total += labels.size(0)
        correct += (predicted == labels).sum().item()

print(f"Test Accuracy of the CNN model: {100 * correct / total:.2f}%")


Test Accuracy of the CNN model: 86.22%


In [None]:
# Ensure the model is on the correct device and in inference mode, so this cell
# does not depend on an earlier cell having called model.eval().
model = model.to(device)
model.eval()

# One decoded string per test batch (up to 64 characters each, given the loader's batch size).
reconstructed_sentences = []

with torch.no_grad():
    # The labels are not needed for decoding, so they are not moved to the device.
    for images, _labels in test_loader:
        outputs = model(images.to(device))

        # Index of the highest score along dim 1 is the predicted class.
        _, predicted = torch.max(outputs, 1)

        # Map each predicted class id to its character and join the batch into one string.
        characters = [label_to_char[label] for label in predicted.tolist()]
        reconstructed_sentences.append("".join(characters))

# Print some example sentences
for i, sentence in enumerate(reconstructed_sentences[:5]):
    print(f"Reconstructed Sentence {i+1}: {sentence}")


Reconstructed Sentence 1: e9Q9XEA3CG72GGR68SrT84ZeFeXb9eLSFnCKQC1bbqN35nP1EGVqLQHqSYZ0eXGX
Reconstructed Sentence 2: 23qIadQSZeNEK3ZrHSXCWVBDfTGWQD1Cg5hHd4BJBOBAArf4URb4P88GY3dA1QdK
Reconstructed Sentence 3: ePUTBd5QhFDAXC5PWa0CJ9CAD9YEtNKFWZq5M4KU1SZ6XtASPQTQ1E9KDQ43fKTt
Reconstructed Sentence 4: L0d14MD3O5t6AGPBfASSXIPHTK06d5Id4nhCQ5AhL6eACOa09I9Sr4P2hTgDU92N
Reconstructed Sentence 5: JH8AX2FYnSFYa3GatAIRRU8N2dFfBH5B2PKNXSS8FaBdN74L815ZRJZLKKD7qM2F


In [None]:
from sklearn.metrics import precision_score, recall_score, f1_score

# Collect ground-truth and predicted class ids for the whole test loader.
y_true, y_pred = [], []

with torch.no_grad():
    for images, labels in test_loader:
        # Inputs must live on the same device as the model.
        images = images.to(device)
        labels = labels.to(device)

        outputs = model(images)
        predicted = outputs.max(dim=1).indices

        # scikit-learn works on host data, so bring both back to the CPU as NumPy values.
        y_true.extend(labels.cpu().numpy())
        y_pred.extend(predicted.cpu().numpy())

# Accuracy is the fraction of exact matches; the other metrics are macro-averaged over classes.
accuracy = sum(y_t == y_p for y_t, y_p in zip(y_true, y_pred)) / len(y_true)
precision = precision_score(y_true, y_pred, average="macro")
recall = recall_score(y_true, y_pred, average="macro")
f1 = f1_score(y_true, y_pred, average="macro")

# Report the four metrics.
print("=== CNN Performance ===")
print(f"Accuracy : {accuracy:.4f}")
print(f"Precision: {precision:.4f}")
print(f"Recall   : {recall:.4f}")
print(f"F1-Score : {f1:.4f}")


=== CNN Performance ===
Accuracy : 0.8622
Precision: 0.8657
Recall   : 0.8622
F1-Score : 0.8600


In [None]:
from torchvision import models

# Build ResNet-18 with pretrained weights.
# NOTE(review): the ResNet training cell further down rebuilds `resnet18` from
# scratch in the same way, so this cell looks redundant — confirm before removing.
# NOTE(review): newer torchvision releases deprecate `pretrained=` in favour of
# `weights=`; check the installed version before switching.
resnet18 = models.resnet18(pretrained=True)

# Replace the final fully connected layer with one output per entry in the label mapping.
in_features = resnet18.fc.in_features
resnet18.fc = nn.Linear(in_features, len(label_to_char))
resnet18 = resnet18.to(device)


Downloading: "https://download.pytorch.org/models/resnet18-f37072fd.pth" to /root/.cache/torch/hub/checkpoints/resnet18-f37072fd.pth
100%|██████████| 44.7M/44.7M [00:00<00:00, 71.1MB/s]


In [None]:
import torchvision.models as models

# Build ResNet-18 with pretrained weights and resize its final layer to our label set.
resnet18 = models.resnet18(pretrained=True)
num_classes = len(label_to_char)
resnet18.fc = nn.Linear(resnet18.fc.in_features, num_classes)
resnet18 = resnet18.to(device)

# NOTE: `criterion` and `optimizer` are rebound here, replacing the objects created for the CNN.
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(resnet18.parameters(), lr=0.0001)

# Fine-tune every ResNet parameter for 5 epochs.
for epoch in range(5):
    resnet18.train()
    running_loss = 0.0

    for i, (images, labels) in enumerate(train_loader):
        # The loaders yield 1-channel images; tile the channel three times before
        # feeding the ResNet.
        images = images.to(device).repeat(1, 3, 1, 1)
        labels = labels.to(device)

        optimizer.zero_grad()

        outputs = resnet18(images)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        running_loss += loss.item()

        # Report the mean loss of the last 100 mini-batches, then reset the accumulator.
        if (i + 1) % 100 == 0:
            print(f"[Epoch {epoch+1}, Batch {i+1}] Loss: {running_loss / 100:.4f}")
            running_loss = 0.0

    # Validation pass at the end of each epoch (inference mode, no gradients).
    resnet18.eval()
    val_correct, val_total = 0, 0
    with torch.no_grad():
        for images, labels in val_loader:
            images = images.to(device).repeat(1, 3, 1, 1)
            labels = labels.to(device)

            outputs = resnet18(images)
            predicted = outputs.max(dim=1).indices

            val_total += labels.size(0)
            val_correct += (predicted == labels).sum().item()

    val_accuracy = 100 * val_correct / val_total
    print(f"Epoch {epoch+1}: Validation Accuracy = {val_accuracy:.2f}%")

print("Training Completed!")


[Epoch 1, Batch 100] Loss: 2.3428
[Epoch 1, Batch 200] Loss: 0.9029
[Epoch 1, Batch 300] Loss: 0.6482
[Epoch 1, Batch 400] Loss: 0.5874
[Epoch 1, Batch 500] Loss: 0.5209
[Epoch 1, Batch 600] Loss: 0.4911
[Epoch 1, Batch 700] Loss: 0.4730
[Epoch 1, Batch 800] Loss: 0.4677
[Epoch 1, Batch 900] Loss: 0.4371
[Epoch 1, Batch 1000] Loss: 0.4453
[Epoch 1, Batch 1100] Loss: 0.4340
[Epoch 1, Batch 1200] Loss: 0.4202
[Epoch 1, Batch 1300] Loss: 0.4138
[Epoch 1, Batch 1400] Loss: 0.4126
Epoch 1: Validation Accuracy = 87.42%
[Epoch 2, Batch 100] Loss: 0.3509
[Epoch 2, Batch 200] Loss: 0.3606
[Epoch 2, Batch 300] Loss: 0.3580
[Epoch 2, Batch 400] Loss: 0.3482
[Epoch 2, Batch 500] Loss: 0.3661
[Epoch 2, Batch 600] Loss: 0.3506
[Epoch 2, Batch 700] Loss: 0.3602
[Epoch 2, Batch 800] Loss: 0.3455
[Epoch 2, Batch 900] Loss: 0.3686
[Epoch 2, Batch 1000] Loss: 0.3547
[Epoch 2, Batch 1100] Loss: 0.3535
[Epoch 2, Batch 1200] Loss: 0.3700
[Epoch 2, Batch 1300] Loss: 0.3255
[Epoch 2, Batch 1400] Loss: 0.3424


In [None]:
from sklearn.metrics import precision_score, recall_score, f1_score

# Score the fine-tuned ResNet-18 on the test loader.
resnet18.eval()
y_true, y_pred = [], []

with torch.no_grad():
    for images, labels in test_loader:
        labels = labels.to(device)
        # Tile the single input channel three times, as was done during training.
        images = images.to(device).repeat(1, 3, 1, 1)

        outputs = resnet18(images)
        predicted = outputs.max(dim=1).indices

        # Back to CPU / NumPy for the scikit-learn metric functions.
        y_true.extend(labels.cpu().numpy())
        y_pred.extend(predicted.cpu().numpy())

# Macro-averaged precision / recall / F1, plus plain accuracy (fraction of exact matches).
precision = precision_score(y_true, y_pred, average="macro")
recall = recall_score(y_true, y_pred, average="macro")
f1 = f1_score(y_true, y_pred, average="macro")
accuracy = sum(y_t == y_p for y_t, y_p in zip(y_true, y_pred)) / len(y_true)

print("=== ResNet-18 Model Performance ===")
print(f"Accuracy: {accuracy:.4f}")
print(f"Precision: {precision:.4f}")
print(f"Recall   : {recall:.4f}")
print(f"F1-Score: {f1:.4f}")


=== ResNet-18 Model Performance ===
Accuracy: 0.8856
Precision: 0.8902
Recall   : 0.8856
F1-Score: 0.8843


In [None]:
from sklearn.metrics import precision_score, recall_score, f1_score

def evaluate_model(model, data_loader, label_to_char, device):
    """Score ``model`` on ``data_loader`` and decode its predictions to text.

    Args:
        model: Network to evaluate; it is switched to eval mode. Instances of
            ``models.ResNet`` get their input channel dimension tiled 3x.
        data_loader: Iterable yielding ``(images, labels)`` batches.
        label_to_char: Mapping from predicted class id to a character.
        device: Device the batches are moved to before the forward pass.

    Returns:
        Tuple ``(accuracy, precision, recall, f1, reconstructed_sentences)``.
        Precision, recall and F1 are macro-averaged. ``reconstructed_sentences``
        holds one string per batch, built from that batch's predicted characters.
    """
    model.eval()
    targets, predictions, decoded_batches = [], [], []

    with torch.no_grad():
        for batch_images, batch_labels in data_loader:
            batch_images = batch_images.to(device)
            batch_labels = batch_labels.to(device)

            # Tile the channel dimension three times before feeding a ResNet.
            if isinstance(model, models.ResNet):
                batch_images = batch_images.repeat(1, 3, 1, 1)

            # Index of the highest score along dim 1 is the predicted class.
            logits = model(batch_images)
            batch_pred = torch.max(logits, 1).indices

            # Accumulate on the CPU as NumPy values for the scikit-learn metrics.
            targets.extend(batch_labels.cpu().numpy())
            predictions.extend(batch_pred.cpu().numpy())

            # Translate class ids to characters and join the batch into one string.
            decoded_batches.append(
                "".join(label_to_char[class_id] for class_id in batch_pred.tolist())
            )

    # Accuracy is the fraction of exact matches between targets and predictions.
    n_correct = sum(y_t == y_p for y_t, y_p in zip(targets, predictions))
    accuracy = n_correct / len(targets)
    precision = precision_score(targets, predictions, average="macro")
    recall = recall_score(targets, predictions, average="macro")
    f1 = f1_score(targets, predictions, average="macro")

    return accuracy, precision, recall, f1, decoded_batches

def _print_metrics(header, accuracy, precision, recall, f1):
    """Print one model's header line followed by its four metric lines."""
    print(header)
    print(f"- Accuracy: {accuracy:.4f}")
    print(f"- Precision: {precision:.4f}")
    print(f"- Recall   : {recall:.4f}")
    print(f"- F1-Score: {f1:.4f}")


# Run the shared evaluation routine on the CNN ...
cnn_accuracy, cnn_precision, cnn_recall, cnn_f1, cnn_sentences = evaluate_model(
    model, test_loader, label_to_char, device
)

# ... and on the fine-tuned ResNet-18, using the same test loader.
resnet_accuracy, resnet_precision, resnet_recall, resnet_f1, resnet_sentences = evaluate_model(
    resnet18, test_loader, label_to_char, device
)

# Metric comparison; the ResNet header carries a leading newline to separate the two sections.
print("=== Model Comparison ===")
_print_metrics("CNN Model:", cnn_accuracy, cnn_precision, cnn_recall, cnn_f1)
_print_metrics("\nResNet-18 Model:", resnet_accuracy, resnet_precision, resnet_recall, resnet_f1)

# First five decoded batches from each model.
print("\n=== Example Reconstructed Sentences ===")
for heading, sentences in (
    ("CNN Model:", cnn_sentences),
    ("\nResNet-18 Model:", resnet_sentences),
):
    print(heading)
    for i, sentence in enumerate(sentences[:5]):
        print(f"Sentence {i+1}: {sentence}")


=== Model Comparison ===
CNN Model:
- Accuracy: 0.8622
- Precision: 0.8657
- Recall   : 0.8622
- F1-Score: 0.8600

ResNet-18 Model:
- Accuracy: 0.8856
- Precision: 0.8902
- Recall   : 0.8856
- F1-Score: 0.8843

=== Example Reconstructed Sentences ===
CNN Model:
Sentence 1: e9Q9XEA3CG72GGR68SrT84ZeFeXb9eLSFnCKQC1bbqN35nP1EGVqLQHqSYZ0eXGX
Sentence 2: 23qIadQSZeNEK3ZrHSXCWVBDfTGWQD1Cg5hHd4BJBOBAArf4URb4P88GY3dA1QdK
Sentence 3: ePUTBd5QhFDAXC5PWa0CJ9CAD9YEtNKFWZq5M4KU1SZ6XtASPQTQ1E9KDQ43fKTt
Sentence 4: L0d14MD3O5t6AGPBfASSXIPHTK06d5Id4nhCQ5AhL6eACOa09I9Sr4P2hTgDU92N
Sentence 5: JH8AX2FYnSFYa3GatAIRRU8N2dFfBH5B2PKNXSS8FaBdN74L815ZRJZLKKD7qM2F

ResNet-18 Model:
Sentence 1: e9QqXEB3LG72GGR6qSrT84ZeFeXb9CLSfnCKQC1bbqN35nP1E6VgLQH9SYZ0eXFX
Sentence 2: 23qIadQSZeNfX3ZrHSXCWV8DfTGWQDLCq5hHdYBJB0BAAnf4URb4P88GY3dA1QdK
Sentence 3: ePUTBd5QhfDAXC5P6a0CJ9CAD9YEtNKfWZq5M4KUIS26XtASPQTQ1E9KDQ43fKTt
Sentence 4: L0d1HMD305t6AGPBfASSXIPHTK06d5Id4nhCQ5AhL6eAC0n09I9SrYP2hTgDU92N
Sentence 5: JH8AX2FYnSFYa3G