In [1]:
!pip install numpy

Defaulting to user installation because normal site-packages is not writeable


In [1]:
import numpy as np

In [2]:
# Standard imports
import os
import sys
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
import torchvision
import torchvision.transforms as T
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
import csv

`load_pickle()` reads one CIFAR-10 batch file, which is stored in Python's binary pickle format, and returns the unpickled dictionary.

The training data is loaded from 5 batch files. The images and labels are extracted from each batch, and every image is transformed from (C,H,W) to (H,W,C) using transpose(1,2,0).

Data augmentation is applied first; the images are then converted to tensors and normalized with the per-channel mean and standard deviation to stabilize and speed up training.

Images were padded by 4 pixels and then randomly cropped back to 32x32 pixels, which helps training so that images with slight shifts are also classified correctly.

DataLoader shuffles the data so that the model sees the samples in a random order instead of the same order every epoch; 4 worker processes are used to speed up training by loading batches in parallel.

Batch size of 256 is used to process many samples of data simultaneously 

In [3]:
import pickle
import numpy as np

import pickle
import numpy as np
import torch
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms
from PIL import Image

def load_pickle(file_path):
    """Unpickle and return the object stored at ``file_path``.

    The file is read in binary mode and decoded with ``encoding='bytes'``,
    so string keys in the pickled data come back as ``bytes`` objects.
    Only use this on trusted files: unpickling can execute arbitrary code.
    """
    with open(file_path, mode='rb') as stream:
        return pickle.load(stream, encoding='bytes')

# The five pickled training batches, expected next to the notebook.
batch_files = [
    "./data_batch_1",
    "./data_batch_2",
    "./data_batch_3",
    "./data_batch_4",
    "./data_batch_5"
]

# Unpickle every batch once, then gather pixels and labels separately.
loaded_batches = [load_pickle(path) for path in batch_files]

# Stack the per-batch pixel rows and view them as (N, 3, 32, 32).
train_data = np.vstack([entry[b'data'] for entry in loaded_batches]).reshape(-1, 3, 32, 32)

# Flatten the per-batch label lists into one array aligned with train_data.
train_labels = np.array([label for entry in loaded_batches for label in entry[b'labels']])

class CIFAR10Dataset(Dataset):
    """Map-style dataset over in-memory image and label sequences.

    Each sample is transposed with ``(1, 2, 0)`` and wrapped in a PIL image,
    so ``images[idx]`` is expected to be a channel-first array
    (presumably uint8, as produced by the CIFAR batch files - TODO confirm).
    When ``transform`` is given it is applied to the PIL image.
    """

    def __init__(self, images, labels, transform=None):
        self.images, self.labels = images, labels
        self.transform = transform

    def __len__(self):
        # One sample per entry of the image container.
        return len(self.images)

    def __getitem__(self, idx):
        # Move the channel axis last before handing the array to PIL.
        pil_image = Image.fromarray(self.images[idx].transpose(1, 2, 0))
        sample = self.transform(pil_image) if self.transform else pil_image
        return sample, self.labels[idx]

# Training-time augmentation followed by per-channel normalization.
# The std values are the same (0.2470, 0.2435, 0.2616) that train(),
# continue_training() and the evaluation cells use, so every pipeline in
# this notebook scales its inputs identically.
transform_train = transforms.Compose([
    transforms.RandomCrop(32, padding=4),
    transforms.RandomHorizontalFlip(),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.4914, 0.4822, 0.4465],
                         std=[0.2470, 0.2435, 0.2616])
])

# Wrap the in-memory arrays in a dataset and a shuffling loader.
trainset = CIFAR10Dataset(train_data, train_labels, transform=transform_train)
trainloader = DataLoader(trainset, batch_size=256, shuffle=True, num_workers=4)

The Mish activation function is used to provide smooth gradients and improve the convergence of the network:
$$\text{Mish}(x) = x \cdot \tanh\left(\ln\left(1 + e^{x}\right)\right)$$

The SE (squeeze-and-excitation) block generates per-channel attention weights that scale the output of the convolutional layers.

Each basic block has 2 convolutional layers with batch normalization and Mish activation; a skip connection is added so that gradients can flow through the block, which mitigates the vanishing-gradient problem.


Modified ResNet18 - there are 4 major layers to capture different levels of abstraction that is built using stacking the basic blocks

The adaptive average pooling layer compresses the spatial dimensions to 1x1 per channel; it is used to summarize each feature map as a single value per channel.

Normalization is added here as well for stability and speeding up the training 


While training the model, the transformations and conversions used when reading and processing the data are applied here as well, together with AutoAugment and RandomErasing.

Cross-entropy loss is used to measure how well the predictions align with the true labels.

SGD is used for stable learning over time 

Weight decay is used to regularize the model; values of 5e-3 and 1e-3 were tried.

CosineAnnealingWarmRestarts is used to change the learning rate dynamically: the rate follows a cosine decay that depends on the number of epochs and is periodically reset to its initial value.

Training Loss

In [9]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torchvision.transforms as transforms
from torchvision.datasets import CIFAR10
from torch.utils.data import DataLoader
from torch.optim.lr_scheduler import CosineAnnealingWarmRestarts
from tqdm import tqdm
from torchsummary import summary

class Mish(nn.Module):
    """Mish activation: ``x * tanh(softplus(x))``, applied element-wise."""

    def forward(self, x):
        gate = torch.tanh(F.softplus(x))
        return x * gate

class SEBlock(nn.Module):
    """Squeeze-and-Excitation channel gating.

    The input is pooled to 1x1 per channel, passed through a 1x1-conv
    bottleneck (``channels // reduction`` channels, at least one) with a Mish
    activation, expanded back to ``channels`` and squashed by a sigmoid.
    The resulting per-channel weights multiply the input.

    Args:
        channels: number of channels of the incoming feature map.
        reduction: divisor applied to ``channels`` for the bottleneck width.
    """

    def __init__(self, channels, reduction=16):
        super(SEBlock, self).__init__()
        # Integer division gives 0 whenever channels < reduction, which would
        # build a bottleneck without any channels; keep at least one.
        squeezed = max(1, channels // reduction)
        # Layer order inside the Sequential is kept so existing checkpoints
        # (keys se.1.* and se.3.*) still load.
        self.se = nn.Sequential(
            nn.AdaptiveAvgPool2d(1),
            nn.Conv2d(channels, squeezed, 1),
            Mish(),
            nn.Conv2d(squeezed, channels, 1),
            nn.Sigmoid()
        )

    def forward(self, x):
        # self.se(x) has shape (N, C, 1, 1) and broadcasts over H and W.
        return x * self.se(x)

class BasicBlock(nn.Module):
    """Residual block: conv3x3-BN-Mish, conv3x3-BN, SE gating, shortcut add, Mish.

    The shortcut is the identity unless the stride differs from 1 or the
    channel count changes, in which case a 1x1 conv + BN projection is used.

    Args:
        in_planes: number of input channels.
        planes: number of output channels.
        stride: stride of the first convolution (and of the projection).
    """
    expansion = 1

    def __init__(self, in_planes, planes, stride=1):
        super(BasicBlock, self).__init__()
        self.conv1 = nn.Conv2d(in_planes, planes, kernel_size=3, stride=stride, padding=1, bias=False)
        self.bn1 = nn.BatchNorm2d(planes)
        self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=1, padding=1, bias=False)
        self.bn2 = nn.BatchNorm2d(planes)
        self.se = SEBlock(planes)
        # Built once instead of on every forward call. Mish holds no
        # parameters or buffers, so the state_dict keys are unchanged.
        self.act = Mish()

        self.shortcut = nn.Sequential()
        if stride != 1 or in_planes != planes:
            self.shortcut = nn.Sequential(
                nn.Conv2d(in_planes, planes, kernel_size=1, stride=stride, bias=False),
                nn.BatchNorm2d(planes)
            )

    def forward(self, x):
        out = self.act(self.bn1(self.conv1(x)))
        out = self.bn2(self.conv2(out))
        out = self.se(out)
        out += self.shortcut(x)
        return self.act(out)

class ResNet18(nn.Module):
    """ResNet-style classifier with Mish activations.

    Layout: 3x3 stem conv (32 channels) -> four stages with 32, 64, 256 and
    256 planes (strides 1, 2, 2, 2) -> global average pooling -> dropout ->
    linear classifier.

    Args:
        block: block class; called as ``block(in_planes, planes, stride)`` and
            must expose an ``expansion`` attribute.
        num_blocks: sequence with the number of blocks for each of the 4 stages.
        num_classes: size of the output layer.
        dropout_prob: dropout probability applied to the pooled features
            during training.
    """

    def __init__(self, block, num_blocks, num_classes=10, dropout_prob=0.5):
        super(ResNet18, self).__init__()
        self.in_planes = 32
        # Previously accepted but ignored (forward hard-coded p=0.5).
        self.dropout_prob = dropout_prob

        self.conv1 = nn.Conv2d(3, 32, kernel_size=3, stride=1, padding=1, bias=False)
        self.bn1 = nn.BatchNorm2d(32)
        # Parameter-free activation, created once rather than per forward call.
        self.act = Mish()

        self.layer1 = self._make_layer(block, 32, num_blocks[0], stride=1)
        self.layer2 = self._make_layer(block, 64, num_blocks[1], stride=2)
        self.layer3 = self._make_layer(block, 256, num_blocks[2], stride=2)
        self.layer4 = self._make_layer(block, 256, num_blocks[3], stride=2)
        self.avgpool = nn.AdaptiveAvgPool2d(1)
        self.fc = nn.Linear(256, num_classes)

    def _make_layer(self, block, planes, num_blocks, stride):
        """Stack ``num_blocks`` blocks; only the first one uses ``stride``."""
        strides = [stride] + [1] * (num_blocks - 1)
        layers = []
        for block_stride in strides:
            layers.append(block(self.in_planes, planes, block_stride))
            # The next block consumes what this one produced.
            self.in_planes = planes * block.expansion
        return nn.Sequential(*layers)

    def forward(self, x):
        out = self.act(self.bn1(self.conv1(x)))
        out = self.layer1(out)
        out = self.layer2(out)
        out = self.layer3(out)
        out = self.layer4(out)
        out = self.avgpool(out)
        # Active only in training mode; a no-op under model.eval().
        out = F.dropout(out, p=self.dropout_prob, training=self.training)
        out = torch.flatten(out, 1)
        return self.fc(out)

def train():
    """Train the modified ResNet18 on CIFAR-10 from scratch for 100 epochs.

    Uses the torchvision CIFAR10 training split (downloaded to ``./data`` if
    missing) with crop / flip / AutoAugment / RandomErasing augmentation,
    label-smoothed cross entropy, SGD with momentum and a
    CosineAnnealingWarmRestarts schedule stepped once per epoch.

    After every epoch the mean batch loss and the accuracy measured on the
    augmented training batches are printed and the model weights are written
    to the checkpoint file. A torchsummary table is printed at the end.
    """
    num_epochs = 100
    # The inference and continue-training cells of this notebook load
    # "optimized_resnet18.pth"; the former "1optimized_resnet18.pth" was
    # never read back anywhere in the notebook.
    checkpoint_path = "optimized_resnet18.pth"

    device = 'cuda' if torch.cuda.is_available() else 'cpu'
    transform_train = transforms.Compose([
        transforms.RandomCrop(32, padding=4),
        transforms.RandomHorizontalFlip(),
        transforms.AutoAugment(),
        transforms.ToTensor(),          # PIL image -> tensor
        transforms.RandomErasing(),     # operates on tensors, so after ToTensor
        transforms.Normalize((0.4914, 0.4822, 0.4465), (0.2470, 0.2435, 0.2616))
    ])

    trainset = CIFAR10(root='./data', train=True, download=True, transform=transform_train)
    trainloader = DataLoader(trainset, batch_size=256, shuffle=True, num_workers=4)

    model = ResNet18(BasicBlock, [2, 2, 2, 2]).to(device)
    criterion = nn.CrossEntropyLoss(label_smoothing=0.1)
    optimizer = torch.optim.SGD(model.parameters(), lr=0.05, momentum=0.9, weight_decay=1e-3)
    scheduler = CosineAnnealingWarmRestarts(optimizer, T_0=10, T_mult=2)

    for epoch in range(num_epochs):
        model.train()
        total_loss = 0
        correct = 0
        # The bar total now reflects the number of epochs this loop really runs.
        for inputs, labels in tqdm(trainloader, desc=f"Epoch {epoch+1}/{num_epochs}"):
            inputs, labels = inputs.to(device), labels.to(device)
            optimizer.zero_grad()
            outputs = model(inputs)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()
            total_loss += loss.item()
            correct += outputs.argmax(dim=1).eq(labels).sum().item()

        scheduler.step()
        print(f'Epoch {epoch+1}: Loss {total_loss/len(trainloader):.4f} | Acc: {100.*correct/len(trainset):.2f}%')
        # Overwritten every epoch, so the file always holds the latest weights.
        torch.save(model.state_dict(), checkpoint_path)

    print("\nüîç Model Summary After 100 Epochs:")
    summary(model, (3, 32, 32))

if __name__ == '__main__':
    train()

In [15]:
import torch
import torchvision.transforms as transforms
import pandas as pd
import pickle
#from model import ResNet18CIFAR  # Ensure that ResNet18CIFAR is defined in model.py

# ‚úÖ Step 1: Load the trained model
# Run on the GPU when one is visible, otherwise on the CPU.
if torch.cuda.is_available():
    device = 'cuda'
else:
    device = 'cpu'

# Rebuild the architecture, then restore the trained weights into it.
model = ResNet18(BasicBlock, [2, 2, 2, 2]).to(device)
state_dict = torch.load("optimized_resnet18.pth", map_location=device)
model.load_state_dict(state_dict)

# Keep the model on the chosen device and switch it to inference behaviour.
model = model.to(device)
model = model.eval()
print("‚úÖ Model loaded successfully!")

# ‚úÖ Step 2: Load CIFAR test data (without labels)
def load_cifar_batch(file):
    """Read the pickled object stored at path ``file`` and return it.

    Decoding uses ``encoding='bytes'``, so string keys are ``bytes`` objects.
    """
    with open(file, mode='rb') as stream:
        return pickle.load(stream, encoding='bytes')

# ---- Load the unlabeled test set --------------------------------------------
file_path = "./cifar_test_nolabel.pkl"
cifar10_batch = load_cifar_batch(file_path)

# Pixel data (channel-last) and the row identifiers for the submission file.
test_images = cifar10_batch[b'data']
image_ids = cifar10_batch[b'ids']

print(f"‚úÖ Test set loaded: {test_images.shape}")

# ---- Preprocess --------------------------------------------------------------
# Float tensor, channels moved to axis 1, pixel values divided by 255.
test_images = torch.tensor(test_images, dtype=torch.float32)
test_images = test_images.permute(0, 3, 1, 2) / 255.0

# Same per-channel statistics as the training pipeline.
transform = transforms.Normalize(mean=[0.4914, 0.4822, 0.4465], std=[0.2470, 0.2435, 0.2616])

# Normalize image by image and stack the results back into one batch tensor.
test_images = torch.stack(list(map(transform, test_images)))

# ---- Inference ---------------------------------------------------------------
test_images = test_images.to(device)
batch_size = 64

predictions = []
with torch.no_grad():
    for start in range(0, len(test_images), batch_size):
        batch = test_images[start:start + batch_size]
        outputs = model(batch)
        # Index of the largest logit in every row is the predicted class.
        predicted_labels = torch.max(outputs, 1).indices
        predictions.extend(predicted_labels.cpu().numpy())

print("‚úÖ Inference completed!")

# ---- Write the submission file -----------------------------------------------
submission_df = pd.DataFrame(data={"ID": image_ids, "Labels": predictions})
submission_df.to_csv("submission.csv", index=False)
print("submission.csv generated! You can submit this file to Kaggle.")

‚úÖ Model loaded successfully!
‚úÖ Test set loaded: (10000, 32, 32, 3)
‚úÖ Inference completed!
submission.csv generated! You can submit this file to Kaggle.


In [16]:
import pandas as pd

# Read the generated submission back from disk to sanity-check it.
submission_path = "./submission.csv"
df = pd.read_csv(submission_path)

# Show the leading rows to confirm the column layout.
print(df.head())

   ID  Labels
0   0       6
1   1       1
2   2       8
3   3       6
4   4       9


In [17]:
# Number of distinct classes that appear among the predictions.
labels_column = df['Labels']
unique_labels = labels_column.nunique()
print(f"Total unique labels: {unique_labels}")

# How often each class was predicted.
label_counts = labels_column.value_counts()
print(label_counts)

Total unique labels: 10
Labels
8    1200
9    1151
3    1111
7    1050
5    1045
4     958
2     954
6     937
1     840
0     754
Name: count, dtype: int64


In [18]:
import torch
import torchvision.transforms as transforms
import pickle

def load_cifar_batch(file):
    """Return the object unpickled from the file at path ``file``.

    The file is opened in binary mode and decoded with ``encoding='bytes'``.
    """
    with open(file, mode='rb') as source:
        contents = pickle.load(source, encoding='bytes')
    return contents

# ‚úÖ Load the trained model
# Prefer the GPU when available.
if torch.cuda.is_available():
    device = 'cuda'
else:
    device = 'cpu'

# Recreate the network and fill it with the saved weights.
model = ResNet18(BasicBlock, [2, 2, 2, 2]).to(device)
state_dict = torch.load("optimized_resnet18.pth", map_location=device)
model.load_state_dict(state_dict)

# Same device as above; evaluation mode for inference.
model = model.to(device)
model = model.eval()
print("‚úÖ Model loaded successfully!")

# ‚úÖ Load the labeled CIFAR test dataset
# ---- Load the labeled test batch --------------------------------------------
file_path = "test_batch"  # Adjust path as needed
cifar_batch = load_cifar_batch(file_path)

# The batch is expected to provide b'data' (images) and b'labels' (targets).
test_images = cifar_batch[b'data']
true_labels = cifar_batch[b'labels']

print(f"‚úÖ Test set loaded: {test_images.shape}")

# ---- Bring the images into (N, C, H, W) float form ---------------------------
# The size of axis 1 tells the layouts apart.
axis1_size = test_images.shape[1]
if axis1_size == 3072:
    # Flattened rows: only reshape here, conversion to a tensor happens below.
    test_images = test_images.reshape(-1, 3, 32, 32)
elif axis1_size == 32:
    # Channel-last images: convert, move channels to axis 1, divide by 255.
    test_images = torch.tensor(test_images, dtype=torch.float32).permute(0, 3, 1, 2) / 255.0
else:
    # Anything else is converted and scaled without rearranging axes.
    test_images = torch.tensor(test_images, dtype=torch.float32) / 255.0

# Whatever is still not a tensor gets converted and scaled now.
if not isinstance(test_images, torch.Tensor):
    test_images = torch.tensor(test_images, dtype=torch.float32) / 255.0

# Last safeguard: make sure the channel axis sits at position 1.
if test_images.shape[1] != 3:
    test_images = test_images.permute(0, 3, 1, 2)

# ---- Normalize with the training statistics ----------------------------------
normalize_transform = transforms.Normalize(mean=[0.4914, 0.4822, 0.4465],
                                             std=[0.2470, 0.2435, 0.2616])

test_images = torch.stack(list(map(normalize_transform, test_images)))
test_images = test_images.to(device)

# Targets as a long tensor on the same device as the predictions.
true_labels = torch.tensor(true_labels, dtype=torch.long).to(device)

# ---- Batched inference and accuracy ------------------------------------------
batch_size = 64
total = test_images.size(0)
correct = 0

with torch.no_grad():
    for start in range(0, total, batch_size):
        stop = start + batch_size
        predicted = torch.max(model(test_images[start:stop]), 1).indices
        correct += (predicted == true_labels[start:stop]).sum().item()

accuracy = 100 * correct / total
print(f"Accuracy on test set: {accuracy:.2f}%")

‚úÖ Model loaded successfully!
‚úÖ Test set loaded: (10000, 3072)
Accuracy on test set: 93.76%


In [None]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torchvision.transforms as transforms
from torchvision.datasets import CIFAR10
from torch.utils.data import DataLoader
from torch.optim.lr_scheduler import CosineAnnealingWarmRestarts
from tqdm import tqdm
from torchsummary import summary

# Mish Activation
class Mish(nn.Module):
    """Element-wise Mish activation, ``x * tanh(softplus(x))``."""

    def forward(self, x):
        softplus_x = F.softplus(x)
        return x * softplus_x.tanh()

# SE Block
class SEBlock(nn.Module):
    """Squeeze-and-Excitation channel gating.

    Global-average-pools the input to 1x1, runs it through a 1x1-conv
    bottleneck (``channels // reduction`` channels, at least one) with Mish,
    expands back to ``channels`` and applies a sigmoid. The input is
    multiplied by the resulting per-channel weights.

    Args:
        channels: number of channels of the incoming feature map.
        reduction: divisor applied to ``channels`` for the bottleneck width.
    """

    def __init__(self, channels, reduction=16):
        super(SEBlock, self).__init__()
        # channels // reduction is 0 for channels < reduction; a bottleneck
        # without channels would make the gate independent of the input.
        bottleneck = max(1, channels // reduction)
        # Same layer order as before, so checkpoint keys se.1.* / se.3.* match.
        self.se = nn.Sequential(
            nn.AdaptiveAvgPool2d(1),
            nn.Conv2d(channels, bottleneck, 1),
            Mish(),
            nn.Conv2d(bottleneck, channels, 1),
            nn.Sigmoid()
        )

    def forward(self, x):
        # Gate of shape (N, C, 1, 1) broadcast over the spatial dimensions.
        return x * self.se(x)

# Optimized Basic Block
class BasicBlock(nn.Module):
    """Residual block with SE gating and Mish activations.

    Main path: conv3x3 -> BN -> Mish -> conv3x3 -> BN -> SE. The shortcut
    (identity, or 1x1 conv + BN when the stride is not 1 or the channel
    count changes) is added and a final Mish is applied.

    Args:
        in_planes: number of input channels.
        planes: number of output channels.
        stride: stride of the first convolution (and of the projection).
    """
    expansion = 1

    def __init__(self, in_planes, planes, stride=1):
        super(BasicBlock, self).__init__()
        self.conv1 = nn.Conv2d(in_planes, planes, kernel_size=3, stride=stride, padding=1, bias=False)
        self.bn1 = nn.BatchNorm2d(planes)
        self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=1, padding=1, bias=False)
        self.bn2 = nn.BatchNorm2d(planes)
        self.se = SEBlock(planes)
        # One shared, parameter-free activation instead of a fresh module per
        # forward call; it contributes no entries to the state_dict.
        self.act = Mish()

        self.shortcut = nn.Sequential()
        if stride != 1 or in_planes != planes:
            self.shortcut = nn.Sequential(
                nn.Conv2d(in_planes, planes, kernel_size=1, stride=stride, bias=False),
                nn.BatchNorm2d(planes)
            )

    def forward(self, x):
        out = self.act(self.bn1(self.conv1(x)))
        out = self.bn2(self.conv2(out))
        out = self.se(out)
        out += self.shortcut(x)
        return self.act(out)

# ResNet18 Modified
class ResNet18(nn.Module):
    """ResNet-style classifier with Mish activations.

    Layout: 3x3 stem conv (32 channels) -> four stages with 32, 64, 256 and
    256 planes (strides 1, 2, 2, 2) -> global average pooling -> flatten ->
    dropout -> linear classifier.

    Args:
        block: block class; called as ``block(in_planes, planes, stride)`` and
            must expose an ``expansion`` attribute.
        num_blocks: sequence with the number of blocks for each of the 4 stages.
        num_classes: size of the output layer.
        dropout_prob: dropout probability applied to the pooled features
            during training. The default 0.5 equals the previously
            hard-coded value.
    """

    def __init__(self, block, num_blocks, num_classes=10, dropout_prob=0.5):
        super(ResNet18, self).__init__()
        self.in_planes = 32
        self.dropout_prob = dropout_prob

        self.conv1 = nn.Conv2d(3, 32, kernel_size=3, stride=1, padding=1, bias=False)
        self.bn1 = nn.BatchNorm2d(32)
        # Parameter-free activation, created once rather than per forward call.
        self.act = Mish()

        self.layer1 = self._make_layer(block, 32, num_blocks[0], stride=1)
        self.layer2 = self._make_layer(block, 64, num_blocks[1], stride=2)
        self.layer3 = self._make_layer(block, 256, num_blocks[2], stride=2)
        self.layer4 = self._make_layer(block, 256, num_blocks[3], stride=2)
        self.avgpool = nn.AdaptiveAvgPool2d(1)
        self.fc = nn.Linear(256, num_classes)

    def _make_layer(self, block, planes, num_blocks, stride):
        """Stack ``num_blocks`` blocks; only the first one uses ``stride``."""
        strides = [stride] + [1] * (num_blocks - 1)
        layers = []
        for block_stride in strides:
            layers.append(block(self.in_planes, planes, block_stride))
            # The next block consumes what this one produced.
            self.in_planes = planes * block.expansion
        return nn.Sequential(*layers)

    def forward(self, x):
        out = self.act(self.bn1(self.conv1(x)))
        out = self.layer1(out)
        out = self.layer2(out)
        out = self.layer3(out)
        out = self.layer4(out)
        out = self.avgpool(out)
        out = torch.flatten(out, 1)
        # Active only in training mode; a no-op under model.eval().
        out = F.dropout(out, p=self.dropout_prob, training=self.training)
        return self.fc(out)

def continue_training():
    """Resume training from ``optimized_resnet18.pth`` for epochs 101-200.

    Loads the saved weights into a freshly built ResNet18 and trains for
    another 100 epochs on the augmented CIFAR-10 training split. After every
    epoch the mean batch loss and the accuracy on the augmented training
    batches are printed and the checkpoint file is overwritten. A
    torchsummary table is printed at the end.

    Only the model weights are restored: the optimizer and the
    CosineAnnealingWarmRestarts scheduler are created anew, so the learning
    rate schedule starts over from its initial value.
    """
    device = 'cuda' if torch.cuda.is_available() else 'cpu'
    checkpoint_path = "optimized_resnet18.pth"

    # Data augmentation and normalization for training
    transform_train = transforms.Compose([
        transforms.RandomCrop(32, padding=4),
        transforms.RandomHorizontalFlip(),
        transforms.AutoAugment(),
        transforms.ToTensor(),          # convert PIL to Tensor first
        transforms.RandomErasing(),     # then apply RandomErasing
        transforms.Normalize((0.4914, 0.4822, 0.4465), (0.2470, 0.2435, 0.2616))
    ])

    # Load CIFAR10 dataset
    trainset = CIFAR10(root='./data', train=True, download=True, transform=transform_train)
    trainloader = DataLoader(trainset, batch_size=256, shuffle=True, num_workers=4)

    # Instantiate the model and load the state from the previous 100 epochs.
    # map_location lets a checkpoint that was saved on a GPU be restored on a
    # CPU-only machine, matching how the evaluation cells load the same file.
    model = ResNet18(BasicBlock, [2, 2, 2, 2]).to(device)
    model.load_state_dict(torch.load(checkpoint_path, map_location=device))

    # Define loss function, optimizer, and scheduler
    criterion = nn.CrossEntropyLoss(label_smoothing=0.1)
    optimizer = torch.optim.SGD(model.parameters(), lr=0.05, momentum=0.9, weight_decay=1e-4)
    scheduler = CosineAnnealingWarmRestarts(optimizer, T_0=10, T_mult=2)

    # Continue training for another 100 epochs (from epoch 100 to 199)
    for epoch in range(100, 200):
        model.train()
        total_loss = 0
        correct = 0
        for inputs, labels in tqdm(trainloader, desc=f"Epoch {epoch+1}/200"):
            inputs, labels = inputs.to(device), labels.to(device)
            optimizer.zero_grad()
            outputs = model(inputs)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()
            total_loss += loss.item()
            correct += outputs.argmax(dim=1).eq(labels).sum().item()

        scheduler.step()
        print(f'Epoch {epoch+1}: Loss {total_loss/len(trainloader):.4f} | Acc: {100.*correct/len(trainset):.2f}%')
        # Overwritten every epoch, so the file always holds the latest weights.
        torch.save(model.state_dict(), checkpoint_path)

    print("\nüîç Model Summary After 200 Epochs:")
    summary(model, (3, 32, 32))

if __name__ == '__main__':
    continue_training()

Epoch 101/200: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 196/196 [00:15<00:00, 12.62it/s]


Epoch 101: Loss 0.7867 | Acc: 87.84%


Epoch 102/200: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 196/196 [00:12<00:00, 15.27it/s]


Epoch 102: Loss 0.7901 | Acc: 87.68%


Epoch 103/200: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 196/196 [00:12<00:00, 15.75it/s]


Epoch 103: Loss 0.7848 | Acc: 88.07%


Epoch 104/200: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 196/196 [00:12<00:00, 15.45it/s]


Epoch 104: Loss 0.7724 | Acc: 88.54%


Epoch 105/200: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 196/196 [00:12<00:00, 15.60it/s]


Epoch 105: Loss 0.7611 | Acc: 88.98%


Epoch 106/200: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 196/196 [00:12<00:00, 15.61it/s]


Epoch 106: Loss 0.7436 | Acc: 89.72%


Epoch 107/200: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 196/196 [00:12<00:00, 15.66it/s]


Epoch 107: Loss 0.7275 | Acc: 90.54%


Epoch 108/200: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 196/196 [00:12<00:00, 15.22it/s]


Epoch 108: Loss 0.7149 | Acc: 91.19%


Epoch 109/200: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 196/196 [00:12<00:00, 15.15it/s]


Epoch 109: Loss 0.7129 | Acc: 91.13%


Epoch 110/200: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 196/196 [00:13<00:00, 14.73it/s]


Epoch 110: Loss 0.7021 | Acc: 91.72%


Epoch 111/200: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 196/196 [00:13<00:00, 14.58it/s]


Epoch 111: Loss 0.7760 | Acc: 88.38%


Epoch 112/200: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 196/196 [00:13<00:00, 14.41it/s]


Epoch 112: Loss 0.7842 | Acc: 88.09%


Epoch 113/200: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 196/196 [00:13<00:00, 14.63it/s]


Epoch 113: Loss 0.7807 | Acc: 88.18%


Epoch 114/200: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 196/196 [00:12<00:00, 15.21it/s]


Epoch 114: Loss 0.7804 | Acc: 88.03%


Epoch 115/200: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 196/196 [00:12<00:00, 15.38it/s]


Epoch 115: Loss 0.7741 | Acc: 88.42%


Epoch 116/200: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 196/196 [00:13<00:00, 14.74it/s]


Epoch 116: Loss 0.7671 | Acc: 88.67%


Epoch 117/200: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 196/196 [00:12<00:00, 15.41it/s]


Epoch 117: Loss 0.7607 | Acc: 89.02%


Epoch 118/200: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 196/196 [00:13<00:00, 14.98it/s]


Epoch 118: Loss 0.7560 | Acc: 89.16%


Epoch 119/200: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 196/196 [00:13<00:00, 14.28it/s]


Epoch 119: Loss 0.7478 | Acc: 89.73%


Epoch 120/200: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 196/196 [00:12<00:00, 15.63it/s]


Epoch 120: Loss 0.7380 | Acc: 90.03%


Epoch 121/200: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 196/196 [00:12<00:00, 15.50it/s]


Epoch 121: Loss 0.7264 | Acc: 90.60%


Epoch 122/200: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 196/196 [00:12<00:00, 15.27it/s]


Epoch 122: Loss 0.7247 | Acc: 90.64%


Epoch 123/200: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 196/196 [00:12<00:00, 15.24it/s]


Epoch 123: Loss 0.7118 | Acc: 91.27%


Epoch 124/200: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 196/196 [00:13<00:00, 14.89it/s]


Epoch 124: Loss 0.7073 | Acc: 91.43%


Epoch 125/200: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 196/196 [00:12<00:00, 15.22it/s]


Epoch 125: Loss 0.6969 | Acc: 91.85%


Epoch 126/200: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 196/196 [00:12<00:00, 15.14it/s]


Epoch 126: Loss 0.6915 | Acc: 92.08%


Epoch 127/200: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 196/196 [00:13<00:00, 14.87it/s]


Epoch 127: Loss 0.6877 | Acc: 92.37%


Epoch 128/200: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 196/196 [00:13<00:00, 14.94it/s]


Epoch 128: Loss 0.6845 | Acc: 92.47%


Epoch 129/200: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 196/196 [00:12<00:00, 15.11it/s]


Epoch 129: Loss 0.6811 | Acc: 92.56%


Epoch 130/200: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 196/196 [00:12<00:00, 15.24it/s]


Epoch 130: Loss 0.6819 | Acc: 92.44%


Epoch 131/200: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 196/196 [00:12<00:00, 15.66it/s]


Epoch 131: Loss 0.7619 | Acc: 88.92%


Epoch 132/200: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 196/196 [00:12<00:00, 15.45it/s]


Epoch 132: Loss 0.7717 | Acc: 88.43%


Epoch 133/200: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 196/196 [00:12<00:00, 15.36it/s]


Epoch 133: Loss 0.7734 | Acc: 88.53%


Epoch 134/200: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 196/196 [00:12<00:00, 15.52it/s]


Epoch 134: Loss 0.7745 | Acc: 88.30%


Epoch 135/200: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 196/196 [00:12<00:00, 15.33it/s]


Epoch 135: Loss 0.7725 | Acc: 88.26%


Epoch 136/200: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 196/196 [00:12<00:00, 15.12it/s]


Epoch 136: Loss 0.7694 | Acc: 88.63%


Epoch 137/200: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 196/196 [00:12<00:00, 15.52it/s]


Epoch 137: Loss 0.7694 | Acc: 88.58%


Epoch 138/200: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 196/196 [00:12<00:00, 15.38it/s]


Epoch 138: Loss 0.7611 | Acc: 89.07%


Epoch 139/200: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 196/196 [00:13<00:00, 14.66it/s]


Epoch 139: Loss 0.7660 | Acc: 88.61%


Epoch 140/200: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 196/196 [00:12<00:00, 15.14it/s]


Epoch 140: Loss 0.7564 | Acc: 89.19%


Epoch 141/200: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 196/196 [00:12<00:00, 15.39it/s]


Epoch 141: Loss 0.7537 | Acc: 89.43%


Epoch 142/200: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 196/196 [00:12<00:00, 15.13it/s]


Epoch 142: Loss 0.7496 | Acc: 89.57%


Epoch 143/200: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 196/196 [00:13<00:00, 14.64it/s]


Epoch 143: Loss 0.7443 | Acc: 89.61%


Epoch 144/200: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 196/196 [00:12<00:00, 15.72it/s]


Epoch 144: Loss 0.7449 | Acc: 89.67%


Epoch 145/200: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 196/196 [00:12<00:00, 15.29it/s]


Epoch 145: Loss 0.7403 | Acc: 89.88%


Epoch 146/200: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 196/196 [00:12<00:00, 15.45it/s]


Epoch 146: Loss 0.7316 | Acc: 90.24%


Epoch 147/200: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 196/196 [00:13<00:00, 14.53it/s]


Epoch 147: Loss 0.7280 | Acc: 90.45%


Epoch 148/200: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 196/196 [00:12<00:00, 15.34it/s]


Epoch 148: Loss 0.7258 | Acc: 90.57%


Epoch 149/200: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 196/196 [00:12<00:00, 15.33it/s]


Epoch 149: Loss 0.7207 | Acc: 90.75%


Epoch 150/200: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 196/196 [00:12<00:00, 15.61it/s]


Epoch 150: Loss 0.7137 | Acc: 91.05%


Epoch 151/200: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 196/196 [00:12<00:00, 15.52it/s]


Epoch 151: Loss 0.7144 | Acc: 91.12%


Epoch 152/200: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 196/196 [00:12<00:00, 15.51it/s]


Epoch 152: Loss 0.7097 | Acc: 91.32%


Epoch 153/200: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 196/196 [00:12<00:00, 15.71it/s]


Epoch 153: Loss 0.7001 | Acc: 91.67%


Epoch 154/200: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 196/196 [00:12<00:00, 15.71it/s]


Epoch 154: Loss 0.7027 | Acc: 91.46%


Epoch 155/200: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 196/196 [00:12<00:00, 15.14it/s]


Epoch 155: Loss 0.6920 | Acc: 92.08%


Epoch 156/200: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 196/196 [00:12<00:00, 15.58it/s]


Epoch 156: Loss 0.6878 | Acc: 92.20%


Epoch 157/200: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 196/196 [00:12<00:00, 15.25it/s]


Epoch 157: Loss 0.6847 | Acc: 92.33%


Epoch 158/200: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 196/196 [00:12<00:00, 15.29it/s]


Epoch 158: Loss 0.6796 | Acc: 92.54%


Epoch 159/200: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 196/196 [00:13<00:00, 14.82it/s]


Epoch 159: Loss 0.6781 | Acc: 92.74%


Epoch 160/200: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 196/196 [00:13<00:00, 14.58it/s]


Epoch 160: Loss 0.6750 | Acc: 92.89%


Epoch 161/200: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 196/196 [00:13<00:00, 14.90it/s]


Epoch 161: Loss 0.6703 | Acc: 93.00%


Epoch 162/200: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 196/196 [00:13<00:00, 14.16it/s]


Epoch 162: Loss 0.6654 | Acc: 93.23%


Epoch 163/200: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 196/196 [00:12<00:00, 15.59it/s]


Epoch 163: Loss 0.6624 | Acc: 93.43%


Epoch 164/200: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 196/196 [00:13<00:00, 14.72it/s]


Epoch 164: Loss 0.6639 | Acc: 93.37%


Epoch 165/200: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 196/196 [00:12<00:00, 15.17it/s]


Epoch 165: Loss 0.6599 | Acc: 93.61%


Epoch 166/200: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 196/196 [00:13<00:00, 14.76it/s]


Epoch 166: Loss 0.6609 | Acc: 93.41%


Epoch 167/200: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 196/196 [00:13<00:00, 14.46it/s]


Epoch 167: Loss 0.6583 | Acc: 93.66%


Epoch 168/200: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 196/196 [00:13<00:00, 14.21it/s]


Epoch 168: Loss 0.6590 | Acc: 93.51%


Epoch 169/200: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 196/196 [00:13<00:00, 14.61it/s]


Epoch 169: Loss 0.6569 | Acc: 93.64%


Epoch 170/200: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 196/196 [00:14<00:00, 13.98it/s]


Epoch 170: Loss 0.6606 | Acc: 93.42%


Epoch 171/200: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 196/196 [00:13<00:00, 14.91it/s]


Epoch 171: Loss 0.7514 | Acc: 89.42%


Epoch 172/200: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 196/196 [00:12<00:00, 15.08it/s]


Epoch 172: Loss 0.7576 | Acc: 89.11%


Epoch 173/200: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 196/196 [00:12<00:00, 15.25it/s]


Epoch 173: Loss 0.7611 | Acc: 88.91%


Epoch 174/200: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 196/196 [00:12<00:00, 15.19it/s]


Epoch 174: Loss 0.7583 | Acc: 89.07%


Epoch 175/200: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 196/196 [00:12<00:00, 15.38it/s]


Epoch 175: Loss 0.7558 | Acc: 89.15%


Epoch 176/200: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 196/196 [00:12<00:00, 15.24it/s]


Epoch 176: Loss 0.7593 | Acc: 89.06%


Epoch 177/200: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 196/196 [00:12<00:00, 15.29it/s]


Epoch 177: Loss 0.7556 | Acc: 89.19%


Epoch 178/200: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 196/196 [00:12<00:00, 15.08it/s]


Epoch 178: Loss 0.7560 | Acc: 89.23%


Epoch 179/200: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 196/196 [00:13<00:00, 14.65it/s]


Epoch 179: Loss 0.7546 | Acc: 89.43%


Epoch 180/200: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 196/196 [00:13<00:00, 14.75it/s]


Epoch 180: Loss 0.7547 | Acc: 89.25%


Epoch 181/200: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 196/196 [00:13<00:00, 14.85it/s]


Epoch 181: Loss 0.7523 | Acc: 89.42%


Epoch 182/200: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 196/196 [00:13<00:00, 14.78it/s]


Epoch 182: Loss 0.7487 | Acc: 89.46%


Epoch 183/200: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 196/196 [00:13<00:00, 14.48it/s]


Epoch 183: Loss 0.7446 | Acc: 89.75%


Epoch 184/200: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 196/196 [00:13<00:00, 14.95it/s]


Epoch 184: Loss 0.7500 | Acc: 89.37%


Epoch 185/200: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 196/196 [00:13<00:00, 14.67it/s]


Epoch 185: Loss 0.7453 | Acc: 89.66%


Epoch 186/200: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 196/196 [00:14<00:00, 13.88it/s]


Epoch 186: Loss 0.7391 | Acc: 89.96%


Epoch 187/200: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 196/196 [00:13<00:00, 14.96it/s]


Epoch 187: Loss 0.7435 | Acc: 89.63%


Epoch 188/200: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 196/196 [00:12<00:00, 15.08it/s]


Epoch 188: Loss 0.7391 | Acc: 89.83%


Epoch 189/200: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 196/196 [00:13<00:00, 15.01it/s]


Epoch 189: Loss 0.7385 | Acc: 89.98%


Epoch 190/200: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 196/196 [00:13<00:00, 14.67it/s]


Epoch 190: Loss 0.7341 | Acc: 90.22%


Epoch 191/200: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 196/196 [00:13<00:00, 14.69it/s]


Epoch 191: Loss 0.7288 | Acc: 90.43%


Epoch 192/200: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 196/196 [00:13<00:00, 14.80it/s]


Epoch 192: Loss 0.7292 | Acc: 90.36%


Epoch 193/200: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 196/196 [00:12<00:00, 15.11it/s]


Epoch 193: Loss 0.7305 | Acc: 90.35%


Epoch 194/200: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 196/196 [00:13<00:00, 14.91it/s]


Epoch 194: Loss 0.7321 | Acc: 90.13%


Epoch 195/200: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 196/196 [00:13<00:00, 14.97it/s]


Epoch 195: Loss 0.7239 | Acc: 90.45%


Epoch 196/200: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 196/196 [00:13<00:00, 15.03it/s]


Epoch 196: Loss 0.7227 | Acc: 90.72%


Epoch 197/200:  89%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñâ | 174/196 [00:11<00:01, 15.39it/s]