<a href="https://colab.research.google.com/github/HayBeeCoder/visio-computativa/blob/main/image_classification_using_cifar_10_dataset.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# **SETUP & DATA PREPARATION**

In [None]:
import torch
import torchvision
from torchvision import datasets, transforms
from torch.utils.data import DataLoader
from tqdm.notebook import tqdm
from collections import Counter
from sklearn.metrics import ConfusionMatrixDisplay, confusion_matrix
from torch.utils.data import DataLoader, random_split
# from torchinfo import summary
import matplotlib.pyplot as plt
import pandas as pd

from PIL import Image


In [None]:
# Choose the compute backend: start from the CPU fallback and upgrade to CUDA
# when available, otherwise to Apple's MPS backend when that is available.
device = "cpu"
if torch.cuda.is_available():
  device = "cuda"
elif torch.backends.mps.is_available():
  device = "mps"

print(f"Using {device} device.")


In [None]:
class ConvertToRGB:
  """Callable transform that guarantees an image is in RGB mode.

  The image only needs a ``mode`` attribute and a ``convert(mode)`` method.
  """

  def __call__(self, image):
    """Return ``image`` itself if it is already RGB, else its RGB conversion."""
    # Already RGB: hand the very same object back untouched.
    if image.mode == "RGB":
      return image
    return image.convert("RGB")

In [None]:
# Baseline preprocessing pipeline (no normalization yet): force RGB mode,
# then convert the image to a tensor.
transform = transforms.Compose([
    ConvertToRGB(),
    transforms.ToTensor()
])

In [None]:
# CIFAR-10 training split stored under ./data (downloaded on demand); every
# image goes through the un-normalized `transform` pipeline when accessed.
trainset = datasets.CIFAR10(root='./data', train=True, download=True, transform=transform)

In [None]:
# Sanity check: count how many training images are 32x32.
#
# `transform` ends with ToTensor(), so each image is a tensor. `Tensor.size`
# is a *method*, so comparing it to a tuple is always False and the counter
# would stay at 0. Compare the trailing two dims of `.shape` instead.
trainset[0][0].shape

counter = 0
for item in trainset:
  if tuple(item[0].shape[-2:]) == (32, 32):
    counter += 1
print(counter)


In [None]:
# Batched loader over the un-normalized training set. No shuffle argument is
# passed, so DataLoader's default ordering is used.
batch_size=32
train_data_loader = DataLoader(trainset, batch_size=batch_size)

In [None]:
# Pull a single batch to inspect tensor shapes: element 0 holds the images,
# element 1 the labels.
first_batch = next(iter(train_data_loader))

print(f"Shape of one batch: {first_batch[0].shape}")
print(f"Shape of labels: {first_batch[1].shape}")


In [None]:

# Tally how often each label index occurs across the training set
# (tqdm shows progress while every sample is visited).
counts = Counter(label for _, label in tqdm(trainset))

# Translate label indices back to class names for a readable summary.
class_distribution = {
    name: counts[index] for name, index in trainset.class_to_idx.items()
}
print(class_distribution)

# **DATA ANALYSIS & NORMALIZATION**

In [None]:
def get_mean_std(loader):
  """Compute the per-channel mean and standard deviation over a loader.

  Each batch's statistics are weighted by its number of samples, so a smaller
  final batch does not get the same influence as a full one.

  Args:
    loader: Iterable yielding ``(data, target)`` pairs, where ``data`` is a
      floating-point tensor with at least four dims. Dims 0, 2 and 3 are
      reduced; dim 1 is kept as the channel axis.

  Returns:
    Tuple ``(mean, std)`` of tensors with one entry per channel.

  Raises:
    ZeroDivisionError: If ``loader`` yields no batches.
  """
  channels_sum, channels_squared_sum, num_samples = 0, 0, 0
  for data, _ in tqdm(loader, desc="Computing mean and std", leave=False):
    batch_len = data.shape[0]
    # Scale each per-batch mean by the batch size so the final division
    # yields a true per-sample average even when batch sizes differ.
    channels_sum += torch.mean(data, dim=[0, 2, 3]) * batch_len
    channels_squared_sum += torch.mean(data**2, dim=[0, 2, 3]) * batch_len
    num_samples += batch_len
  mean = channels_sum / num_samples
  # Var[X] = E[X^2] - E[X]^2. Rounding can push this slightly below zero,
  # which would turn the square root into NaN, so clamp at zero first.
  variance = torch.clamp(channels_squared_sum / num_samples - mean**2, min=0)
  std = variance ** 0.5

  return mean, std

In [None]:
# Per-channel statistics of the un-normalized training images; these feed the
# Normalize transform below.
mean, std = get_mean_std(train_data_loader)

In [None]:
# Same pipeline as `transform`, plus per-channel normalization using the
# mean/std computed from the training data.
transform_norm = transforms.Compose([
    ConvertToRGB(),
    transforms.ToTensor(),
    transforms.Normalize(mean=mean, std=std)
])

In [None]:
# Re-create the CIFAR-10 training split from ./data, this time applying the
# normalizing pipeline to every image.
normalized_trainset = datasets.CIFAR10(root='./data', train=True, download=True, transform=transform_norm)

# **DATA SPLITTING AND VALIDATION**

In [None]:

# Seeded generator so the train/validation split is reproducible across runs.
g = torch.Generator()
g.manual_seed(42)
# Fractional lengths: 80% of the samples go to training, 20% to validation.
train_set, val_set = random_split(normalized_trainset, [0.8, 0.2], generator=g)

In [None]:
# Cell output: number of samples in the training split.
len(train_set)

In [None]:


def count(dataset):
  """Count the samples of each class in ``dataset``.

  Args:
    dataset: Iterable of samples whose element at index 1 is the label index.
      It must either expose ``class_to_idx`` itself or wrap (via a
      ``.dataset`` attribute, as the subsets returned by ``random_split`` do)
      a dataset that exposes it.

  Returns:
    ``pd.Series`` indexed by class name with the number of samples per class;
    classes that never occur get 0.
  """
  counts = Counter(x[1] for x in tqdm(dataset))
  # Subset-style wrappers keep the class metadata on the wrapped dataset;
  # plain datasets carry it directly.
  source = dataset if hasattr(dataset, "class_to_idx") else dataset.dataset
  classes = source.class_to_idx
  return pd.Series({cat: counts[index] for cat, index in classes.items()})


count(train_set)

In [None]:
# Per-class sample counts for both splits; each call iterates over the whole
# split it is given.
train_class_distribution = count(train_set)
val_class_distribution = count(val_set)

In [None]:
# Bar chart of per-class sample counts in the training split, smallest first.
train_class_distribution.sort_values().plot(kind="bar")
# Keep the zoomed-in lower bound of 2000, but derive the upper bound from the
# data: a fixed cap of 4000 cuts off every class with more than 4000 samples.
plt.ylim(2000, max(4000, int(train_class_distribution.max() * 1.02)))

In [None]:
# Bar chart of per-class sample counts in the validation split, smallest first.
val_class_distribution.sort_values().plot(kind="bar")

# **CNN MODEL ARCHITECTURE**