In [None]:
!mkdir -p ~/.kaggle
!cp kaggle.json ~/.kaggle

In [None]:
# Download the malaria cell-image archive with the Kaggle CLI (uses the API token installed above).
!kaggle datasets download -d iarunava/cell-images-for-detecting-malaria

In [None]:
import zipfile
import os
import shutil
import random
from pathlib import Path

In [None]:
# Extract the downloaded archive into /content. The context manager closes the
# archive even when extraction fails part-way through.
with zipfile.ZipFile('/content/cell-images-for-detecting-malaria.zip') as zip_ref:
    zip_ref.extractall('/content')

In [None]:
# Root folder of the train/test split. parents=True + exist_ok=True keeps the
# cell re-runnable whatever already exists on disk.
cell_data = Path('/content/cell_data')
cell_data.mkdir(parents=True, exist_ok=True)

In [None]:
# Folder for the training split. parents=True means this cell no longer fails
# with FileNotFoundError when /content/cell_data has not been created yet.
train_data_path = Path('/content/cell_data/train')
train_data_path.mkdir(parents=True, exist_ok=True)

In [None]:
# Folder for the test split. parents=True means this cell no longer fails
# with FileNotFoundError when /content/cell_data has not been created yet.
test_data_path = Path('/content/cell_data/test')
test_data_path.mkdir(parents=True, exist_ok=True)

In [None]:
# Per-split folders for the Parasitized class. parents=True creates any missing
# train/test parent so the cell does not depend on earlier cells having run.
train_parasite_path = Path('/content/cell_data/train/Parasitized')
train_parasite_path.mkdir(parents=True, exist_ok=True)

test_parasite_path = Path('/content/cell_data/test/Parasitized')
test_parasite_path.mkdir(parents=True, exist_ok=True)

In [None]:
# Per-split folders for the Uninfected class. parents=True creates any missing
# train/test parent so the cell does not depend on earlier cells having run.
train_uninfected_path = Path('/content/cell_data/train/Uninfected')
train_uninfected_path.mkdir(parents=True, exist_ok=True)

test_uninfected_path = Path('/content/cell_data/test/Uninfected')
test_uninfected_path.mkdir(parents=True, exist_ok=True)

In [None]:
# Roots of the extracted dataset and of the train/test split built from it.
_images_root = '/content/cell_images'
_split_root = '/content/cell_data'

# Original images, one folder per class.
parasite_source_folder = f'{_images_root}/Parasitized/'
uninfected_source_folder = f'{_images_root}/Uninfected/'

# Copy targets for the Parasitized class.
train_destination_parasite_folder = f'{_split_root}/train/Parasitized/'
test_destination_parasite_folder = f'{_split_root}/test/Parasitized/'

# Copy targets for the Uninfected class.
train_destination_uninfected_folder = f'{_split_root}/train/Uninfected/'
test_destination_uninfected_folder = f'{_split_root}/test/Uninfected/'

# Number of images requested per copy call for each split.
num_images_to_copy_train = 5000
num_images_to_copy_test = 2000

In [None]:
def copy_train_random_images(source, destination, num_images, pool_size=6000, seed=None):
    """Copy a random sample of training images from ``source`` to ``destination``.

    The image files in ``source`` are sorted by name and only the first
    ``pool_size`` of them are eligible. Sorting makes the pool reproducible,
    because ``Path.iterdir`` yields entries in arbitrary order.

    Args:
        source: Folder (str or Path) that holds the original images.
        destination: Folder to copy into; created with its parents if missing.
        num_images: Maximum number of images to copy. Fewer are copied when
            the pool holds fewer images.
        pool_size: Number of leading name-sorted images that form the pool.
        seed: Optional seed for a private random generator. ``None`` draws
            from the module-level ``random`` state.

    Raises:
        FileNotFoundError: If ``source`` does not exist.
        ValueError: If ``num_images`` is negative.
    """
    source = Path(source)
    destination = Path(destination)

    # Path.mkdir's first positional argument is the permission mode, not a
    # path, so the folder is created with keyword arguments only.
    destination.mkdir(parents=True, exist_ok=True)

    img_type = {'.png', '.jpg', '.jpeg'}

    # Keep regular image files only; the suffix check ignores case.
    image_files = sorted(
        file for file in source.iterdir()
        if file.is_file() and file.suffix.lower() in img_type
    )

    # Size the sample from the pool that is actually sampled, so a small
    # folder yields fewer copies instead of a ValueError.
    pool = image_files[:pool_size]
    rng = random if seed is None else random.Random(seed)
    random_images = rng.sample(pool, min(num_images, len(pool)))

    for img in random_images:
        shutil.copy(img, destination / img.name)
        print(f'Copied: {img.name} from {img.parent}')

In [None]:
# Sample Parasitized images into the training split.
copy_train_random_images(
    source=parasite_source_folder,
    destination=train_destination_parasite_folder,
    num_images=num_images_to_copy_train,
)

In [None]:
# Sample Uninfected images into the training split.
copy_train_random_images(
    source=uninfected_source_folder,
    destination=train_destination_uninfected_folder,
    num_images=num_images_to_copy_train,
)

In [None]:
def copy_test_random_images(source, destination, num_images, pool_start=6000, seed=None):
    """Copy a random sample of test images from ``source`` to ``destination``.

    The image files in ``source`` are sorted by name and only those from
    index ``pool_start`` onward are eligible. Sorting makes the pool
    reproducible, because ``Path.iterdir`` yields entries in arbitrary order.

    Args:
        source: Folder (str or Path) that holds the original images.
        destination: Folder to copy into; created with its parents if missing.
        num_images: Maximum number of images to copy. Fewer are copied when
            the pool holds fewer images; nothing is copied when it is empty.
        pool_start: Index into the name-sorted image list where the pool begins.
        seed: Optional seed for a private random generator. ``None`` draws
            from the module-level ``random`` state.

    Raises:
        FileNotFoundError: If ``source`` does not exist.
        ValueError: If ``num_images`` is negative.
    """
    source = Path(source)
    destination = Path(destination)

    # Path.mkdir's first positional argument is the permission mode, not a
    # path, so the folder is created with keyword arguments only.
    destination.mkdir(parents=True, exist_ok=True)

    img_type = {'.png', '.jpg', '.jpeg'}

    # Keep regular image files only; the suffix check ignores case.
    image_files = sorted(
        file for file in source.iterdir()
        if file.is_file() and file.suffix.lower() in img_type
    )

    # Size the sample from the pool that is actually sampled, so a folder
    # with few images past pool_start yields fewer copies, not a ValueError.
    pool = image_files[pool_start:]
    rng = random if seed is None else random.Random(seed)
    random_images = rng.sample(pool, min(num_images, len(pool)))

    for img in random_images:
        shutil.copy(img, destination / img.name)
        print(f'Copied: {img.name} from {img.parent}')

In [None]:
# Sample Parasitized images into the test split.
copy_test_random_images(
    source=parasite_source_folder,
    destination=test_destination_parasite_folder,
    num_images=num_images_to_copy_test,
)

In [None]:
# Sample Uninfected images into the test split.
# The destination name previously read `test_destination_unfected_folder`,
# which is never defined and raised NameError.
copy_test_random_images(
    source=uninfected_source_folder,
    destination=test_destination_uninfected_folder,
    num_images=num_images_to_copy_test,
)

In [None]:
import torch
import torch.nn as nn
import torchvision
import torchvision.transforms as transforms
import matplotlib.pyplot as plt

import pytorch_lightning as pl
import torch.nn.functional as F
from pytorch_lightning import Trainer

In [None]:
# Uncomment and run (before the imports above) if pytorch_lightning is not installed.
# !pip install lightning

In [None]:
# Hyper-parameters
# -- network dimensions
input_shape = 28 * 28  # flattened 28x28 image -> 784 features
hidden_units = 500
output_shape = 10

# -- training schedule
num_epochs = 2
batch_size = 100
learning_rate = 1e-3

In [None]:
class LitNeuralNetwork(pl.LightningModule):
  """Two-layer fully connected classifier trained on MNIST.

  The module supplies its own MNIST dataloaders. It reads the module-level
  ``batch_size`` and ``learning_rate`` settings when building the loaders and
  the optimizer.

  Args:
    input_shape: Number of input features per sample; images are flattened
      to this width before the first linear layer.
    hidden_units: Width of the hidden layer.
    output_shape: Number of output logits.
  """

  def __init__(self, input_shape, hidden_units, output_shape):
    super().__init__()
    self.input_shape = input_shape
    self.layer_1 = nn.Linear(input_shape, hidden_units)
    self.relu = nn.ReLU()
    self.layer_2 = nn.Linear(hidden_units, output_shape)
    # Per-batch validation losses of the current validation epoch; emptied
    # again in on_validation_epoch_end.
    self.validation_step_outputs = []

  def forward(self, x):
    """Return the logits for a batch of flattened inputs."""
    out = self.layer_1(x)
    out = self.relu(out)
    out = self.layer_2(out)
    return out

  def _batch_loss(self, batch):
    """Flatten the images of ``batch`` and return the cross-entropy loss."""
    images, labels = batch
    # Flatten with the configured width so the reshape always matches layer_1.
    images = images.reshape(-1, self.input_shape)
    return F.cross_entropy(self(images), labels)

  def _mnist_loader(self, train, shuffle):
    """Build a DataLoader over the MNIST train or test split."""
    # download=True on both splits so whichever loader is built first can
    # fetch the data.
    dataset = torchvision.datasets.MNIST(
        root="./data", train=train, transform=transforms.ToTensor(), download=True
    )
    return torch.utils.data.DataLoader(
        dataset=dataset, batch_size=batch_size, num_workers=4, shuffle=shuffle
    )

  def training_step(self, batch, batch_idx):
    """Compute and log the training loss for one batch."""
    loss = self._batch_loss(batch)
    self.log('train_loss', loss)
    return {'loss': loss}

  def configure_optimizers(self):
    """Return an Adam optimizer over all parameters."""
    return torch.optim.Adam(self.parameters(), lr=learning_rate)

  def train_dataloader(self):
    """Return the MNIST training loader, reshuffled every epoch."""
    return self._mnist_loader(train=True, shuffle=True)

  def val_dataloader(self):
    """Return the MNIST test-split loader used for validation."""
    return self._mnist_loader(train=False, shuffle=False)

  def validation_step(self, batch, batch_idx):
    """Compute the validation loss for one batch and remember it."""
    loss = self._batch_loss(batch)
    self.validation_step_outputs.append(loss)
    return {"val_loss": loss}

  def on_validation_epoch_end(self):
    """Log the mean validation loss of the epoch and reset the buffer.

    The value is logged with ``self.log`` because Lightning ignores whatever
    this hook returns. Clearing the buffer keeps each epoch's average
    separate from earlier epochs and from the sanity-check batches.
    """
    if not self.validation_step_outputs:
      return
    avg_loss = torch.stack(self.validation_step_outputs).mean()
    self.log('avg_val_loss', avg_loss)
    self.validation_step_outputs.clear()

if __name__ == '__main__':
  # Create the trainer, then the network, and fit; the dataloaders come from
  # the module itself.
  trainer = Trainer(max_epochs=num_epochs, fast_dev_run=False)
  model = LitNeuralNetwork(
      input_shape=input_shape,
      hidden_units=hidden_units,
      output_shape=output_shape,
  )
  trainer.fit(model)

INFO:pytorch_lightning.utilities.rank_zero:GPU available: False, used: False
INFO:pytorch_lightning.utilities.rank_zero:TPU available: False, using: 0 TPU cores
INFO:pytorch_lightning.utilities.rank_zero:HPU available: False, using: 0 HPUs
INFO:pytorch_lightning.callbacks.model_summary:
  | Name    | Type   | Params | Mode 
-------------------------------------------
0 | layer_1 | Linear | 392 K  | train
1 | relu    | ReLU   | 0      | train
2 | layer_2 | Linear | 5.0 K  | train
-------------------------------------------
397 K     Trainable params
0         Non-trainable params
397 K     Total params
1.590     Total estimated model params size (MB)
3         Modules in train mode
0         Modules in eval mode


Sanity Checking: |          | 0/? [00:00<?, ?it/s]

Training: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

INFO:pytorch_lightning.utilities.rank_zero:`Trainer.fit` stopped: `max_epochs=2` reached.


In [None]:
!tensorboard --logdir=lightning_logs --bind_all

2024-09-02 14:06:13.078710: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:485] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2024-09-02 14:06:13.104994: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:8454] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
2024-09-02 14:06:13.112494: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1452] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered

NOTE: Using experimental fast data loading logic. To disable, pass
    "--load_fast=false" and report issues on GitHub. More details:
    https://github.com/tensorflow/tensorboard/issues/4784

TensorBoard 2.17.0 at http://3c00aeb3f536:6006/ (Press CTRL+C to quit)
Exception ignored in atexit callback: <function shutdown at 0x7cc18f6e5000>
Traceback (most recent c