
# Image Classification

Building a machine learning model to solve Image Classification using the PyTorch framework.<br>
Image Classification is one of the basic pattern recognition exercises. <br>
Using Image files as its input, a model trained for Image classification will split a set of images into a given number of classes. <br>
<br>
This Notebook has been generated automatically using the JupyterLab extension ***MLProvCodeGen***.
<br>
The original Source Code is from this application https://github.com/jrieke/traingenerator <br>
Made by: https://www.jrieke.com/ Twitter: https://twitter.com/jrieke


### Installs
Install required packages before running

In [1]:
#pip install numpy===1.22.2 ipywidgets===7.6.5 torch===1.10.2 torchvision===0.11.3 pytorch-ignite===0.4.6 gputil===1.4.0 psutil===5.9.0 py-cpuinfo===8.0.0 --user
#torch currently not supported with python 3.10, downgrading to python 3.9.7 possibly required


### Imports

In [2]:
import numpy as np
import torch
import ipywidgets as widgets
from torch import optim, nn
from torch.utils.data import DataLoader, TensorDataset
import torchvision as torchvision
from torchvision import models, datasets, transforms
import ignite as pytorch_ignite
from ignite.engine import Events, create_supervised_trainer, create_supervised_evaluator
from ignite.metrics import Accuracy, Loss
import GPUtil
import psutil
import cpuinfo
import platform
from datetime import date
import time

### Provenance Data

In [3]:
def get_size(bytes, suffix="B"):
    """
    Scale bytes to its proper format
    e.g:
        1253656 => '1.20MB'
        1253656678 => '1.17GB'
    """
    factor = 1024
    for unit in ["", "K", "M", "G", "T", "P"]:
        if bytes < factor:
            return f"{bytes:.2f}{unit}{suffix}"
        bytes /= factor

def set_experiment_info() :
    created_by = "Tarek Al Mustafa"
    email = "tarek.almustafa@uni-jena.de"
    title = "Image Classification"
    task_type = "Image Classification Pytorch"
    creation_date = str(date.today())
    
    experiment_info = { 
            'author': created_by,
            'email': email,
            'title': title,
            'creation_date': creation_date,
            'task_type': task_type}
    
    return experiment_info

def set_hardware_info():
    uname = platform.uname()
    sysInfo = str(uname.system +' '+ uname.release +' Version: '+ uname.version +' Machine: '+ uname.machine)
    
    svmem = psutil.virtual_memory()

    GPUs = GPUtil.getGPUs()
    gpuList = []
    for gpu in GPUs:
        gpu_id = gpu.id
        gpu_name = gpu.name
        gpuList.append((gpu_id , gpu_name))

    hardware_info = {
        "Python Version": cpuinfo.get_cpu_info()['python_version'],
        "CPU": cpuinfo.get_cpu_info()['brand_raw'],
        "RAM": get_size(svmem.total),
        "Operating System": sysInfo,
        "GPUs": str(gpuList) }
    
    return hardware_info

def set_packages():
    cpuInfo_version = !pip list | grep -i py-cpuinfo
    pytorch_model_summary_version = !pip list | grep -i pytorch-model-summary
    packages = {
        "numpy" : np.__version__,
        "ipywidgets" : widgets.__version__,
        "torch" : torch.__version__,
        "torchvision" : torchvision.__version__,
        "pytorch-ignite" : pytorch_ignite.__version__,
        "gputil" : GPUtil.__version__, 
        "psutil" : psutil.__version__,
        "py-cpuinfo" : cpuInfo_version[0]}

    return packages

print(set_experiment_info())
print(set_hardware_info())
print(set_packages())

{'author': 'Tarek Al Mustafa', 'email': 'tarek.almustafa@uni-jena.de', 'title': 'Image Classification', 'creation_date': '2022-03-21', 'task_type': 'Image Classification Pytorch'}
{'Python Version': '3.9.7.final.0 (64 bit)', 'CPU': 'AMD Ryzen 7 3700X 8-Core Processor', 'RAM': '15.95GB', 'Operating System': 'Windows 10 Version: 10.0.19041 Machine: AMD64', 'GPUs': "[(0, 'NVIDIA GeForce GTX 1060 6GB')]"}
{'numpy': '1.22.2', 'ipywidgets': '7.6.5', 'torch': '1.10.2+cpu', 'torchvision': '0.11.3+cpu', 'pytorch-ignite': '0.4.6', 'gputil': '1.4.0', 'psutil': '5.9.0', 'py-cpuinfo': 'py-cpuinfo                    8.0.0'}


### Setup

In [4]:
# Dataset  will be loaded further down.

# Set up hyperparameters.
lr = 0.001
batch_size = 128
num_epochs = 3

# Set up logging.
print_every = 1  # batches

# Set up device.
use_cuda = torch.cuda.is_available()
device = torch.device("cuda" if use_cuda else "cpu")

### Data Ingestion

In [5]:
dataset = getattr(datasets, 'MNIST')


### Data Preperation

In [6]:
transform = transforms.Compose([
    transforms.Resize(256), 
    transforms.CenterCrop(224), 
    transforms.ToTensor(), 
    transforms.Lambda(lambda x: x.repeat(3, 1, 1)),  # grayscale to RGB
])


### Data Segregation

In [9]:
# Wrap in data loader.
training_dataset = dataset("./data", train=True, download=True, transform=transform)
#training_dataset = datasets.MNIST("./data", train=True, download=True, transform=transform)
testing_dataset = dataset("./data", train=False, download=True, transform=transform)

if use_cuda:
    kwargs = {"pin_memory": True, "num_workers": 1}
else:
    kwargs = {}

train_loader = DataLoader(training_dataset, batch_size=batch_size, shuffle=True, **kwargs)
test_loader = DataLoader(testing_dataset, batch_size=batch_size, shuffle=False, **kwargs)
val_loader = None

print(training_dataset.__len__)    
print(testing_dataset.__len__)

def set_data_ingestion():
    dataInfo = training_dataset.__len__
    root_location =  str(dataInfo).splitlines()[2]
    transform_method = str(dataInfo).splitlines()[4:11]
    data_ingestion = {
        "dataset_id" : 'MNIST',
        "feature_classes" : 10,
        "training_samples" : training_dataset.__len__(),
        "testing_samples" : testing_dataset.__len__(),
        "root_location" : root_location,
        "preprocessing" : transform_method}
    
    return data_ingestion
set_data_ingestion()

<bound method MNIST.__len__ of Dataset MNIST
    Number of datapoints: 60000
    Root location: ./data
    Split: Train
    StandardTransform
Transform: Compose(
               Resize(size=256, interpolation=bilinear, max_size=None, antialias=None)
               CenterCrop(size=(224, 224))
               ToTensor()
               Lambda()
           )>
<bound method MNIST.__len__ of Dataset MNIST
    Number of datapoints: 10000
    Root location: ./data
    Split: Test
    StandardTransform
Transform: Compose(
               Resize(size=256, interpolation=bilinear, max_size=None, antialias=None)
               CenterCrop(size=(224, 224))
               ToTensor()
               Lambda()
           )>


{'dataset_id': 'MNIST',
 'feature_classes': 10,
 'training_samples': 60000,
 'testing_samples': 10000,
 'root_location': '    Root location: ./data',
 'preprocessing': ['    StandardTransform',
  'Transform: Compose(',
  '               Resize(size=256, interpolation=bilinear, max_size=None, antialias=None)',
  '               CenterCrop(size=(224, 224))',
  '               ToTensor()',
  '               Lambda()',
  '           )>']}

### Model

In [8]:
# Set up model, loss, optimizer.
model = models.AlexNet(pretrained=0)
num_classes = 1000
model = model.to(device)
loss_func = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=lr)



TypeError: __init__() got an unexpected keyword argument 'pretrained'

### Training

In [None]:
# Set up pytorch-ignite trainer and evaluator.
trainer = create_supervised_trainer(
    model,
    optimizer,
    loss_func,
    device=device,
)
metrics = {
    "accuracy": Accuracy(),
    "loss": Loss(loss_func),
}
evaluator = create_supervised_evaluator(
    model, metrics=metrics, device=device
)

@trainer.on(Events.ITERATION_COMPLETED(every=print_every))
def log_batch(trainer):
    batch = (trainer.state.iteration - 1) % trainer.state.epoch_length + 1
    print(
        f"Epoch {trainer.state.epoch} / {num_epochs}, "
        f"batch {batch} / {trainer.state.epoch_length}: "
        f"loss: {trainer.state.output:.3f}"
    )

@trainer.on(Events.EPOCH_COMPLETED)
def log_epoch(trainer):
    print(f"Epoch {trainer.state.epoch} / {num_epochs} average results: ")

    def log_results(name, metrics, epoch):
        print(
            f"{name + ':':6} loss: {metrics['loss']:.3f}, "
            f"accuracy: {metrics['accuracy']:.3f}"
        )

    # Train data.
    evaluator.run(train_loader)
    log_results("train", evaluator.state.metrics, trainer.state.epoch)
    
    # Val data.
    if val_loader:
        evaluator.run(val_loader)
        log_results("val", evaluator.state.metrics, trainer.state.epoch)

    # Test data.
    if test_loader:
        evaluator.run(test_loader)
        log_results("test", evaluator.state.metrics, trainer.state.epoch)

    print()
    print("-" * 80)
    print()

# Start training.
trainer.run(train_loader, max_epochs=num_epochs)


### Generate Provenance Data

In [None]:
set_experiment_info()
set_hardware_info()
set_packages()
set_data_ingestion()
set_data_preparation()
set_data_segregation()
set_model_parameters()
set_training()
set_evaluation()

### Write Provenance Data

In [None]:
timestring = time.strftime('%Y%m%d-%H%M%S')
timestring
ProvenanceName = ('Provenance_MulticlassClassification_' + timestring + '.json')

with open('../GeneratedProvenanceData/'+ProvenanceName, 'w') as prov_file:
    prov_file.write('{' + '\n  ')
    prov_file.write('"experiment_info":' + json.dumps(set_experiment_info(),sort_keys=False, indent=4) +',' + '\n\n' )
    prov_file.write('"hardware_info":' + json.dumps(set_hardware_info(),sort_keys=False, indent=4) +',' + '\n\n' )
    prov_file.write('"packages":' + json.dumps(set_packages(),sort_keys=False, indent=4) +',' + '\n\n' )
    prov_file.write('"data_ingestion":' + json.dumps(set_data_ingestion(),sort_keys=False, indent=4) +',' + '\n\n' )
    prov_file.write('"data_preparation":' + json.dumps(set_data_preparation(),sort_keys=False, indent=4) +',' + '\n\n' )
    prov_file.write('"data_segregation":' + json.dumps(set_data_segregation(),sort_keys=False, indent=4) +',' + '\n\n' )
    prov_file.write('"model_parameters":' + json.dumps(set_model_parameters(),sort_keys=False, indent=4) +',' + '\n\n' )
    prov_file.write('"training":' + json.dumps(set_training(),sort_keys=False, indent=4) +',' + '\n\n' )
    prov_file.write('"evaluation":' + json.dumps(set_evaluation(),sort_keys=False, indent=4) + '\n' )
    prov_file.write('}')

### Open Provenance Data

In [None]:
provenance_open = widgets.Button(description = 'Open Provenance Data File')
display(provenance_open)

def on_button_clicked(b):
    provenance_open.on_click = webbrowser.open('http://localhost:8888/lab/tree/GeneratedProvenanceData/'+ProvenanceName)

provenance_open.on_click(on_button_clicked)