In [None]:
import os
import json
import time
import copy
import shutil
import numpy as np
from PIL import Image
import matplotlib.pyplot as plt
import torch
import torchvision
import torch.nn as nn
import torch.optim as optim
from torch.nn import functional as F
from torch.optim.lr_scheduler import OneCycleLR
from torch.utils.data import DataLoader, Dataset
from torchvision import datasets, models, transforms
from torch.quantization import QuantStub, DeQuantStub, fuse_modules, quantize_dynamic


%matplotlib inline

In [None]:
# Fix the PyTorch RNG seed so weight initialisation and random transforms are repeatable.
seed = 42
torch.manual_seed(seed)
if torch.cuda.is_available():
    # Also seed the current CUDA device's generator when a GPU is present.
    torch.cuda.manual_seed(seed)

# The notebook is pinned to the CPU; the line below is the disabled auto-selecting variant.
#device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
device = "cpu"

In [None]:
"""
This module is responsible for creating the train, validation and test splits.
In particular, it writes three JSON files (train.json, validation.json and test.json),
each containing an array with the paths where the images are located.
"""
import json
import math
import os
import random


def cycle_path(nerfs_root):
    """Map each class folder under ``nerfs_root`` to the relative paths of its entries.

    The returned paths are rooted at ``./<last two components of nerfs_root>`` instead of
    ``nerfs_root`` itself, e.g. ``cycle_path('archive')`` yields paths that start with
    ``./archive/<class>/``.

    Args:
        nerfs_root: Directory whose immediate sub-directories are the classes.

    Returns:
        dict mapping class name -> list of entry paths (sorted by entry name).
        Non-directory entries directly under ``nerfs_root`` are ignored.
    """
    dict_result = {}

    # Keep only the last two path components (drive letter stripped) as the new prefix.
    last_two_parts = os.path.join(*os.path.splitdrive(nerfs_root)[1].split(os.sep)[-2:])
    base_folder = os.path.join('.', last_two_parts)

    # os.listdir order is arbitrary; sorting keeps the result (and any seeded shuffle
    # applied to it afterwards) reproducible across machines and filesystems.
    for class_name in sorted(os.listdir(nerfs_root)):
        class_dir = os.path.join(nerfs_root, class_name)

        if not os.path.isdir(class_dir):
            continue

        # Build the relocated path directly. The previous str.replace(nerfs_root, ...)
        # rewrote *every* occurrence of the root string, corrupting entries whose own
        # name happened to contain it (e.g. root 'archive', file 'old_archive.jpg').
        dict_result[class_name] = [
            os.path.join(base_folder, class_name, subject_name)
            for subject_name in sorted(os.listdir(class_dir))
        ]

    return dict_result


def create():
    """Create a per-class train/validation/test split and write it to JSON.

    Every class found under each root in ``root_paths`` is shuffled with a fixed seed and
    split roughly 80/10/10. The three resulting path lists are written to ``train.json``,
    ``validation.json`` and ``test.json`` in the current working directory.

    Raises:
        AssertionError: if a class is too small to give every split at least one element,
            if elements are lost, or if an element ends up in more than one split.
    """
    root_paths = ['archive']

    train = []
    validation = []
    test = []

    # Percentages of each class; the train split receives whatever remains (~80%).
    VALIDATION_SPLIT = 10
    TEST_SPLIT = 10

    random.seed(1203)

    for curr_path in root_paths:

        # Get the {class name: [paths]} mapping for this root
        nerfs_dict = cycle_path(curr_path)

        # Iterate classes in a fixed order so the seeded shuffles are reproducible.
        for class_name in sorted(nerfs_dict):

            # Work on a sorted copy: deterministic input for the shuffle, and the
            # mapping returned by cycle_path is not mutated.
            class_elements = sorted(nerfs_dict[class_name])
            random.shuffle(class_elements)

            n_elements = len(class_elements)

            # Define the dimensions of the splits
            n_test = math.floor(n_elements * TEST_SPLIT / 100)
            n_validation = math.floor(n_elements * VALIDATION_SPLIT / 100)
            n_train = n_elements - n_validation - n_test

            # Make the splits according to their sizes
            train_elements = class_elements[:n_train]
            validation_elements = class_elements[n_train:n_train + n_validation]
            test_elements = class_elements[n_train + n_validation:]

            # Length validation
            total_elements = len(train_elements) + len(validation_elements) + len(test_elements)
            assert total_elements == n_elements, 'Not all elements were properly used.'
            assert n_train > 0 and n_validation > 0 and n_test > 0, \
                f'Class {class_name!r} has too few elements ({n_elements}) to fill every split.'

            # Elements uniqueness validation. The previous check compared the boolean
            # against n_elements under a `not`, so it could never detect an overlap.
            train_set = set(train_elements)
            validation_set = set(validation_elements)
            test_set = set(test_elements)

            no_common_elements = (
                train_set.isdisjoint(validation_set)
                and train_set.isdisjoint(test_set)
                and validation_set.isdisjoint(test_set)
            )
            assert no_common_elements, 'Some elements are shared between splits'

            train.extend(train_elements)
            validation.extend(validation_elements)
            test.extend(test_elements)

    for file_name, split_paths in (('train.json', train),
                                   ('validation.json', validation),
                                   ('test.json', test)):
        with open(file_name, 'w') as file:
            json.dump(split_paths, file)
    
#create()

In [None]:


def organize_images(source_paths, base_folder, split):
    """Copy each file into ``<base_folder>/<split>/<label>/``.

    The label is ``'no_defected'`` when that string occurs anywhere in the source path and
    ``'defected'`` otherwise. Destination folders are created on demand and files keep
    their base name.

    Args:
        source_paths: Iterable of file paths to copy.
        base_folder: Root folder that holds the split directories.
        split: Name of the split sub-folder (e.g. 'train').
    """
    for src in source_paths:
        label = 'no_defected' if 'no_defected' in src else 'defected'
        target_dir = os.path.join(base_folder, split, label)
        os.makedirs(target_dir, exist_ok=True)
        shutil.copy(src, os.path.join(target_dir, os.path.basename(src)))

"""with open('train.json', 'r') as file:
    train_data = json.load(file)

with open('validation.json', 'r') as file:
    validation_data = json.load(file)

with open('test.json', 'r') as file:
    test_data = json.load(file)"""

# Root folder under which organize_images() creates the <split>/<label>/ directories.
base_folder = 'archive'

# One-off dataset materialisation, kept disabled. Re-enabling these calls also requires
# the train/validation/test lists loaded from the JSON files, which is disabled too.
#organize_images(train_data, base_folder, 'train')

#organize_images(validation_data, base_folder, 'validation')

#organize_images(test_data, base_folder, 'test')


In [None]:
import cv2
import numpy as np
import os
import random
import shutil

# Source dataset root and destination root for the augmented copies.
base_path = 'data'
output_path = 'data_aug2'

# WARNING: destructive. Any existing output folder is deleted and recreated empty each
# time this cell runs; ignore_errors=True also makes a missing folder a no-op.
shutil.rmtree(output_path, ignore_errors=True)
os.makedirs(output_path, exist_ok=True)

# Disabled earlier variant of augment_images_in_folder (grayscale conversion), kept as a
# bare string literal so it is never executed.
"""
def augment_images_in_folder(input_folder, output_folder):
    for class_folder in os.listdir(input_folder):
        input_class_folder_path = os.path.join(input_folder, class_folder)
        output_class_folder_path = os.path.join(output_folder, class_folder)
        os.makedirs(output_class_folder_path, exist_ok=True)

        for image_name in os.listdir(input_class_folder_path):
            input_image_path = os.path.join(input_class_folder_path, image_name)
            output_image_path = os.path.join(output_class_folder_path, image_name)

            # Carica l'immagine
            image = cv2.imread(input_image_path)

            # Converte l'immagine in bianco e nero
            grayscale_image = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)

            # Salva l'immagine in bianco e nero nella cartella di output
            cv2.imwrite(output_image_path, grayscale_image)
"""

def augment_images_in_folder(input_folder, output_folder):
    """Write a sepia-toned copy of every image found in ``input_folder``.

    ``input_folder`` is expected to contain one sub-folder per class; the same layout is
    recreated under ``output_folder`` and every image keeps its file name.

    Args:
        input_folder: Folder containing one sub-folder per class.
        output_folder: Folder that receives the augmented images (created if missing).

    Entries of ``input_folder`` that are not directories, and files OpenCV cannot decode,
    are skipped instead of aborting the whole run.
    """
    # Classic sepia weights, expressed for OpenCV's B, G, R channel order: row i produces
    # output channel i and column j weights input channel j. The commonly quoted matrix
    # is written for R, G, B and, applied unchanged to BGR data, puts the largest weights
    # on the blue output channel (a blue tint instead of sepia).
    sepia_matrix = np.array([[0.131, 0.534, 0.272],
                             [0.168, 0.686, 0.349],
                             [0.189, 0.769, 0.393]])

    for class_folder in sorted(os.listdir(input_folder)):
        input_class_folder_path = os.path.join(input_folder, class_folder)
        if not os.path.isdir(input_class_folder_path):
            # Stray files next to the class folders would make os.listdir raise.
            continue

        output_class_folder_path = os.path.join(output_folder, class_folder)
        os.makedirs(output_class_folder_path, exist_ok=True)

        for image_name in sorted(os.listdir(input_class_folder_path)):
            input_image_path = os.path.join(input_class_folder_path, image_name)
            output_image_path = os.path.join(output_class_folder_path, image_name)

            # cv2.imread signals failure by returning None rather than raising.
            image = cv2.imread(input_image_path)
            if image is None:
                print(f"Skipping unreadable image: {input_image_path}")
                continue

            sepia_image = cv2.transform(image, sepia_matrix)
            sepia_image = np.clip(sepia_image, 0, 255).astype(np.uint8)

            cv2.imwrite(output_image_path, sepia_image)

# Produce the augmented copy of every split, mirroring the source folder layout.
for split_name in ('train', 'validation', 'test'):
    augment_images_in_folder(
        os.path.join(base_path, split_name),
        os.path.join(output_path, split_name),
    )


In [None]:
from torch.utils.data import ConcatDataset

# Per-channel mean/std normalisation applied after ToTensor in every split.
normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                 std=[0.229, 0.224, 0.225])

# Dataset roots. os.path.join(name, '') appends the platform's own separator, so the
# values equal the former 'data\\'-style literals on Windows while also being valid on
# Linux/macOS (where a backslash is just a character in a file name).
input_path = os.path.join('data', '')
augmented_path1 = os.path.join('data_aug', '')
augmented_path2 = os.path.join('data_aug2', '')
# NOTE(review): no visible cell creates 'data_aug' - presumably it was produced by the
# disabled grayscale augmentation; confirm it exists before running this cell.

_SPLITS = ('train', 'validation', 'test')


def _eval_transform():
    """Deterministic preprocessing used for validation and test: resize, tensor, normalise."""
    return transforms.Compose([
        transforms.Resize((224, 224)),
        transforms.ToTensor(),
        normalize
    ])


data_transforms = {
    # Training additionally applies random shear/scale and horizontal flips.
    'train':
    transforms.Compose([
        transforms.Resize((224, 224)),
        transforms.RandomAffine(0, shear=10, scale=(0.8, 1.2)),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
        normalize
    ]),
    'validation': _eval_transform(),
    'test': _eval_transform(),
}


def _load_split_datasets(root):
    """Return {split: ImageFolder(<root>/<split>)} using the transform of each split."""
    return {
        split: datasets.ImageFolder(os.path.join(root, split), data_transforms[split])
        for split in _SPLITS
    }


image_datasets = _load_split_datasets(input_path)
augmented1_datasets = _load_split_datasets(augmented_path1)
augmented2_datasets = _load_split_datasets(augmented_path2)

# Combine the original and augmented datasets, split by split.
combined_datasets = {
    split: ConcatDataset([image_datasets[split],
                          augmented1_datasets[split],
                          augmented2_datasets[split]])
    for split in _SPLITS
}

dataloaders = {
    # Only the training loader shuffles; the test loader yields one image per batch.
    'train': torch.utils.data.DataLoader(combined_datasets['train'], batch_size=32, shuffle=True, num_workers=0),
    'validation': torch.utils.data.DataLoader(combined_datasets['validation'], batch_size=32, shuffle=False, num_workers=0),
    'test': torch.utils.data.DataLoader(combined_datasets['test'], batch_size=1, shuffle=False, num_workers=0)
}


In [None]:
# Pretrained MobileNetV3-Large backbone, moved to the configured device.
model = models.mobilenet_v3_large(pretrained=True).to(device)

# Freeze the feature extractor: only the replacement classifier below stays trainable.
model.features.requires_grad_(False)

# New head: 960 input features -> 512 hidden units -> 2 output classes.
model.classifier = nn.Sequential(
    nn.Linear(in_features=960, out_features=512),
    nn.ReLU(),
    nn.Dropout(p=0.5),
    nn.Linear(in_features=512, out_features=2),
).to(device)

print(model)


In [None]:
def train_model(model, num_epochs=3):
    """Train ``model`` with cross-entropy/Adam and report per-epoch metrics.

    Each epoch runs a training pass followed by a validation pass over the module-level
    ``dataloaders`` ('train' and 'validation'), on the module-level ``device``, and
    prints the mean loss and accuracy of each pass.

    Args:
        model: The network to train; it is modified in place.
        num_epochs: Number of train+validation epochs to run.

    Returns:
        The same ``model`` object, left in eval mode (validation is the last phase).
    """
    # Move the model once, before the optimizer captures its parameters.
    model.to(device)

    criterion = nn.CrossEntropyLoss()
    optimizer = optim.Adam(model.parameters(), lr=1e-3)

    for epoch in range(num_epochs):
        print('Epoch {}/{}'.format(epoch+1, num_epochs))
        print('-' * 10)

        for phase in ['train', 'validation']:
            is_train = phase == 'train'
            if is_train:
                model.train()
            else:
                model.eval()

            running_loss = 0.0
            running_corrects = 0

            for inputs, labels in dataloaders[phase]:
                inputs = inputs.to(torch.float32).to(device)
                labels = labels.to(device)

                # Only record the autograd graph while training; validation does not
                # need gradients, so this saves memory and time there.
                with torch.set_grad_enabled(is_train):
                    outputs = model(inputs)
                    loss = criterion(outputs, labels)

                    if is_train:
                        optimizer.zero_grad()
                        loss.backward()
                        optimizer.step()

                _, preds = torch.max(outputs, 1)
                # loss is the batch mean, so weight it by the batch size.
                running_loss += loss.item() * inputs.size(0)
                running_corrects += (preds == labels).sum().item()

            # Take the sample count from the loader that was actually iterated.
            n_samples = len(dataloaders[phase].dataset)
            epoch_loss = running_loss / n_samples
            epoch_acc = running_corrects / n_samples

            print('{} loss: {:.4f}, acc: {:.4f}'.format(phase,
                                                        epoch_loss,
                                                        epoch_acc))
    return model

In [None]:
# Train the classifier for 20 epochs; train_model returns the same (now trained) model object.
model_trained = train_model(model, num_epochs=20)

In [None]:
from torch.utils.mobile_optimizer import optimize_for_mobile
# Export in inference mode explicitly. Without this the export only works as intended
# because train_model happens to finish with a validation phase; a module scripted in
# training mode would keep Dropout active in the saved artefacts.
model_trained.eval()

# Compile to TorchScript, then derive the mobile-optimised variant from it.
torchscript_model = torch.jit.script(model_trained)
optimized_model = optimize_for_mobile(torchscript_model)

# Both artefacts are saved so they can be compared during evaluation.
optimized_model.save("optimized_model.pt")
torchscript_model.save("script.pt")


In [None]:
import torch
import torch.nn.functional as F
from sklearn.metrics import accuracy_score


def test_models(model, test_dataloader):

    inference_times = []
    model.eval()
    model.to('cpu')
    correct = 0
    total = 0

    i = 0
    avg_time = 0
    for inputs, labels in test_dataloader:
        inputs, labels = inputs.to('cpu'), labels.to('cpu')

        start_time = time.time()

        # Run inference
        outputs = model(inputs)

        end_time = time.time()

        _, predicted = torch.max(outputs.data, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()
        i += len(inputs)
        avg_time += (end_time - start_time)

        # Calculate average inference time for every 10 images
        if i % 10 == 0:
            inference_time = avg_time / 10 * 1e3  # Convert to microseconds
            inference_times.append(inference_time)
            avg_time = 0  # Reset average time

    # Calculate accuracy for the entire test set
    accuracy = correct / total

    # Check if inference_times is not empty before calculating the average
    if inference_times:
        print(f"Accuracy on the test set: {accuracy:.4f}")
        print(f" - Average Inference Time: {sum(inference_times)/len(inference_times):.2f} milliseconds per image\n")





# Evaluate the plain TorchScript export first, then the mobile-optimised one.
for exported_path in ('script.pt', 'optimized_model.pt'):
    test_models(torch.jit.load(exported_path), dataloaders['test'])



In [8]:

import torch
import time
from PIL import Image
from torchvision import transforms

# Single-image latency probe: load a TorchScript model on the CPU and time one forward pass.
x = torch.jit.load('datasets\\runs\\detect\\train\\weights\\best.torchscript').to('cpu')
#x = torch.jit.load('script.pt').to('cpu')
# The sample image is resized to 224x224 before being converted to a tensor.
image = Image.open("datasets\\test\\images\\148269246_3650105938422073_232258935701890484_n_cropped_jpg.rf.780bbee8eb94051585beb171bc3d56b1.jpg").resize((224, 224))

# Define a transform to apply to the image
# (ToTensor only - unlike the training pipeline, no Normalize step is applied here).
preprocess = transforms.Compose([
    transforms.ToTensor(),
])

# Apply the transform to the image
# unsqueeze(0) adds the leading batch dimension.
input_tensor = preprocess(image).unsqueeze(0)

# Time a single call; the printed value is in seconds.
# NOTE(review): this is the first call on a freshly loaded module and runs with autograd
# enabled, so it presumably includes one-off warm-up cost - confirm before quoting it.
start = time.time()
x(input_tensor)
print(time.time()-start)


0.08262848854064941
