# **imports**

In [1]:
import torch
import torchvision
import torchvision.transforms as transforms
import torch.nn as nn
import torch.optim as optim
import matplotlib.pyplot as plt
import numpy as np
from torch.utils.data import DataLoader
from copy import deepcopy
from torchvision.models import mobilenet_v2
from torchvision import models
import sys
from torchsummary import summary
from torchvision.models import MobileNet_V2_Weights
import torch.nn.functional as F







# **global variables**

In [2]:
#~~~~~~~~~~~~~~~~~~~~~~~~~~~global variables~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
# mini-batch size used by the train and visualization loaders
batch_size = 512
# optimizer family used by main(): 'Adam', 'SGD' or 'RMSProp'
optimizer_type = 'Adam'
# momentum passed to SGD (the other optimizers ignore it)
momentum = 0.0
# flag indicating whether to use CUDA (GPU acceleration) if available
cuda = True
seed = 42

# seed numpy and torch so every run draws the same random numbers
np.random.seed(seed)
torch.manual_seed(seed)

# CUDA is used only when it was requested and is actually available
use_cuda = cuda and torch.cuda.is_available()

# when running on the GPU, seed its generators too so GPU-side randomness repeats between runs
if use_cuda:
    torch.cuda.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)

# handle GPU stochasticity: besides the seeds, cuDNN itself must behave deterministically.
# benchmark mode auto-tunes the convolution algorithm per run and may pick a different one each time,
# so it has to be switched off for the deterministic flag to give repeatable results.
torch.backends.cudnn.enabled = use_cuda
torch.backends.cudnn.benchmark = False
torch.backends.cudnn.deterministic = True

# the device used for all computation (GPU when enabled, otherwise CPU)
device = torch.device("cuda" if use_cuda else "cpu")

# **loaders**

In [3]:

def load_data():
    """
    build the STL-10 datasets, their loaders and the train/test transforms
    :return: (trainloader, testloader, visualize_loader, transform_train, transform_test).
             the train dataset is created without a transform; the returned transforms are meant
             to be attached by the caller
    """
    data_root = './data'
    # TODO: confirm that these normalization statistics are the right ones for STL-10
    channel_mean = (0.507, 0.487, 0.441)
    channel_std = (0.267, 0.256, 0.276)

    # evaluation pipeline: deterministic center crop, then tensor conversion and normalization
    transform_test = transforms.Compose([
        transforms.CenterCrop(size=(64, 64)),
        transforms.ToTensor(),
        transforms.Normalize(channel_mean, channel_std),
    ])

    # visualization pipeline: only convert the image to a tensor
    transform_visualize = transforms.Compose([transforms.ToTensor()])

    # training pipeline: random crop, normalization, then the augmentations (flip and small rotation)
    transform_train = transforms.Compose([
        transforms.RandomCrop(size=(64, 64)),
        transforms.ToTensor(),
        transforms.Normalize(channel_mean, channel_std),
        transforms.RandomHorizontalFlip(p=0.5),
        transforms.RandomRotation(degrees=8),
    ])

    # the datasets: only the first one downloads, the others reuse the files on disk
    trainset = torchvision.datasets.STL10(root=data_root, split='train', download=True, transform=None)
    testset = torchvision.datasets.STL10(root=data_root, split='test', download=False, transform=transform_test)
    visualize_set = torchvision.datasets.STL10(root=data_root, split='train', download=False,
                                               transform=transform_visualize)

    # the loaders: the test loader serves the whole test set as a single batch
    trainloader = torch.utils.data.DataLoader(trainset, batch_size=batch_size, shuffle=True, num_workers=1)
    testloader = torch.utils.data.DataLoader(testset, batch_size=testset.data.shape[0], shuffle=False,
                                             num_workers=1)
    visualize_loader = torch.utils.data.DataLoader(visualize_set, batch_size=batch_size, shuffle=True,
                                                   num_workers=1)

    return trainloader, testloader, visualize_loader, transform_train, transform_test




# **visualize** **data**

In [4]:
def visualize_data(dataset):
    """
    visualize the data by showing up to 4 images per class in a 10x4 grid
    :param dataset: the dataset we want to visualize. it must expose `labels` and return
                    (image, label) pairs whose image is a (C,H,W) tensor with values already
                    in [0,1] (e.g. a dataset whose only transform is ToTensor)
    :return: None
    """
    # STL10 classes
    classes = ('airplane', 'bird', 'car', 'cat', 'deer', 'dog', 'horse', 'monkey', 'ship', 'truck')
    images_per_class = 4

    # create a grid with one row per class
    fig, axs = plt.subplots(len(classes), images_per_class, figsize=(15, 15))

    for row, class_name in enumerate(classes):
        # indexes of all the images that belong to the current class
        idxs = [j for j, label in enumerate(dataset.labels) if label == row]

        # slicing keeps this safe when a class has fewer images than columns
        for col, idx in enumerate(idxs[:images_per_class]):
            img = dataset[idx][0]
            # the image is shown as-is: it is expected in [0,1] already, so no rescaling is applied
            # (rescaling an un-normalized image would squeeze it into [0.5,1] and wash it out).
            # transpose the image to (H,W,C) from (C,H,W) for matplotlib
            axs[row, col].imshow(np.transpose(img.numpy(), (1, 2, 0)))

        # hide the axes of every cell, including cells left empty
        for col in range(images_per_class):
            axs[row, col].axis('off')

        # add the class name at the beginning of each row on the left side
        axs[row, 0].text(-15, 15, class_name, va='center', ha='right')

    plt.show()



# **show augmentation**

In [5]:
def show_agumentation(dataset):
    """
    show how the augmentations change one randomly chosen image of the dataset
    the first row shows the original image next to its horizontal flip,
    the second row shows the original image next to a random rotation of up to 8 degrees
    :param dataset: the dataset the image is taken from (its items are (image tensor, label) pairs)
    :return: None
    """
    # pick a random sample and keep only its image
    chosen = np.random.randint(0, len(dataset))
    original = dataset[chosen][0]

    # the two augmentations: a flip that is always applied, and a random rotation
    flip = transforms.Compose([transforms.RandomHorizontalFlip(p=1)])
    rotate = transforms.Compose([transforms.RandomRotation(degrees=8)])

    # 2x2 grid: the left column holds the original image, the right column the augmented one
    fig, ax = plt.subplots(2, 2, figsize=(15, 15))

    flipped = flip(original)
    rotated = rotate(original)

    panels = (
        ((0, 0), original, 'original image'),
        ((0, 1), flipped, 'horizontal flip'),
        ((1, 0), original, 'original image'),
        ((1, 1), rotated, 'rotation'),
    )
    for (row, col), image, title in panels:
        cell = ax[row, col]
        # matplotlib expects (H,W,C) while the tensors are (C,H,W)
        cell.imshow(np.transpose(image.numpy(), (1, 2, 0)))
        cell.set_title(title)
        cell.axis('off')

    plt.tight_layout()
    plt.show()

# **split train and validation**

In [6]:
def split_train_val(trainset,train_size=0.85):
    """
    randomly split the trainset into a trainset and a valset
    the given trainset is narrowed in place to its train part; the valset is a deep copy
    narrowed to the remaining samples
    :param trainset: the dataset to split (must expose indexable `data` and `labels`)
    :param train_size: the fraction of the samples that stays in the trainset
    :return: the trainset and valset
    """
    total = len(trainset)
    n_train = int(train_size * total)
    train_part, val_part = torch.utils.data.random_split(trainset, [n_train, total - n_train])

    # copy first, while the trainset still holds all the samples
    valset = deepcopy(trainset)
    val_idx = val_part.indices
    valset.data = valset.data[val_idx]
    valset.labels = valset.labels[val_idx]

    train_idx = train_part.indices
    trainset.data = trainset.data[train_idx]
    trainset.labels = trainset.labels[train_idx]

    return trainset, valset




# **train function**

In [7]:
def train(model, data_loader, val_loader,loss_function, optimizer,num_of_epochs):
    """
    train the model and evaluate it on the validation set after every epoch
    :param model: the model we want to train
    :param data_loader: the data loader of the trainset
    :param val_loader: the data loader of the valset; only its first batch is evaluated each epoch
    :param loss_function: the loss used for both the train and the validation set
    :param optimizer: the optimizer that updates the parameters of the model
    :param num_of_epochs: the number of the epochs we want to train the model
    :return: (loss_train, loss_validation, accuracy_train, accuracy_validation, model)
             where each of the four lists holds one value per epoch
    """
    #move the model to the device
    model.to(device)

    #the loss and the accuracy of the model per epoch, for the train and the validation set
    loss_train = []
    loss_validation = []
    accuracy_train = []
    accuracy_validation = []

    #go over the epochs
    for epoch in range(num_of_epochs):
        #~~~~~~~~~~~~~~~~~~~~~~~~train~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
        #the validation step below switches the model to evaluation mode, so training mode must be
        #restored at the start of every epoch; otherwise dropout and batch normalization would
        #behave as in inference from the second epoch on
        model.train()

        mini_batch_loss = []
        mini_batch_accuracy = []
        #go over the mini batches
        for X_i, Y_i in data_loader:
            #move the mini batch to the device
            X_i, Y_i = X_i.to(device), Y_i.to(device)
            #first do a forward pass to get the predictions
            y_hat = model(X_i)
            #calculate the loss of the mini batch
            loss = loss_function(y_hat, Y_i)
            #backpropagation pass
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            #calculate the accuracy of the mini batch
            mini_batch_accuracy.append(torch.mean((Y_i == torch.argmax(y_hat,axis=1)).float()).item())
            #add the loss of the mini batch to the list
            mini_batch_loss.append(loss.item())

        #the loss and the accuracy of the epoch are the means over its mini batches
        loss_train.append(np.mean(mini_batch_loss))
        accuracy_train.append(np.mean(mini_batch_accuracy))

        #~~~~~~~~~~~~~~~~~~~~~~~~validation~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
        #set the model to evaluation mode
        model.eval()
        #dont calculate the gradients
        with torch.no_grad():
            #get the first batch of the validation loader
            X_validation, y_validation = next(iter(val_loader))
            #move the validation batch to the device
            X_validation, y_validation = X_validation.to(device), y_validation.to(device)
            #forward pass to get the predictions
            y_hat_val = model(X_validation)
            #calculate the loss of the validation batch
            loss_val = loss_function(y_hat_val,y_validation)
            #calculate the accuracy of the validation batch
            accuracy_val=torch.mean((y_validation == torch.argmax(y_hat_val,axis=1)).float()).item()

        #save the loss and the accuracy of the validation set
        loss_validation.append(loss_val.item())
        accuracy_validation.append(accuracy_val)

        #print the results of the current epoch on a single, overwritten line
        sys.stdout.write('\r' + f'Epoch: {epoch + 1}')
        sys.stdout.write(f'Train Loss: {loss_train[-1]:.3f} | '
                         f'Train Accuracy: {100 * accuracy_train[-1]:.3f}% ')
        sys.stdout.write(f'Validation Loss: {loss_validation[-1]:.3f} | '
                         f'Validation Accuracy: {100 * accuracy_validation[-1]:.3f}%')
        sys.stdout.flush()

    #return the loss and the accuracy per epoch together with the trained model
    return loss_train,loss_validation,accuracy_train,accuracy_validation,model



# **show plots**

In [8]:
def show_plots(lost_train,lost_validation,accuracy_train,accuracy_validation):
    """
    plot the loss curves (top) and the accuracy curves (bottom) as a function of the epochs
    :param lost_train: the loss of the train set per epoch
    :param lost_validation: the loss of the validation set per epoch
    :param accuracy_train: the accuracy of the train set per epoch
    :param accuracy_validation: the accuracy of the validation set per epoch
    """
    #one figure with two stacked panels
    fig, ax = plt.subplots(2, 1, figsize=(15, 15))

    #each panel: (axis, train curve, validation curve, y label, title)
    panels = (
        (ax[0], lost_train, lost_validation, 'lost', 'lost as a function of the epochs'),
        (ax[1], accuracy_train, accuracy_validation, 'accuracy', 'accuracy as a function of the epochs'),
    )
    for axis, train_curve, validation_curve, y_label, title in panels:
        #the two curves of the panel
        axis.plot(train_curve, label='train', linewidth=2, marker='o')
        axis.plot(validation_curve, label='validation', linewidth=2, marker='v')
        #axis labels, title, legend and grid
        axis.set_xlabel('epochs', fontsize=15)
        axis.set_ylabel(y_label, fontsize=15)
        axis.set_title(title, fontsize=17)
        axis.legend(fontsize=12)
        axis.grid(True)

    #leave vertical space between the panels, then show the figure
    plt.subplots_adjust(hspace=0.4)
    plt.show()




# **flatten logistic regression**

In [9]:
class LogisticRegression(nn.Module):
    def __init__(self, num_of_classes, image_size=64*64*3):
        """
        constructor
        :param num_of_classes: the number of classes (the size of the output)
        :param image_size: the number of values in one flattened image (the size of the input)
        """
        super().__init__()
        #turns every image of the batch into a vector
        self.flatten = nn.Flatten()
        #a single linear layer from the flattened image to the class scores
        self.linear = nn.Linear(image_size, num_of_classes)

    def forward(self, x):
        """
        forward function
        :param x: the input batch of images
        :return: the class scores produced by the linear layer
        """
        return self.linear(self.flatten(x))


# **ANN**

In [10]:
class ANN(nn.Module):
    def __init__(self, num_of_classes,hidden_layer_size,dropout_prob,batch_norm_lengths,num_of_hidden_layers=3, image_size=64*64*3):
        """
        constructor
        :param num_of_classes: the number of classes (the size of the output)
        :param hidden_layer_size: for each hidden layer, a tuple (input_size, output_size);
                                  the input layer maps the image to the input size of the first tuple
        :param dropout_prob: the dropout probability for each hidden layer
        :param batch_norm_lengths: the number of features of each batch normalization layer
        :param num_of_hidden_layers: the number of hidden layers
        :param image_size: the number of values in one flattened image
        """
        super().__init__()
        #turns every image of the batch into a vector
        self.flatten = nn.Flatten()

        #one batch normalization layer per given length
        self.batch_norm_layers = nn.ModuleList([nn.BatchNorm1d(length) for length in batch_norm_lengths])

        #one dropout layer per hidden layer
        self.dropout_list = nn.ModuleList(
            [nn.Dropout(p=dropout_prob[i]) for i in range(num_of_hidden_layers)])

        #input layer: from the flattened image to the input of the first hidden layer
        self.input = nn.Linear(image_size, hidden_layer_size[0][0])

        #the hidden layers themselves
        self.hidden_layers = nn.ModuleList(
            [nn.Linear(hidden_layer_size[i][0], hidden_layer_size[i][1]) for i in range(num_of_hidden_layers)])

        #output layer: from the last hidden layer to the class scores
        self.output = nn.Linear(hidden_layer_size[-1][1], num_of_classes)


    def forward(self, x):
        """
        forward function
        :param x: the input batch of images
        :return: the output (predictions)
        """
        #flatten, input layer and its activation
        out = torch.relu(self.input(self.flatten(x)))

        #every hidden block: linear -> batch normalization -> relu -> dropout
        for idx, layer in enumerate(self.hidden_layers):
            out = self.batch_norm_layers[idx](layer(out))
            out = self.dropout_list[idx](torch.relu(out))

        #the output layer gives the class scores
        return self.output(out)


# **CNN**

In [11]:
class CNN(nn.Module):
    def __init__(self,dropout_prob,batch_norm_lengths,conv_size,kernel_pooling_size,linear_size, num_of_classes=10,num_of_conv_layers=2,num_of_linear_layers=2):
        """
        constructor
        :param dropout_prob: the dropout probability after each linear layer
        :param batch_norm_lengths: the number of channels of each batch normalization layer
        :param conv_size: for each convolutional layer, the list
                          [input channels, output channels, kernel size, stride, padding]
        :param kernel_pooling_size: the max-pooling kernel size that follows each convolutional layer
        :param linear_size: the (input_size, output_size) of the two linear layers
        :param num_of_classes: the number of classes
        :param num_of_conv_layers: the number of convolutional layers
        :param num_of_linear_layers: the number of dropout layers created for the linear part
        """
        super().__init__()

        #build the convolutional layers and the pooling layer that follows each of them
        convolutions = []
        poolings = []
        for i in range(num_of_conv_layers):
            spec = conv_size[i]
            convolutions.append(nn.Conv2d(in_channels=spec[0], out_channels=spec[1], kernel_size=spec[2],
                                          stride=spec[3], padding=spec[4]))
            poolings.append(nn.MaxPool2d(kernel_pooling_size[i]))
        self.conv_layers = nn.ModuleList(convolutions)
        self.pooling_layers = nn.ModuleList(poolings)

        #batch normalization for the convolutional layers
        self.batch_norm_layers = nn.ModuleList([nn.BatchNorm2d(length) for length in batch_norm_lengths])

        #flatten the feature maps before the linear part
        self.flatten = nn.Flatten()

        #dropout for the linear part
        self.dropout_list = nn.ModuleList(
            [nn.Dropout(p=dropout_prob[i]) for i in range(num_of_linear_layers)])

        #the two hidden linear layers and the output layer
        first_linear, second_linear = linear_size[0], linear_size[1]
        self.linear1 = nn.Linear(first_linear[0], first_linear[1])
        self.linear2 = nn.Linear(second_linear[0], second_linear[1])
        self.output = nn.Linear(second_linear[1], num_of_classes)

    def forward(self, x):
        """
        forward function
        :param x: the input batch of images
        :return: the output (predictions)
        """
        #every convolutional block: convolution -> batch normalization -> max pooling -> relu
        for idx, conv in enumerate(self.conv_layers):
            x = self.batch_norm_layers[idx](conv(x))
            x = torch.relu(self.pooling_layers[idx](x))

        #flatten the feature maps to one vector per image
        x = self.flatten(x)

        #the linear part: linear -> relu -> dropout, twice, and then the output layer
        x = self.dropout_list[0](torch.relu(self.linear1(x)))
        x = self.dropout_list[1](torch.relu(self.linear2(x)))
        return self.output(x)

    @staticmethod
    def output_channels(input_size, kernel_size, padding, stride):
        """
        calculate the spatial output size of a convolutional layer
        :param input_size: the input size
        :param kernel_size: the kernel size
        :param padding: the padding size
        :param stride: the stride size
        :return: the output size
        """
        covered = input_size - kernel_size + 2 * padding
        return int(covered / stride) + 1

# **fixed pre-trained MobileNetV2**

In [12]:
class mobileNetV2(nn.Module):
    def __init__(self,linear_size,dropout_prob,num_of_linear_layer=2, num_of_classes=10):
        """
        constructor - a pre-trained MobileNetV2 with frozen weights followed by trainable linear layers
        :param linear_size: for each linear layer, a tuple (input_size, output_size)
        :param dropout_prob: the dropout probability for each linear layer
        :param num_of_linear_layer: the number of linear layers in the model not including the output layer
        :param num_of_classes: the number of classes
        """
        super().__init__()
        #the pre-trained mobilenetv2 model
        self.model = models.mobilenet_v2(weights=MobileNet_V2_Weights.DEFAULT)

        #freeze the pre-trained weights so only the layers added below are trained
        for frozen_param in self.model.parameters():
            frozen_param.requires_grad = False

        #the linear layers that are stacked on top of the pre-trained model
        self.flatten = nn.Flatten()
        self.linear = nn.ModuleList([nn.Linear(in_size, out_size) for in_size, out_size in linear_size])
        self.output = nn.Linear(linear_size[-1][1], num_of_classes)

        #dropout, one per linear layer
        self.dropout_list = nn.ModuleList(
            [nn.Dropout(p=dropout_prob[i]) for i in range(num_of_linear_layer)])

        #batch normalization over the output of each linear layer
        self.batch_norm_layers = nn.ModuleList([nn.BatchNorm1d(out_size) for _, out_size in linear_size])



    def forward(self, x):
        """
        forward function
        :param x: the input batch of images
        :return: the output (predictions)
        """
        #the output of the mobilenetv2 model, flattened to one vector per image
        x = self.flatten(self.model(x))

        #every linear block: linear -> batch normalization -> relu -> dropout
        for idx, layer in enumerate(self.linear):
            x = self.batch_norm_layers[idx](layer(x))
            x = self.dropout_list[idx](torch.relu(x))

        #the output layer gives the class scores
        return self.output(x)

    @staticmethod
    def clac_input_size():
        """
        calculate the input size of the first linear layer,
        which is the output size of the classifier of the pre-trained mobilenetv2 model
        :return: the input size
        """
        pretrained = models.mobilenet_v2(weights=MobileNet_V2_Weights.DEFAULT)
        return pretrained.classifier[1].out_features

# **learned pre-trained MobileNetV2**

In [13]:
class mobileNetV2Learned(nn.Module):
    def __init__(self, linear_size, dropout_prob, num_of_linear_layer=2, num_of_classes=10):
        """
        constructor - a pre-trained MobileNetV2 whose weights keep training, followed by linear layers
        :param linear_size: for each linear layer, a tuple (input_size, output_size)
        :param dropout_prob: the dropout probability for each linear layer
        :param num_of_linear_layer: the number of linear layers in the model not including the output layer
        :param num_of_classes: the number of classes
        """
        super().__init__()
        # the pre-trained mobilenetv2 model
        self.model = models.mobilenet_v2(weights=MobileNet_V2_Weights.DEFAULT)

        # keep the pre-trained weights trainable so they are fine-tuned with the rest of the model
        for trainable_param in self.model.parameters():
            trainable_param.requires_grad = True

        # the linear layers that are stacked on top of the pre-trained model
        self.flatten = nn.Flatten()
        self.linear = nn.ModuleList([nn.Linear(in_size, out_size) for in_size, out_size in linear_size])
        self.output = nn.Linear(linear_size[-1][1], num_of_classes)

        # dropout, one per linear layer
        self.dropout_list = nn.ModuleList(
            [nn.Dropout(p=dropout_prob[i]) for i in range(num_of_linear_layer)])

        # batch normalization over the output of each linear layer
        self.batch_norm_layers = nn.ModuleList([nn.BatchNorm1d(out_size) for _, out_size in linear_size])

    def forward(self, x):
        """
        forward function
        :param x: the input batch of images
        :return: the output (predictions)
        """
        # the output of the mobilenetv2 model, flattened to one vector per image
        x = self.flatten(self.model(x))

        # every linear block: linear -> batch normalization -> relu -> dropout
        for idx, layer in enumerate(self.linear):
            x = self.batch_norm_layers[idx](layer(x))
            x = self.dropout_list[idx](torch.relu(x))

        # the output layer gives the class scores
        return self.output(x)

    @staticmethod
    def clac_input_size():
        """
        calculate the input size of the first linear layer,
        which is the output size of the classifier of the pre-trained mobilenetv2 model
        :return: the input size
        """
        pretrained = models.mobilenet_v2(weights=MobileNet_V2_Weights.DEFAULT)
        return pretrained.classifier[1].out_features

# **test function**

In [14]:
def test(model, test_loader,loss_function):
    """
    evaluate the model on the test set and print the results
    the loss and the accuracy are averaged over the samples, so batches of different sizes
    (e.g. a smaller last batch) do not skew the result
    :param model: the model we want to test (expected to be on the global device already)
    :param test_loader: the data loader of the testset
    :param loss_function: the loss function; assumed to return the mean loss of a batch,
                          as nn.CrossEntropyLoss does by default
    :return: (average_loss, average_accuracy) over the test set
    :raises ValueError: if the loader yields no samples
    """
    #set the model to evaluation mode
    model.eval()

    total_loss = 0.0
    total_correct = 0
    total_samples = 0
    #dont calculate the gradients
    with torch.no_grad():
        for X_test, y_test in test_loader:
            # move the batch to the device
            X_test, y_test = X_test.to(device), y_test.to(device)
            # first do a forward pass to get the predictions
            y_hat_test = model(X_test)
            batch_samples = y_test.shape[0]
            # the batch mean is turned back into a sum so every sample gets the same weight
            total_loss += loss_function(y_hat_test, y_test).item() * batch_samples
            # count the correct predictions of the batch
            total_correct += (y_test == torch.argmax(y_hat_test, dim=1)).sum().item()
            total_samples += batch_samples

    if total_samples == 0:
        raise ValueError("test_loader yielded no samples")

    average_loss = total_loss / total_samples
    average_accuracy = total_correct / total_samples

    #print the results of the test
    print(f'Test Loss: {average_loss:.3f} | '
          f'Test Accuracy: {100 * average_accuracy:.3f}%')

    return average_loss, average_accuracy

# **main function - run the different models**

In [None]:
def _make_optimizer(kind, params, lr, weight_decay):
    """
    create the optimizer named by `kind` over the given parameters
    :param kind: 'SGD', 'Adam' or 'RMSProp'
    :param params: the parameters to optimize
    :param lr: the learning rate
    :param weight_decay: the weight decay
    :return: the optimizer
    :raises NotImplementedError: if `kind` is not one of the supported names
    """
    if kind == 'SGD':
        return optim.SGD(params, lr=lr, momentum=momentum, weight_decay=weight_decay)
    if kind == 'Adam':
        return optim.Adam(params, lr=lr, betas=(0.9, 0.999), weight_decay=weight_decay)
    if kind == 'RMSProp':
        return optim.RMSprop(params, lr=lr, alpha=0.99, eps=1e-08, weight_decay=weight_decay)
    # fail here, loudly, instead of hitting an undefined optimizer later on
    raise NotImplementedError("optimizer not implemented")


def main():
    """
    main function - load and visualize STL-10, split it to train/validation, and then
    train, plot and test each model in turn: logistic regression, ANN, CNN,
    the frozen pre-trained MobileNetV2 and the fine-tuned pre-trained MobileNetV2
    :return: None
    """
    #get the trainloader and testloader
    trainloader, testloader,visulaztion_loader,transform_train, transform_test = load_data()

    #visualize the data
    visualize_data(visulaztion_loader.dataset)

    #show the augmentation
    show_agumentation(visulaztion_loader.dataset)

    #split the trainset to trainset and valset
    trainset, valset = split_train_val(trainloader.dataset)

    #apply the transformation on the trainset and the valset
    #the transformation is different for the valset because we dont want to apply augmentation on it
    trainset.transform = transform_train
    valset.transform = transform_test

    # create the dataloaders for the trainset and valset (the valset is served as one batch)
    trainloader = DataLoader(trainset, batch_size=batch_size, shuffle=True,drop_last=True)
    valloader = DataLoader(valset, batch_size=valset.data.shape[0], shuffle=False)


    #~~~~~~~~~~~logistic regression~~~~~~~~~~~~~~~~~~~~~~
    flatten_model_weight_decay=0.005
    flatten_model_lr=0.001
    flatten_model_num_of_epochs=20

    model_logistic = LogisticRegression(10)

    #create the optimizer and the loss function
    optimizer_logistic = _make_optimizer(optimizer_type, model_logistic.parameters(),
                                         flatten_model_lr, flatten_model_weight_decay)
    loss_func_logistic=nn.CrossEntropyLoss()

    #train the model
    loss_train_logitic, loss_valdition_logistic, accuracy_train_logistic, accuracy_validtion_logitic ,model_logistic = \
        (train(model_logistic,trainloader,valloader,loss_func_logistic,optimizer_logistic,flatten_model_num_of_epochs))

    #show the plots
    show_plots(loss_train_logitic,loss_valdition_logistic,accuracy_train_logistic,accuracy_validtion_logitic)

    #test the model
    test(model_logistic,testloader,loss_func_logistic)

    #~~~~~~~~~~~ANN~~~~~~~~~~~~~~~~~~~~~~
    ANN_lr=0.001
    ANN_weight_decay=0.001
    num_of_epochs_ANN=34
    model_ANN = ANN(10,hidden_layer_size=[(2048,1024),(1024,512),(512,256),(256,128)],dropout_prob=[0.1,0.08,0.05,0.03],batch_norm_lengths=[1024,512,256,128],num_of_hidden_layers=4)

    #create the optimizer and the loss function
    optimizer_ANN = _make_optimizer(optimizer_type, model_ANN.parameters(), ANN_lr, ANN_weight_decay)
    loss_func_ANN = nn.CrossEntropyLoss()

    #train the model
    loss_train_ANN, loss_valdition_ANN, accuracy_train_ANN, accuracy_validtion_ANN ,model_ANN = \
        (train(model_ANN,trainloader,valloader,loss_func_ANN,optimizer_ANN,num_of_epochs_ANN))

    #show the plots
    show_plots(loss_train_ANN,loss_valdition_ANN,accuracy_train_ANN,accuracy_validtion_ANN)

    #test the model
    test(model_ANN,testloader,loss_func_ANN)

    #check how many parameters the model has
    #the summary input is created on the same device the (already trained) model lives on
    print(summary(model_ANN, (3, 64, 64), device=device.type))

    #~~~~~~~~~~~CNN~~~~~~~~~~~~~~~~~~~~~~
    CNN_lr=0.01
    CNN_weight_decay=0.001
    num_of_epochs_CNN=40
    CNN_opitmize_type='RMSProp'
    #set the number of conv layers
    conv_layers = 3
    #for each conv layer we define the size of the input and output channels,the kernel size ,the stride and the padding
    conv_sizes = [[3,12,3,1,0],[12,12,3,1,0],[12,42,3,1,0]]
    #set the size of the kernel pooling
    kernel_pooling_size = [2, 4, 2]

    #calculate the size of the input of the first linear layer
    input_linear_size=64
    for i in range(conv_layers):
        #calculate the size after the convolutional layer
        input_linear_size = CNN.output_channels(input_linear_size,conv_sizes[i][2],conv_sizes[i][4],conv_sizes[i][3])
        #calculate the size after the pooling layer
        input_linear_size = input_linear_size // kernel_pooling_size[i]

    #before the first linear layer the feature maps are flattened; their shape is
    #(input_linear_size, input_linear_size, conv_sizes[-1][1])
    input_linear_size = input_linear_size * input_linear_size * conv_sizes[-1][1]

    model_CNN = CNN(dropout_prob=[0.03,0.01],batch_norm_lengths=[12,12,42],conv_size=conv_sizes,kernel_pooling_size=kernel_pooling_size,linear_size=[(input_linear_size,200),(200,100)],num_of_conv_layers=3)

    #check how many parameters the model has
    #the model is not trained yet, so it is moved to the device first: the summary runs a forward
    #pass and the model and its input must live on the same device
    print(summary(model_CNN.to(device), (3, 64, 64), device=device.type))

    #create the optimizer; the CNN uses its own optimizer type and not the global one
    optimizer_CNN = _make_optimizer(CNN_opitmize_type, model_CNN.parameters(), CNN_lr, CNN_weight_decay)

    #create the loss function
    loss_func_CNN = nn.CrossEntropyLoss()

    #train the model
    loss_train_CNN, loss_valdition_CNN, accuracy_train_CNN, accuracy_validtion_CNN ,model_CNN = \
        (train(model_CNN,trainloader,valloader,loss_func_CNN,optimizer_CNN, num_of_epochs_CNN))

    #show the plots
    show_plots(loss_train_CNN,loss_valdition_CNN,accuracy_train_CNN,accuracy_validtion_CNN)

    #test the model
    test(model_CNN,testloader,loss_func_CNN)

    #~~~~~~~~~~~mobile net v2~~~~~~~~~~~~~~~~~~~~~~
    mobileNet_lr=0.001
    mobileNet_weight_decay=0.001
    num_of_epochs_mobileNet=30
    # get the size of the input of the first linear layer
    input_linear_size = mobileNetV2.clac_input_size()

    model_v2 = mobileNetV2(linear_size=[(input_linear_size,200),(200,100)],dropout_prob=[0.08,0.03])

    #create the optimizer and the loss function
    optimizer_v2 = _make_optimizer(optimizer_type, model_v2.parameters(), mobileNet_lr, mobileNet_weight_decay)
    loss_func_v2 = nn.CrossEntropyLoss()

    #train the model
    loss_train_v2, loss_valdition_v2, accuracy_train_v2, accuracy_validtion_v2 ,model_v2 = \
        (train(model_v2,trainloader,valloader,loss_func_v2,optimizer_v2,num_of_epochs_mobileNet))

    #show the plots
    show_plots(loss_train_v2,loss_valdition_v2,accuracy_train_v2,accuracy_validtion_v2)

    #test the model
    test(model_v2,testloader,loss_func_v2)

    #~~~~~~~~~~~~~~mobilenet v2 learning weights ~~~~~~~~~~~~~~~~~~
    mobileNetLearning_lr=0.0005
    mobileNetLearning_weight_decay=0.0005
    num_of_epochs_mobileNetLearning=20
    # get the size of the input of the first linear layer
    input_linear_size = mobileNetV2Learned.clac_input_size()

    model_v2_learning_weights = mobileNetV2Learned(linear_size=[(input_linear_size,200),(200,50)],dropout_prob=[0.08,0.01])

    #create the optimizer and the loss function
    optimizer_v2_learning_weights = _make_optimizer(optimizer_type, model_v2_learning_weights.parameters(),
                                                    mobileNetLearning_lr, mobileNetLearning_weight_decay)
    loss_func_v2_learning_weights = nn.CrossEntropyLoss()

    #train the model
    loss_train_v2_learning_weights, loss_valdition_v2_learning_weights, accuracy_train_v2_learning_weights, accuracy_validtion_v2_learning_weights ,model_v2_learning_weights = \
        (train(model_v2_learning_weights,trainloader,valloader,loss_func_v2_learning_weights,optimizer_v2_learning_weights, num_of_epochs_mobileNetLearning))

    #show the plots
    show_plots(loss_train_v2_learning_weights,loss_valdition_v2_learning_weights,accuracy_train_v2_learning_weights,accuracy_validtion_v2_learning_weights)

    #test the model
    test(model_v2_learning_weights,testloader,loss_func_v2_learning_weights)







# run the experiments only when the file is executed as a script
if __name__ == "__main__":

    main()