# **Material_1**

## **Dataset: CIFAR-10**

In [None]:
! nvidia-smi

Wed May 22 19:36:59 2024       
+---------------------------------------------------------------------------------------+
| NVIDIA-SMI 535.104.05             Driver Version: 535.104.05   CUDA Version: 12.2     |
|-----------------------------------------+----------------------+----------------------+
| GPU  Name                 Persistence-M | Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp   Perf          Pwr:Usage/Cap |         Memory-Usage | GPU-Util  Compute M. |
|                                         |                      |               MIG M. |
|   0  NVIDIA A100-SXM4-40GB          Off | 00000000:00:04.0 Off |                    0 |
| N/A   38C    P0              48W / 400W |      2MiB / 40960MiB |      0%      Default |
|                                         |                      |             Disabled |
+-----------------------------------------+----------------------+----------------------+
                                                                    

In [None]:
# Data processing
import numpy as np
import torch
import torchvision
import torch.nn as nn
from matplotlib import pyplot as plt
import torch
from torch.utils.data import DataLoader, Subset
import torch.optim as optim
import torchvision.transforms as transforms
from torch.utils.data import random_split
from torch.utils.data import DataLoader, ConcatDataset, random_split

# ToTensor yields channel values in [0, 1]; Normalize((0.5,)*3, (0.5,)*3) then
# computes (x - 0.5) / 0.5 per channel, i.e. values end up in [-1, 1].
transform = transforms.Compose(
    [transforms.ToTensor(),
     transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))])
################################################################################################################################################
# download=True fetches the archive into ./data on first use.
all_set = torchvision.datasets.CIFAR10(root='./data', train=True, download=True, transform=transform)
test_set = torchvision.datasets.CIFAR10(root='./data', train=False, download=True, transform=transform)
print(len(all_set))  # Print the number of training samples
print(len(test_set))  # Print the number of test samples
# Split the training images into two halves of 25,000; the seeded generator
# makes the split identical on every run.
e12_, e34_ = random_split(dataset=all_set, lengths=[25000, 25000], generator=torch.Generator().manual_seed(0))
################################################################################################################################################
num_classes = 10  # Number of classes
samples_per_class = 10  # Number of samples to extract per class
################################################################################################################################################


def hold_out_first_per_class(split, per_class=samples_per_class, n_classes=num_classes):
    """Set aside the first `per_class` samples of every class found in `split`.

    Parameters
    ----------
    split : dataset yielding ``(image, label)`` pairs with integer labels in
        ``range(n_classes)``.
    per_class : number of samples to set aside for each class.
    n_classes : number of distinct labels.

    Returns
    -------
    by_class : dict mapping each label to the positions (in `split`) set aside for it.
    flat : the same positions as one list, grouped by label in ascending label order.
    remainder : ``Subset`` of `split` without the positions in `flat`.
    held_out : ``Subset`` of `split` containing exactly the positions in `flat`.
    """
    by_class = {class_idx: [] for class_idx in range(n_classes)}
    still_needed = per_class * n_classes
    for idx, (_, label) in enumerate(split):
        if len(by_class[label]) < per_class:
            by_class[label].append(idx)
            still_needed -= 1
        if still_needed == 0:  # every class is full, no need to scan further
            break
    flat = [idx for indices in by_class.values() for idx in indices]
    taken = set(flat)  # set membership keeps the filter below linear in len(split)
    remainder = Subset(split, [idx for idx in range(len(split)) if idx not in taken])
    # The positions in `flat` refer to `split`, so the held-out subset must be
    # built from `split`. Indexing `remainder` with them would select different
    # images, all of which are still part of the training pool.
    held_out = Subset(split, flat)
    return by_class, flat, remainder, held_out


# Extract samples from e12_
extracted_samples_e12, extracted_samples_flat_e12, ee12_, extracted_subset_e12 = hold_out_first_per_class(e12_)
print(len(ee12_))  # Print the number of remaining samples in e12_
# One batch of 100 holds the whole held-out subset (10 classes x 10 samples).
e12_extracted_loader = DataLoader(extracted_subset_e12, batch_size=100, shuffle=False, num_workers=0)
e12_extracted_loader_iter = iter(e12_extracted_loader)
e12_extracted_loader_image, e12_extracted_loader_label = next(e12_extracted_loader_iter)
################################################################################################################################################
# Extract samples from e34_
extracted_samples_e34, extracted_samples_flat_e34, ee34_, extracted_subset_e34 = hold_out_first_per_class(e34_)
print(len(ee34_))  # Print the number of remaining samples in e34_
e34_extracted_loader = DataLoader(extracted_subset_e34, batch_size=100, shuffle=False, num_workers=0)
e34_extracted_loader_iter = iter(e34_extracted_loader)
e34_extracted_loader_image, e34_extracted_loader_label = next(e34_extracted_loader_iter)
################################################################################################################################################
# Concatenate the remaining samples from e12_ and e34_ in both orders. Both
# loaders shuffle, so they serve the same 49,800 images in independent orders.
e1234 = ConcatDataset([ee12_, ee34_])
e3412 = ConcatDataset([ee34_, ee12_])
e1234_loader = DataLoader(e1234, batch_size=40, shuffle=True, num_workers=0)
e3412_loader = DataLoader(e3412, batch_size=40, shuffle=True, num_workers=0)
################################################################################################################################################
# Fixed evaluation tensors. download=False: the files were fetched above.
# The batch size exceeds the 10,000 test images, so the single batch taken
# here is the complete test set.
s_test_set = torchvision.datasets.CIFAR10(root='./data', train=False, download=False, transform=transform)
s_test_loader = DataLoader(s_test_set, batch_size=50000, shuffle=False, num_workers=0)
s_test_data_iter = iter(s_test_loader)
s_test_image, s_test_label = next(s_test_data_iter)
print(s_test_image.shape)  # Print the shape of the test dataset images

# Only the first batch is taken from a non-shuffled loader, so the "train"
# evaluation tensors are the first 10,000 training images, not all 50,000.
s_tra_set = torchvision.datasets.CIFAR10(root='./data', train=True, download=False, transform=transform)
s_tra_loader = DataLoader(s_tra_set, batch_size=10000, shuffle=False, num_workers=0)
s_tra_data_iter = iter(s_tra_loader)
s_tra_image, s_tra_label = next(s_tra_data_iter)
################################################################################################################################################

classes = ('plane', 'car', 'bird', 'cat',
           'deer', 'dog', 'frog', 'horse', 'ship', 'truck')

Downloading https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz to ./data/cifar-10-python.tar.gz


100%|██████████| 170498071/170498071 [00:02<00:00, 75016680.91it/s]


Extracting ./data/cifar-10-python.tar.gz to ./data
Files already downloaded and verified
50000
10000
24900
24900
torch.Size([10000, 3, 32, 32])


In [None]:
import random

## Sampling ## validation
def _draw_balanced_probe(pool):
    """Randomly draw 10 samples per class from `pool` and load them as one batch.

    Every item of `pool` is visited once to group positions by label; then
    `random.sample` picks 10 positions per label (labels 0..9, in that order).

    Returns, in order: the label -> positions dict, the drawn positions, the
    resulting Subset, its DataLoader, the loader iterator, and the image and
    label tensors of the first (and only) batch of 100.
    """
    members_by_class = {class_idx: [] for class_idx in range(10)}
    for position, (_, target) in enumerate(pool):
        members_by_class[target].append(position)

    drawn = []
    for members in members_by_class.values():
        drawn.extend(random.sample(members, 10))

    probe = torch.utils.data.Subset(pool, drawn)
    probe_loader = torch.utils.data.DataLoader(probe, batch_size=100, shuffle=False, num_workers=0)
    probe_iter = iter(probe_loader)
    images, targets = next(probe_iter)
    return members_by_class, drawn, probe, probe_loader, probe_iter, images, targets


############################ e12_ ###########################
(class_indices, sample_indices_12, sample_e12,
 e12_s_loader, e12_s_loader_iter,
 e12_s_loader_image, e12_s_loader_label) = _draw_balanced_probe(ee12_)

############################ e34_ ###########################
(class_indices, sample_indices_34, sample_e34,
 e34_s_loader, e34_s_loader_iter,
 e34_s_loader_image, e34_s_loader_label) = _draw_balanced_probe(ee34_)

In [None]:
# True when PyTorch can see a CUDA device, False otherwise.
cuda = torch.cuda.is_available()

### **Network 1: linear layers only + Generalization Decision Process (GDP)**

In [None]:
import argparse
import os
import numpy as np
import torchvision.transforms as transforms
from torchvision.utils import save_image
from torch.utils.data import DataLoader
from torchvision import datasets
from torch.autograd import Variable
import torch.nn as nn
import torch

class LLNet(nn.Module):
    """Seven stacked ``nn.Linear`` layers with no activation between them.

    Takes image batches that flatten to rows of 3*32*32 = 3072 values and
    returns 10 output scores per row.
    """

    def __init__(self):
        super().__init__()
        # Widths from the flattened image down to the 10 outputs.
        widths = (3 * 32 * 32, 1000, 500, 100, 50, 25, 20, 10)
        # Registers fc1 ... fc7 in this order, so parameter names and the
        # order in which weights are initialised stay the same.
        for position, (fan_in, fan_out) in enumerate(zip(widths, widths[1:]), start=1):
            setattr(self, f"fc{position}", nn.Linear(fan_in, fan_out))

    def forward(self, x):
        """Flatten `x` to shape (-1, 3072) and apply fc1 through fc7 in turn."""
        x = x.view(-1, 3 * 32 * 32)
        for layer in (self.fc1, self.fc2, self.fc3, self.fc4,
                      self.fc5, self.fc6, self.fc7):
            x = layer(x)
        return x

# Build the loss and the network, move both to the GPU when one is present,
# then create the optimizer over the network's parameters.
loss_function = torch.nn.CrossEntropyLoss()
net = LLNet()

if torch.cuda.is_available():
    net, loss_function = net.cuda(), loss_function.cuda()

# Adam with a learning rate of 0.001.
optimizer_L = torch.optim.Adam(net.parameters(), lr=0.001)

In [None]:
import torch.nn as nn
import torch.nn.functional as F
from collections import Counter, defaultdict
from itertools import combinations
import random

# ------------------------------------------------------------------------------
# Training with the Generalization Decision Process (GDP).
#
# Every step takes one mini-batch from each of the two loaders (e1234_loader is
# action A1, e3412_loader is action A2), computes both losses and
# back-propagates exactly one of them. The choice depends on how the
# "generalization ratio" (a statistic of the network's predictions on four
# fixed 100-image probe batches) and the back-propagated loss changed since
# the previous step.
# ------------------------------------------------------------------------------

# Histories read by the export / summary cells.
a_train = []  # train accuracy, one entry per evaluation
a_test = []  # test accuracy, one entry per evaluation
lossaaa = []  # mean back-propagated loss over each evaluation window
Inf = []  # "Information" statistic, one 0-dim tensor per step
Var_all = []  # "Variation" statistic, one 0-dim tensor per step
Generalization_Ratio_ = []  # generalization ratio, one 0-dim tensor per step
dicide_action = []  # never filled in this cell; kept so the name stays defined

# GDP state carried from step to step (it is not reset between epochs).
loss_before = torch.tensor(30.0)  # loss back-propagated on the previous step
los = torch.tensor(30.0)  # change of that loss between the two previous steps
dis_before_A1 = torch.tensor(0)  # running preference score of action A1
dis_before_A2 = torch.tensor(0)  # running preference score of action A2
dis = torch.tensor(1)  # base amount by which the scores are shifted
per = "N"  # previous action; "N" means no action has been taken yet
state_before = torch.tensor(0.0)  # generalization ratio of the previous step
categrary_number = 10  # Number of categories
tra_val_number = 2  # not used by the loop; kept so the name stays defined

NUM_EPOCHS = 5
EVAL_EVERY = 5  # evaluate and print once every this many mini-batches
EPS = 1e-30  # guard against log(0) and division by zero
PROBE_SAMPLES_PER_CLASS = 10  # size of each per-class chunk of a probe batch

# Follow the device the model actually lives on instead of assuming CUDA.
device = next(net.parameters()).device

# The probe and evaluation tensors never change, so move them once.
e12_extracted_loader_image = e12_extracted_loader_image.to(device)
e12_extracted_loader_label = e12_extracted_loader_label.to(device)
e34_extracted_loader_image = e34_extracted_loader_image.to(device)
e34_extracted_loader_label = e34_extracted_loader_label.to(device)
e12_s_loader_image = e12_s_loader_image.to(device)
e12_s_loader_label = e12_s_loader_label.to(device)
e34_s_loader_image = e34_s_loader_image.to(device)
e34_s_loader_label = e34_s_loader_label.to(device)
s_test_image = s_test_image.to(device)
s_test_label = s_test_label.to(device)
s_tra_image = s_tra_image.to(device)
s_tra_label = s_tra_label.to(device)

# Unordered class pairs (i, j) with i < j, plus index tensors for both directions.
all_combinations = list(combinations(range(categrary_number), 2))
K = categrary_number * (categrary_number - 1)  # number of ordered class pairs
pair_i = torch.tensor([i for i, _ in all_combinations])
pair_j = torch.tensor([j for _, j in all_combinations])


def _predict(images):
    """Return the predicted class index for every image, without tracking gradients."""
    with torch.no_grad():
        return torch.max(net(images), dim=1)[1]


def _class_histogram(predictions):
    """Count predictions per class; returns a float CPU tensor of length `categrary_number`."""
    return torch.bincount(predictions, minlength=categrary_number).float().cpu()


def _class_distribution(predictions):
    """Per-class share of `predictions`; the denominator is inflated by a factor 1 + 1e-6."""
    total = len(predictions)
    return _class_histogram(predictions) / (total + total * 1e-6)


def _variation(in_12, in_34, ext_12, ext_34):
    """"Variation": largest absolute term p * log(p / q) over the classes.

    All four distributions are first divided by their class-wise (weighted)
    sum. p is the normalised `in_12`; q is the normalised `ext_12` for the
    first divergence and the normalised `ext_34` for the second. The larger of
    the two maxima is returned as a 0-dim tensor.
    """
    weight = 1.0 / 2
    denominator = in_12 * weight + in_34 * weight + ext_12 * weight + ext_34 * weight + EPS
    p = (in_12 * weight) / denominator
    q_12 = (ext_12 * weight) / denominator
    q_34 = (ext_34 * weight) / denominator
    # NOTE(review): `in_34` only enters through the denominator; both
    # divergences use the e12 probe distribution as p. Confirm that the second
    # one is not meant to use the normalised `in_34` instead.
    divergence_12 = (p + EPS) * torch.log(p / (q_12 + EPS) + EPS)
    divergence_34 = (p + EPS) * torch.log(p / (q_34 + EPS) + EPS)
    return torch.max(divergence_12.abs().max(), divergence_34.abs().max())


def _directed_terms(p, src, dst):
    """|(p[src] + eps) * log(p[src] / (p[dst] + eps) + eps)| for every index pair."""
    return ((p[src] + EPS) * torch.log(p[src] / (p[dst] + EPS) + EPS)).abs()


def _information(counts_12, counts_34):
    """"Information": mean over all ordered class pairs of the smaller of the two probes' terms.

    `counts_12` / `counts_34` are prediction histograms of one per-class chunk
    of the e12 / e34 probe batches.
    """
    p_12 = counts_12 / PROBE_SAMPLES_PER_CLASS
    p_34 = counts_34 / PROBE_SAMPLES_PER_CLASS
    forward = torch.min(_directed_terms(p_12, pair_i, pair_j), _directed_terms(p_34, pair_i, pair_j))
    backward = torch.min(_directed_terms(p_12, pair_j, pair_i), _directed_terms(p_34, pair_j, pair_i))
    return torch.sum(torch.cat((forward, backward))) / K


def _gdp_select(losses, state_dis, los, per, score_a1, score_a2, dis):
    """Pick the loss to back-propagate and update the two preference scores.

    losses      : 1-D tensor [loss of A1 (e1234 batch), loss of A2 (e3412 batch)].
    state_dis   : change of the generalization ratio since the previous step.
    los         : loss change recorded at the end of the previous step.
    per         : previous action, "A1" or "A2"; any other value means "none yet".
    score_a1/a2 : current preference scores of the two actions.
    dis         : base amount by which the scores are shifted.

    Returns (loss, score_a1, score_a2, action).

    With a previous action, the scores are first shifted in opposite
    directions by `dis` (loss went up, "not fitting") or `2 * dis` (otherwise,
    "overfitting"). The shift favours the previous action when the ratio did
    not fall and the loss went up; each of the two conditions flipping
    reverses the direction. The action with the higher score (A1 on ties) is
    then drawn with probability 3/4, and the scores are scaled by 3/4
    (preferred action) and 1/4 (other one). Without a previous action, A1 is
    preferred and no shift is applied.
    """
    if per in ("A1", "A2"):
        toward_a1 = 1 if state_dis >= 0.0 else -1
        if per == "A2":
            toward_a1 = -toward_a1
        if los > 0.0:  # not fitting
            shift = dis
        else:  # overfitting
            toward_a1 = -toward_a1
            shift = dis * 2
        score_a1 = score_a1 + toward_a1 * shift
        score_a2 = score_a2 - toward_a1 * shift
        prefer_a1 = bool(score_a1 >= score_a2)
    else:
        prefer_a1 = True

    if prefer_a1:
        weights, keep_a1, keep_a2, action = [3, 1], 3 / 4, 1 / 4, "A1"
    else:
        weights, keep_a1, keep_a2, action = [1, 3], 1 / 4, 3 / 4, "A2"
    loss = random.choices(losses, weights=weights)[0]
    return loss, score_a1 * keep_a1, score_a2 * keep_a2, action


for epoch in range(NUM_EPOCHS):  # Loop over the dataset multiple times
    e3412_iter = iter(e3412_loader)  # second view of the data, advanced in lock-step
    running_loss = 0.0

    for step, (imgs, labels) in enumerate(e1234_loader):
        ### calculate losses
        imgs = imgs.to(device)
        labels = labels.to(device)
        loss_out_e1234 = loss_function(net(imgs), labels)  # loss of action A1

        e3412_imgs, e3412_labels = next(e3412_iter)
        e3412_imgs = e3412_imgs.to(device)
        e3412_labels = e3412_labels.to(device)
        loss_out_e3412 = loss_function(net(e3412_imgs), e3412_labels)  # loss of action A2

        ### predictions on the four fixed probe batches
        e12_extracted = _predict(e12_extracted_loader_image)
        e34_extracted = _predict(e34_extracted_loader_image)
        e12_inform = _predict(e12_s_loader_image)
        e34_inform = _predict(e34_s_loader_image)

        # Histogram of predictions for each chunk of 10 consecutive probe samples.
        # NOTE(review): this assumes the sampled probe batches are ordered class
        # by class, so that one chunk corresponds to one true class. Confirm
        # against the sampling cell.
        split_e12_inform = [_class_histogram(chunk) for chunk in torch.chunk(e12_inform, categrary_number)]
        split_e34_inform = [_class_histogram(chunk) for chunk in torch.chunk(e34_inform, categrary_number)]

        ############################### Variation x ###################################
        Variation_all = _variation(
            _class_distribution(e12_inform),
            _class_distribution(e34_inform),
            _class_distribution(e12_extracted),
            _class_distribution(e34_extracted),
        )
        Var_all.append(Variation_all)

        ############################ Information ###################################
        # NOTE(review): the original loop visited every class chunk but wrote
        # each class's pair terms into the same slots, so only the LAST chunk
        # ever reached the sum. That value is reproduced here unchanged.
        # Decide whether the statistic should aggregate over all chunks.
        Information = _information(split_e12_inform[-1], split_e34_inform[-1])
        Inf.append(Information)

        ############################ Generalization_Ratio ###################################
        Generalization_Ratio = Variation_all * (Information + 1.0) / Information
        Generalization_Ratio_.append(Generalization_Ratio)

        ############################ Generalization Decision Process (GDP) ###################################
        state_now = Generalization_Ratio
        state_dis = state_now - state_before
        candidate_losses = torch.stack((loss_out_e1234, loss_out_e3412))
        loss, dis_before_A1, dis_before_A2, per = _gdp_select(
            candidate_losses, state_dis, los, per, dis_before_A1, dis_before_A2, dis)

        ####################################
        #### optimizer
        # Detached, so that no autograd graph is kept alive across steps.
        los = (loss - loss_before.to(device)).detach()
        state_before = state_now
        optimizer_L.zero_grad()
        loss.backward()
        optimizer_L.step()
        loss_before = loss.detach()
        running_loss += loss.item()

        # Print statistics once every EVAL_EVERY mini-batches
        if step % EVAL_EVERY == EVAL_EVERY - 1:
            with torch.no_grad():
                outputs = net(s_test_image)  # [batch, 10]
                predict_y = torch.max(outputs, dim=1)[1]
                accuracy = torch.eq(predict_y, s_test_label).sum().item() / s_test_label.size(0)
                outputs_t = net(s_tra_image)  # [batch, 10]
                predict_y_t = torch.max(outputs_t, dim=1)[1]
                accuracy_t = torch.eq(predict_y_t, s_tra_label).sum().item() / s_tra_label.size(0)
            a_test.append(float(accuracy))
            a_train.append(float(accuracy_t))
            lossaaa.append(float(running_loss / EVAL_EVERY))
            print('[%d, %5d] train_loss: %.3f train_accuracy: %.3f test_accuracy: %.3f' %
                  (epoch + 1, step + 1, running_loss / EVAL_EVERY, accuracy_t, accuracy))
            running_loss = 0.0

[1,     5] train_loss: 2.287 train_accuracy: 0.204 test_accuracy: 0.205
[1,    10] train_loss: 2.102 train_accuracy: 0.216 test_accuracy: 0.217
[1,    15] train_loss: 2.142 train_accuracy: 0.214 test_accuracy: 0.207
[1,    20] train_loss: 2.094 train_accuracy: 0.242 test_accuracy: 0.237
[1,    25] train_loss: 2.087 train_accuracy: 0.259 test_accuracy: 0.268
[1,    30] train_loss: 1.935 train_accuracy: 0.273 test_accuracy: 0.269
[1,    35] train_loss: 2.063 train_accuracy: 0.274 test_accuracy: 0.272
[1,    40] train_loss: 2.122 train_accuracy: 0.276 test_accuracy: 0.272
[1,    45] train_loss: 1.929 train_accuracy: 0.267 test_accuracy: 0.256
[1,    50] train_loss: 1.958 train_accuracy: 0.288 test_accuracy: 0.282
[1,    55] train_loss: 1.889 train_accuracy: 0.270 test_accuracy: 0.266
[1,    60] train_loss: 2.039 train_accuracy: 0.285 test_accuracy: 0.279
[1,    65] train_loss: 1.980 train_accuracy: 0.308 test_accuracy: 0.309
[1,    70] train_loss: 2.049 train_accuracy: 0.293 test_accuracy

In [None]:
# Write each evaluation history to its own text file as a single line of
# values separated by ', ' (no trailing separator).
for target_path, history in (
        ('Accuracy_a_train.txt', a_train),   # training accuracy
        ('Accuracy_a_test.txt', a_test),     # test accuracy
        ('Accuracy_loss.txt', lossaaa)):     # loss values
    with open(target_path, 'w') as file:
        file.write(', '.join(str(value) for value in history))

In [None]:
# Turn the per-step 0-dim tensors into plain Python numbers.
Inf_list = [entry.item() for entry in Inf]
Var_av_list = [entry.item() for entry in Var_all]
Generalization_Ratio_list = [entry.item() for entry in Generalization_Ratio_]

# One output file per statistic; values on a single line, separated by ', '.
exports = {
    'Inf_OOD.txt': Inf_list,
    'Var_all_OOD.txt': Var_av_list,
    'Generalization_Ratio_list.txt': Generalization_Ratio_list,
}
for target_path, values in exports.items():
    with open(target_path, 'w') as file:
        file.write(', '.join(map(str, values)))

In [None]:
import time
import os  # Import the os module

# Directory that collects the saved models; created on first use
model_path = './Models/'
os.makedirs(model_path, exist_ok=True)

# Local time as 'YYYYMMDDHHMM' (strftime falls back to the current local time when no tuple is given)
rq = time.strftime('%Y%m%d%H%M')

# Store the whole network object under a timestamped file name
current_model_path = model_path + rq + "_model.pkl"
torch.save(net, current_model_path)
print("Saved model file: " + current_model_path)

In [None]:
# Display the highest value that appears during training
print("train:", max(a_train))  # Print the highest value in the training data
print("test:", max(a_test))  # Print the highest value in the test data
print("loss:", min(lossaaa))  # Print the minimum value of the loss

train: 0.4225
test: 0.3958
loss: 1.5249487638473511


In [None]:
# Evaluate the current network once on the complete CIFAR-10 training and test splits.
# Each loader uses a batch size equal to the split size, so a single batch covers the whole split.
trainset = torchvision.datasets.CIFAR10(root='./data', train=True, download=True, transform=transform)  # Training split
testset = torchvision.datasets.CIFAR10(root='./data', train=False, download=False, transform=transform)  # Test split
trainloader = torch.utils.data.DataLoader(trainset, batch_size=50000, shuffle=True, num_workers=0)
t_loader = torch.utils.data.DataLoader(testset, batch_size=10000, shuffle=False, num_workers=0)

# Inference only: without torch.no_grad() the forward pass over the 50,000-image batch would
# record an autograd graph that is never used and only costs GPU memory.
with torch.no_grad():
    trainloader_iter = iter(trainloader)
    tl_image, tl_label = next(trainloader_iter)  # The single batch holding the training split
    tl_image = tl_image.cuda()
    tl_label = tl_label.cuda()
    tl_imageoutputs = net(tl_image)  # [batch, 10]
    predict_y = torch.max(tl_imageoutputs, dim=1)[1]  # Index of the largest output per row
    accuracy = torch.eq(predict_y, tl_label).sum().item() / tl_label.size(0)  # Fraction of correct predictions
    print("train:", float(accuracy))

    t_data_iter = iter(t_loader)
    t_image, t_label = next(t_data_iter)  # The single batch holding the test split
    t_image = t_image.cuda()
    t_label = t_label.cuda()
    t_imageoutputs = net(t_image)  # [batch, 10]
    predict = torch.max(t_imageoutputs, dim=1)[1]  # Index of the largest output per row
    accuracy_t = torch.eq(predict, t_label).sum().item() / t_label.size(0)  # Fraction of correct predictions
    print("test:", float(accuracy_t))

Files already downloaded and verified
train: 0.40658
test: 0.3874


### **Network 1: linear layers only (without GDP)**

In [None]:
import argparse
import os
import numpy as np
import torchvision.transforms as transforms
from torchvision.utils import save_image
from torch.utils.data import DataLoader
from torchvision import datasets
from torch.autograd import Variable
import torch.nn as nn
import torch

class LLNet(nn.Module):
    """Seven stacked ``nn.Linear`` layers with no activation function in between.

    The forward pass flattens the input to rows of 3*32*32 features and applies
    ``fc1`` ... ``fc7`` in order; the last layer has 10 outputs.
    """

    def __init__(self):
        super().__init__()

        # Feature widths from the flattened input to the output; each consecutive pair
        # defines one fully connected layer, registered as fc1 ... fc7 in that order.
        widths = (3*32*32, 1000, 500, 100, 50, 25, 20, 10)
        for position, (fan_in, fan_out) in enumerate(zip(widths, widths[1:]), start=1):
            setattr(self, "fc%d" % position, nn.Linear(fan_in, fan_out))

    def forward(self, x):
        """Flatten ``x`` and pass it through fc1 ... fc7; returns the output of fc7."""
        x = x.view(-1, 3*32*32)  # Flatten to rows of 3*32*32 features
        for position in range(1, 8):
            x = getattr(self, "fc%d" % position)(x)
        return x

# Build the linear-only network together with its cross-entropy criterion
net = LLNet()
loss_function = torch.nn.CrossEntropyLoss()

# Use CUDA for both the network and the criterion whenever a GPU is present
if torch.cuda.is_available():
    net, loss_function = net.cuda(), loss_function.cuda()

# Adam over all network parameters, learning rate 0.001
optimizer_L = torch.optim.Adam(net.parameters(), lr=0.001)

In [None]:
import numpy as np
import torch
import torchvision
import torch.nn as nn
from matplotlib import pyplot as plt
import random
import torch.optim as optim
import torchvision.transforms as transforms
import time
import torch.nn.functional as F
from collections import Counter, defaultdict
from itertools import combinations

################################################ reload
# Preprocessing: convert to a tensor, then normalise every channel with mean 0.5 and std 0.5
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5)),
])

# CIFAR-10 training and test splits (downloaded when not already present under ./data)
train_set = torchvision.datasets.CIFAR10(
    root='./data', train=True, download=True, transform=transform)
test_set = torchvision.datasets.CIFAR10(
    root='./data', train=False, download=True, transform=transform)

# Shuffled mini-batch loaders (40 images per batch) over both splits
train_loader = torch.utils.data.DataLoader(
    train_set, batch_size=40, shuffle=True, num_workers=0)
testloader = torch.utils.data.DataLoader(
    test_set, batch_size=40, shuffle=True, num_workers=0)

# "Validation" data: the test split again (train=False), read as one unshuffled batch of 10000
val_set = torchvision.datasets.CIFAR10(
    root='./data', train=False, download=False, transform=transform)
val_loader = torch.utils.data.DataLoader(
    val_set, batch_size=10000, shuffle=False, num_workers=0)
val_data_iter = iter(val_loader)
val_image, val_label = next(val_data_iter)
#print(val_image.shape)  # Print the shape of validation images

##################
# First unshuffled batch of 10000 images and labels from the training split
tra_set = torchvision.datasets.CIFAR10(
    root='./data', train=True, download=False, transform=transform)
tra_loader = torch.utils.data.DataLoader(
    tra_set, batch_size=10000, shuffle=False, num_workers=0)
tra_data_iter = iter(tra_loader)
tra_image, tra_label = next(tra_data_iter)

##################
# Class names, in the order given here
classes = ('plane', 'car', 'bird', 'cat',
           'deer', 'dog', 'frog', 'horse', 'ship', 'truck')

# Adam over the parameters of the current `net`, learning rate 0.001
optimizer = optim.Adam(net.parameters(), lr=0.001)

########################################################

# Metric logs (each name gets its own fresh list)
a_train, a_test, lossaaa = [], [], []  # Training accuracy / test accuracy / loss values
Inf, Var_all, Generalization_Ratio_ = [], [], []  # Information / variation / generalization ratios
dicide_action = []  # Decision actions

# State carried from one training step to the next
loss_before, los = torch.tensor(30.0), torch.tensor(30.0)  # Initial loss values
dis_before_A1, dis_before_A2 = torch.tensor(0), torch.tensor(0)  # Distances before actions A1 / A2
dis = torch.tensor(1)  # Distance increment
per = "N"  # Previous iteration's action ("N" until one has been taken)
state_before = torch.tensor(0.0)  # Previous state value

# Problem constants
categrary_number = 10  # Number of categories
tra_val_number = 2  # Training validation number

def _predict_classes(model, images):
    """Return the index of the largest output in every row of ``model(images)``.

    Runs under ``torch.no_grad()`` so these probe passes never record an autograd graph.
    """
    with torch.no_grad():
        return torch.max(model(images), dim=1)[1]


def _class_histogram(predictions, num_classes=10):
    """Count predicted class indices into a CPU tensor of length ``num_classes``.

    The result uses the default floating dtype, like the ``torch.zeros(10)`` buffer it replaces.
    """
    counts = torch.bincount(predictions, minlength=num_classes)[:num_classes]
    return counts.to(device="cpu", dtype=torch.get_default_dtype())


def _smoothed_frequencies(counts, total):
    """Divide ``counts`` by ``total`` inflated by a relative 1e-6."""
    return counts / (total + total * 1e-6)


def _abs_log_ratio_term(p, q):
    """Return ``|(p + 1e-30) * log(p / (q + 1e-30) + 1e-30)|`` element-wise."""
    return abs((p + 1e-30) * torch.log(p / (q + 1e-30) + 1e-30))


# The probe and evaluation batches are the same on every step, so move them to the GPU once.
e12_extracted_loader_image = e12_extracted_loader_image.cuda()
e12_extracted_loader_label = e12_extracted_loader_label.cuda()
e34_extracted_loader_image = e34_extracted_loader_image.cuda()
e34_extracted_loader_label = e34_extracted_loader_label.cuda()
e12_s_loader_image = e12_s_loader_image.cuda()
e12_s_loader_label = e12_s_loader_label.cuda()
e34_s_loader_image = e34_s_loader_image.cuda()
e34_s_loader_label = e34_s_loader_label.cuda()
s_test_image = s_test_image.cuda()
s_test_label = s_test_label.cuda()
s_tra_image = s_tra_image.cuda()
s_tra_label = s_tra_label.cuda()

# Loop-invariant helpers for the Information term
all_combinations = list(combinations(range(10), 2))  # Unordered class pairs (i < j)
K = categrary_number * (categrary_number - 1)  # Number of ordered class pairs

# Every tensor carried between steps (loss_before, los, state_before, dis_before_A*) is kept
# free of autograd history where it is produced, so no per-epoch re-wrapping is required.
for epoch in range(5):  # Loop over the dataset multiple times
    e3412_iter = iter(e3412_loader)  # Second data stream, advanced in lock-step with e1234_loader
    running_loss = 0.0  # Loss accumulated since the last report

    for step, (imgs, labels) in enumerate(e1234_loader):
        ### Supervised losses on the two data streams
        labels = labels.cuda()
        imgs = imgs.cuda()
        out_e1234 = net(imgs)
        loss_out_e1234 = loss_function(out_e1234, labels)  # Candidate loss for action A1

        e3412_imgs, e3412_labels = next(e3412_iter)
        e3412_imgs = e3412_imgs.cuda()
        e3412_labels = e3412_labels.cuda()
        out_e3412 = net(e3412_imgs)
        loss_out_e3412 = loss_function(out_e3412, e3412_labels)  # Candidate loss for action A2

        ### Predicted classes on the fixed probe batches
        e12_extracted = _predict_classes(net, e12_extracted_loader_image)
        e34_extracted = _predict_classes(net, e34_extracted_loader_image)
        e12_inform = _predict_classes(net, e12_s_loader_image)
        e34_inform = _predict_classes(net, e34_s_loader_image)

        # Per-chunk class histograms of the sample-batch predictions (10 chunks requested)
        split_e12_inform = [_class_histogram(chunk) for chunk in torch.chunk(e12_inform, 10)]
        split_e34_inform = [_class_histogram(chunk) for chunk in torch.chunk(e34_inform, 10)]

        # Predicted-class frequencies over each whole probe batch
        probabilities_in_12 = _smoothed_frequencies(_class_histogram(e12_inform), len(e12_inform))
        probabilities_in_34 = _smoothed_frequencies(_class_histogram(e34_inform), len(e34_inform))
        probabilities_ext_12 = _smoothed_frequencies(_class_histogram(e12_extracted), len(e12_extracted))
        probabilities_ext_34 = _smoothed_frequencies(_class_histogram(e34_extracted), len(e34_extracted))

        ############################### Variation x ###################################
        weight_tra_probility_all = 1.0 / 2
        # Shared per-class normaliser over the four frequency vectors
        dow_all = \
              (probabilities_in_12*weight_tra_probility_all + probabilities_in_34*weight_tra_probility_all +
              probabilities_ext_12*weight_tra_probility_all + probabilities_ext_34*weight_tra_probility_all + 1e-30)
        in_1_all = (probabilities_in_12*weight_tra_probility_all) / dow_all
        e_1_all = (probabilities_ext_12*weight_tra_probility_all) / dow_all
        e_2_all = (probabilities_ext_34*weight_tra_probility_all) / dow_all
        # NOTE(review): both divergence terms start from the e12 sample frequencies; the e34 sample
        # frequencies enter only through the normaliser above -- confirm this is intended.
        d_KL_all = torch.max(_abs_log_ratio_term(in_1_all, e_1_all))
        d_KL_all_ = torch.max(_abs_log_ratio_term(in_1_all, e_2_all))
        Variation_all = torch.max(d_KL_all, d_KL_all_)
        Var_all.append(Variation_all)

        ############################ Information ###################################
        # For every ordered class pair keep the smaller of the e12 and e34 terms.
        # NOTE(review): result_tensor is overwritten on every pass of the chunk loop, so only the
        # last chunk (c == 9) contributes to Information -- confirm whether accumulating over
        # chunks was intended.
        result_tensor = torch.zeros(len(all_combinations) * 2)
        for c in range(10):
            # Counts are scaled by 1/10 -- presumably the chunk size; TODO confirm.
            freq_e12 = split_e12_inform[c] / 10
            freq_e34 = split_e34_inform[c] / 10
            for idx, (i, j) in enumerate(all_combinations):
                pair_ij = torch.min(_abs_log_ratio_term(freq_e12[i], freq_e12[j]),
                                    _abs_log_ratio_term(freq_e34[i], freq_e34[j]))
                result_tensor[idx] = pair_ij.item()
                pair_ji = torch.min(_abs_log_ratio_term(freq_e12[j], freq_e12[i]),
                                    _abs_log_ratio_term(freq_e34[j], freq_e34[i]))
                result_tensor[idx + len(all_combinations)] = pair_ji.item()
        Information = torch.sum(result_tensor) / K
        Inf.append(Information)

        ############################ Generalization_Ratio ###################################
        Generalization_Ratio = Variation_all * (Information + 1.0) / Information
        Generalization_Ratio_.append(Generalization_Ratio)

        ############################ Generalization Decision Process (GDP) ###################################
        state_now = Generalization_Ratio
        loss_before = loss_before.cuda()
        state_before = state_before.cuda()
        state_dis = state_now - state_before
        candidate_losses = torch.cat((loss_out_e1234.unsqueeze(0), loss_out_e3412.unsqueeze(0)), 0)

        # Two candidate losses (A1: e1234 batch, A2: e3412 batch); one is drawn with 3:1 odds.
        # The favoured action depends on how the ratio and the loss moved since the last step:
        #   * los > 0  (labelled "not fitting"): shift by `dis`; the previous action gains when
        #     the ratio did not decrease and loses otherwise.
        #   * los <= 0 (labelled "overfitting"): shift by `2 * dis` in the opposite direction.
        ratio_not_decreasing = bool(state_dis >= 0.0)
        loss_increased = bool(los > 0.0)
        if per in ("A1", "A2"):
            shift = dis if loss_increased else dis * 2
            reward_previous = ratio_not_decreasing == loss_increased
            raise_a1 = reward_previous if per == "A1" else not reward_previous
            if raise_a1:
                dis_before_A1 += shift
                dis_before_A2 -= shift
            else:
                dis_before_A1 -= shift
                dis_before_A2 += shift
            prefer_a1 = bool(dis_before_A1 >= dis_before_A2)
        else:
            # No previous action yet: start by favouring A1 without shifting the distances
            prefer_a1 = True

        if prefer_a1:
            weights, keep_a1, keep_a2, per = [3, 1], 3 / 4, 1 / 4, "A1"
        else:
            weights, keep_a1, keep_a2, per = [1, 3], 1 / 4, 3 / 4, "A2"
        loss = random.choices(candidate_losses, weights=weights)[0]
        dis_before_A1 = dis_before_A1 * keep_a1
        dis_before_A2 = dis_before_A2 * keep_a2

        ####################################
        #### optimizer
        # Change of the sampled loss relative to the previous step; only its sign is used later,
        # so it is detached to avoid holding on to autograd graphs.
        los = (loss - loss_before).detach()
        ####################################
        # This network is the baseline without GDP: always train on the e1234 loss,
        # regardless of the action sampled above.
        loss = loss_out_e1234
        ####################################
        state_before = state_now
        optimizer_L.zero_grad()
        loss.backward()
        optimizer_L.step()
        loss_before = loss.detach()
        running_loss += loss.item()

        # Print statistics
        if step % 5 == 4:  # Report every 5 mini-batches
            with torch.no_grad():
                outputs = net(s_test_image)  # [batch, 10]
                predict_y = torch.max(outputs, dim=1)[1]
                accuracy = torch.eq(predict_y, s_test_label).sum().item() / s_test_label.size(0)
                a_test.append(float(accuracy))
                outputs_t = net(s_tra_image)  # [batch, 10]
                predict_y_t = torch.max(outputs_t, dim=1)[1]
                accuracy_t = torch.eq(predict_y_t, s_tra_label).sum().item() / s_tra_label.size(0)
                a_train.append(float(accuracy_t))
                lossaaa.append(float(running_loss / 5))  # Mean loss over the last 5 steps
                print('[%d, %5d] train_loss: %.3f train_accuracy: %.3f test_accuracy: %.3f' %
                      (epoch + 1, step + 1, running_loss / 5, accuracy_t, accuracy))
                running_loss = 0.0

Files already downloaded and verified
Files already downloaded and verified
[1,     5] train_loss: 2.300 train_accuracy: 0.204 test_accuracy: 0.197
[1,    10] train_loss: 2.114 train_accuracy: 0.254 test_accuracy: 0.250
[1,    15] train_loss: 2.209 train_accuracy: 0.208 test_accuracy: 0.212
[1,    20] train_loss: 2.156 train_accuracy: 0.197 test_accuracy: 0.204
[1,    25] train_loss: 2.146 train_accuracy: 0.243 test_accuracy: 0.242
[1,    30] train_loss: 2.082 train_accuracy: 0.253 test_accuracy: 0.247
[1,    35] train_loss: 2.015 train_accuracy: 0.237 test_accuracy: 0.237
[1,    40] train_loss: 2.111 train_accuracy: 0.262 test_accuracy: 0.268
[1,    45] train_loss: 2.055 train_accuracy: 0.257 test_accuracy: 0.253
[1,    50] train_loss: 2.011 train_accuracy: 0.250 test_accuracy: 0.243
[1,    55] train_loss: 2.073 train_accuracy: 0.230 test_accuracy: 0.228
[1,    60] train_loss: 2.007 train_accuracy: 0.270 test_accuracy: 0.267
[1,    65] train_loss: 2.008 train_accuracy: 0.274 test_accu

In [None]:
# Writing training accuracy to file
with open('Accuracy_a_train.txt', 'w') as file:
    for i in range(len(a_train)):
        file.write(str(a_train[i]))
        if i < len(a_train) - 1:
            file.write(', ')

# Writing test accuracy to file
with open('Accuracy_a_test.txt', 'w') as file:
    for i in range(len(a_test)):
        file.write(str(a_test[i]))
        if i < len(a_test) - 1:
            file.write(', ')

# Writing loss values to file
with open('Accuracy_loss.txt', 'w') as file:
    for i in range(len(lossaaa)):
        file.write(str(lossaaa[i]))
        if i < len(lossaaa) - 1:
            file.write(', ')

In [None]:
# Convert tensors to lists
Inf_list = [tensor.item() for tensor in Inf]
Var_av_list = [tensor.item() for tensor in Var_all]
Generalization_Ratio_list = [tensor.item() for tensor in Generalization_Ratio_]

# Writing Inf_list to file
with open('Inf_OOD.txt', 'w') as file:
    for i in range(len(Inf_list)):
        file.write(str(Inf_list[i]))
        if i < len(Inf_list) - 1:
            file.write(', ')

# Writing Var_av_list to file
with open('Var_all_OOD.txt', 'w') as file:
    for i in range(len(Var_av_list)):
        file.write(str(Var_av_list[i]))
        if i < len(Var_av_list) - 1:
            file.write(', ')

# Writing Generalization_Ratio_list to file
with open('Generalization_Ratio_list.txt', 'w') as file:
    for i in range(len(Generalization_Ratio_list)):
        file.write(str(Generalization_Ratio_list[i]))
        if i < len(Generalization_Ratio_list) - 1:
            file.write(', ')

In [None]:
import time
import os  # Import the os module

# Directory that collects the saved models; created on first use
model_path = './Models/'
os.makedirs(model_path, exist_ok=True)

# Local time as 'YYYYMMDDHHMM' (strftime falls back to the current local time when no tuple is given)
rq = time.strftime('%Y%m%d%H%M')

# Store the whole network object under a timestamped file name
current_model_path = model_path + rq + "_model.pkl"
torch.save(net, current_model_path)
print("Saved model file: " + current_model_path)

In [None]:
# Display the highest value that appears during training
best_train_accuracy = max(a_train)  # Highest logged training accuracy
print("train:", best_train_accuracy)
best_test_accuracy = max(a_test)  # Highest logged test accuracy
print("test:", best_test_accuracy)
lowest_logged_loss = min(lossaaa)  # Lowest logged (averaged) loss
print("loss:", lowest_logged_loss)

train: 0.4241
test: 0.3979
loss: 1.5480000495910644


In [None]:
# Evaluate the current network once on the complete CIFAR-10 training and test splits.
# Each loader uses a batch size equal to the split size, so a single batch covers the whole split.
# The training split is bound to `trainset` (previously `set`, which shadowed the builtin and left
# the loader below reading a `trainset` left over from an earlier cell).
trainset = torchvision.datasets.CIFAR10(root='./data', train=True, download=True, transform=transform)  # Training split
testset = torchvision.datasets.CIFAR10(root='./data', train=False, download=False, transform=transform)  # Test split
trainloader = torch.utils.data.DataLoader(trainset, batch_size=50000, shuffle=True, num_workers=0)
t_loader = torch.utils.data.DataLoader(testset, batch_size=10000, shuffle=False, num_workers=0)

# Inference only: without torch.no_grad() the forward pass over the 50,000-image batch would
# record an autograd graph that is never used and only costs GPU memory.
with torch.no_grad():
    trainloader_iter = iter(trainloader)
    tl_image, tl_label = next(trainloader_iter)  # The single batch holding the training split
    tl_image = tl_image.cuda()
    tl_label = tl_label.cuda()
    tl_imageoutputs = net(tl_image)  # [batch, 10]
    predict_y = torch.max(tl_imageoutputs, dim=1)[1]  # Index of the largest output per row
    accuracy = torch.eq(predict_y, tl_label).sum().item() / tl_label.size(0)  # Fraction of correct predictions
    print("train:", float(accuracy))

    t_data_iter = iter(t_loader)
    t_image, t_label = next(t_data_iter)  # The single batch holding the test split
    t_image = t_image.cuda()
    t_label = t_label.cuda()
    t_imageoutputs = net(t_image)  # [batch, 10]
    predict = torch.max(t_imageoutputs, dim=1)[1]  # Index of the largest output per row
    accuracy_t = torch.eq(predict, t_label).sum().item() / t_label.size(0)  # Fraction of correct predictions
    print("test:", float(accuracy_t))

Files already downloaded and verified
train: 0.39686
test: 0.3743


### **Network 2: linear layers and activation layers + Generalization Decision Process (GDP)**

In [None]:
import argparse
import os
import numpy as np
import torchvision.transforms as transforms
from torchvision.utils import save_image
from torch.utils.data import DataLoader
from torchvision import datasets
from torch.autograd import Variable
import torch.nn as nn
import torch

class LANet(nn.Module):
    """Fully connected classifier for 3x32x32 images.

    Layer widths: 3072 -> 1000 -> 500 -> 100 -> 50 -> 25 -> 20 -> 10.
    ReLU is applied after fc1, fc3, fc5 and fc6 only; fc2 and fc4 feed the
    next layer without a non-linearity, and fc7 returns the raw 10-way logits.
    """

    def __init__(self):
        super().__init__()

        self.fc1 = nn.Linear(3 * 32 * 32, 1000)
        self.fc2 = nn.Linear(1000, 500)
        self.fc3 = nn.Linear(500, 100)
        self.fc4 = nn.Linear(100, 50)
        self.fc5 = nn.Linear(50, 25)
        self.fc6 = nn.Linear(25, 20)
        self.fc7 = nn.Linear(20, 10)

    def forward(self, x):
        """Map a batch of images to logits.

        Args:
            x: tensor whose elements can be viewed as rows of 3*32*32 values.

        Returns:
            Tensor of shape (rows, 10) holding unnormalised class scores.
        """
        # Resolved through the `nn` import this class already needs, so the
        # model does not depend on a separate `F` alias being defined elsewhere.
        relu = nn.functional.relu

        x = x.view(-1, 3 * 32 * 32)  # flatten each image into one row
        x = relu(self.fc1(x))
        x = self.fc2(x)              # no activation after fc2
        x = relu(self.fc3(x))
        x = self.fc4(x)              # no activation after fc4
        x = relu(self.fc5(x))
        x = relu(self.fc6(x))
        return self.fc7(x)           # logits; CrossEntropyLoss applies the softmax

# Fresh model plus the cross-entropy criterion used for every training loss.
net, loss_function = LANet(), nn.CrossEntropyLoss()

# Move both to the GPU when one is present; otherwise they stay on the CPU.
if torch.cuda.is_available():
    net, loss_function = net.cuda(), loss_function.cuda()

# Adam over all model parameters, created after the optional device move.
optimizer_L = torch.optim.Adam(params=net.parameters(), lr=1e-3)

In [None]:
import torch.nn as nn
import torch.nn.functional as F
from collections import Counter, defaultdict
from itertools import combinations
import random

# ---- histories appended to while training
a_train, a_test, lossaaa = [], [], []              # train accuracy, test accuracy, averaged loss
Inf, Var_all, Generalization_Ratio_ = [], [], []   # per-step information, variation, generalization ratio
dicide_action = []                                 # reserved for decision actions

# ---- scalar state carried from one optimisation step to the next
loss_before, los = torch.tensor(30.0), torch.tensor(30.0)     # previous loss / loss change, both start at 30
state_before = torch.tensor(0.0)                              # previous state value
dis_before_A1, dis_before_A2 = torch.tensor(0), torch.tensor(0)  # running scores for actions A1 and A2
dis = torch.tensor(1)                                         # unit increment applied to the scores
per = "N"                                                     # previous action; "N" until one has been taken

# ---- constants
categrary_number, tra_val_number = 10, 2   # number of categories, training/validation number

def _predicted_classes(images):
    """Return the index of the largest logit `net` produces for each image.

    Runs under no_grad(): the predictions only feed the counting statistics
    below, never the loss that is back-propagated.
    """
    with torch.no_grad():
        return torch.max(net(images), dim=1)[1]


def _class_counts(predictions, num_classes=10):
    """Histogram of predicted class indices as a float32 CPU tensor of length `num_classes`."""
    return torch.bincount(predictions, minlength=num_classes)[:num_classes].float().cpu()


def _class_probabilities(predictions):
    """Per-class prediction frequencies; the denominator is padded by a relative 1e-6."""
    total = len(predictions)
    return _class_counts(predictions) / (total + total * 1e-6)


def _pair_term(chunk_counts, a, b):
    """|p_a * log(p_a / p_b)| with p = count / 10; the 1e-30 terms guard log(0) and division by zero."""
    p_a = chunk_counts[a] / 10
    p_b = chunk_counts[b] / 10
    return abs((p_a + 1e-30) * torch.log(p_a / (p_b + 1e-30) + 1e-30))


# ---------------------------------------------------------------------------
# Training with the Generalization Decision Process (GDP).
# Every step computes a loss on a batch from each of the two loaders, derives
# a "generalization ratio" from the model's predictions on four probe sets,
# and uses the change of that ratio and of the loss to decide which of the
# two losses is favoured for back-propagation.
# ---------------------------------------------------------------------------

LOG_EVERY = 5  # evaluate and print every 5 optimisation steps

# Loop invariants for the Information term.
all_combinations = list(combinations(range(10), 2))   # unordered class pairs (i < j)
K = categrary_number * (categrary_number - 1)         # number of ordered class pairs

# The probe and monitoring tensors are never rebound inside the loop, so they
# are moved to the GPU once here instead of on every step.
e12_extracted_loader_image = e12_extracted_loader_image.cuda()
e12_extracted_loader_label = e12_extracted_loader_label.cuda()
e34_extracted_loader_image = e34_extracted_loader_image.cuda()
e34_extracted_loader_label = e34_extracted_loader_label.cuda()
e12_s_loader_image = e12_s_loader_image.cuda()
e12_s_loader_label = e12_s_loader_label.cuda()
e34_s_loader_image = e34_s_loader_image.cuda()
e34_s_loader_label = e34_s_loader_label.cuda()
s_test_image = s_test_image.cuda()
s_test_label = s_test_label.cuda()
s_tra_image = s_tra_image.cuda()
s_tra_label = s_tra_label.cuda()
loss_before = loss_before.cuda()

for epoch in range(5):  # Loop over the dataset multiple times
    # Second loader is advanced manually, one batch per batch of e1234_loader.
    # NOTE(review): next() below raises StopIteration if e3412_loader yields
    # fewer batches than e1234_loader - confirm both have the same length.
    e3412_iter = iter(e3412_loader)
    running_loss = 0.0

    for step, (imgs, labels) in enumerate(e1234_loader):
        ### candidate losses, one per loader (action A1 / action A2)
        labels = labels.cuda()
        imgs = imgs.cuda()
        out_e1234 = net(imgs)
        loss_out_e1234 = loss_function(out_e1234, labels)

        e3412_imgs, e3412_labels = next(e3412_iter)
        e3412_imgs = e3412_imgs.cuda()
        e3412_labels = e3412_labels.cuda()
        out_e3412 = net(e3412_imgs)
        loss_out_e3412 = loss_function(out_e3412, e3412_labels)

        ### predictions on the four probe sets
        e12_extracted = _predicted_classes(e12_extracted_loader_image)
        e34_extracted = _predicted_classes(e34_extracted_loader_image)
        e12_inform = _predicted_classes(e12_s_loader_image)
        e34_inform = _predicted_classes(e34_s_loader_image)

        # Per-chunk class histograms (predictions split into 10 chunks).
        split_e12_inform = [_class_counts(chunk) for chunk in torch.chunk(e12_inform, 10)]
        split_e34_inform = [_class_counts(chunk) for chunk in torch.chunk(e34_inform, 10)]

        ############################### Variation ###################################
        in_12_all = _class_probabilities(e12_inform)
        in_34_all = _class_probabilities(e34_inform)
        in_12_extra = _class_probabilities(e12_extracted)
        in_34_extra = _class_probabilities(e34_extracted)
        weight_tra_probility_all = 1.0 / 2
        # Mixture of all four prediction distributions, used as the normaliser.
        dow_all = \
              (in_12_all*weight_tra_probility_all + in_34_all*weight_tra_probility_all +
              in_12_extra*weight_tra_probility_all + in_34_extra*weight_tra_probility_all + 1e-30)
        in_1_all = (in_12_all*weight_tra_probility_all) / dow_all
        e_1_all = (in_12_extra*weight_tra_probility_all) / dow_all
        e_2_all = (in_34_extra*weight_tra_probility_all) / dow_all
        # Only the e12 "inform" share is compared against the two extracted
        # shares; the e34 "inform" share enters through dow_all alone.
        k_divergence_all = (in_1_all + 1e-30) * torch.log(in_1_all / (e_1_all + 1e-30) + 1e-30)
        k_divergence_all_ = (in_1_all + 1e-30) * torch.log(in_1_all / (e_2_all + 1e-30) + 1e-30)
        d_KL_all = torch.max(abs(k_divergence_all))
        d_KL_all_ = torch.max(abs(k_divergence_all_))
        Variation_all = torch.max(d_KL_all, d_KL_all_)
        Var_all.append(Variation_all)

        ############################ Information ###################################
        # Slots [0, 45) hold the (i, j) terms, slots [45, 90) the reversed (j, i) terms.
        result_tensor = torch.zeros(len(all_combinations) * 2)
        # NOTE(review): every pass over `c` overwrites all slots of
        # result_tensor, so only the last chunk (c == 9) contributes to
        # Information. Kept as is to preserve results - confirm whether an
        # accumulation over chunks was intended.
        for c in range(10):
            for idx, (i, j) in enumerate(all_combinations):
                result_tensor[idx] = torch.min(
                    _pair_term(split_e12_inform[c], i, j),
                    _pair_term(split_e34_inform[c], i, j)).item()
                result_tensor[idx + len(all_combinations)] = torch.min(
                    _pair_term(split_e12_inform[c], j, i),
                    _pair_term(split_e34_inform[c], j, i)).item()
        Information = torch.sum(result_tensor) / K
        Inf.append(Information)

        ############################ Generalization_Ratio ###################################
        # Tensor division: Information == 0 yields inf/nan rather than raising.
        Generalization_Ratio = Variation_all * (Information + 1.0) / Information
        Generalization_Ratio_.append(Generalization_Ratio)

        ############################ Generalization Decision Process (GDP) ###################################
        state_now = Generalization_Ratio
        state_dis = state_now - state_before
        candidate_losses = torch.cat((loss_out_e1234.unsqueeze(0), loss_out_e3412.unsqueeze(0)), 0)

        ratio_not_decreasing = bool(state_dis >= 0.0)
        loss_increased = bool(los > 0.0)   # original labels: True = "Not fitting", False = "Overfitting"

        if per in ("A1", "A2"):
            # The previous action is rewarded when the ratio did not decrease
            # while the loss went up, or when the ratio decreased while the
            # loss did not go up; otherwise it is penalised. The score update
            # is `dis` when the loss increased and `2 * dis` when it did not.
            reward_previous = ratio_not_decreasing == loss_increased
            score_step = dis if loss_increased else dis * 2
            if (per == "A1") == reward_previous:
                dis_before_A1 += score_step
                dis_before_A2 -= score_step
            else:
                dis_before_A1 -= score_step
                dis_before_A2 += score_step
            favour_a1 = bool(dis_before_A1 >= dis_before_A2)
        else:
            # No previous action yet (per == "N"): start by favouring A1.
            favour_a1 = True

        # The favoured action's loss is drawn with probability 3/4, the other
        # with 1/4. `per` records the favoured action, not the loss that was
        # actually drawn.
        if favour_a1:
            loss = random.choices(candidate_losses, weights=[3, 1])[0]
            dis_before_A1 = dis_before_A1 * (3 / 4)
            dis_before_A2 = dis_before_A2 * (1 / 4)
            per = "A1"
        else:
            loss = random.choices(candidate_losses, weights=[1, 3])[0]
            dis_before_A1 = dis_before_A1 * (1 / 4)
            dis_before_A2 = dis_before_A2 * (3 / 4)
            per = "A2"

        ####################################
        #### optimizer
        # Bookkeeping values are detached so they do not keep autograd graphs alive.
        los = loss.detach() - loss_before
        state_before = state_now
        optimizer_L.zero_grad()
        loss.backward()
        optimizer_L.step()
        loss_before = loss.detach()
        running_loss += loss.item()

        # Print statistics
        if step % LOG_EVERY == LOG_EVERY - 1:  # every LOG_EVERY mini-batches
            with torch.no_grad():
                outputs = net(s_test_image)  # [batch, 10]
                predict_y = torch.max(outputs, dim=1)[1]
                accuracy = torch.eq(predict_y, s_test_label).sum().item() / s_test_label.size(0)
                a_test.append(float(accuracy))
                outputs_t = net(s_tra_image)  # [batch, 10]
                predict_y_t = torch.max(outputs_t, dim=1)[1]
                accuracy_t = torch.eq(predict_y_t, s_tra_label).sum().item() / s_tra_label.size(0)
                a_train.append(float(accuracy_t))
                lossaaa.append(float(running_loss / LOG_EVERY))
                print('[%d, %5d] train_loss: %.3f train_accuracy: %.3f test_accuracy: %.3f' %
                      (epoch + 1, step + 1, running_loss / LOG_EVERY, accuracy_t, accuracy))
                running_loss = 0.0

[1,     5] train_loss: 0.967 train_accuracy: 0.647 test_accuracy: 0.520
[1,    10] train_loss: 1.000 train_accuracy: 0.653 test_accuracy: 0.525
[1,    15] train_loss: 1.020 train_accuracy: 0.663 test_accuracy: 0.531
[1,    20] train_loss: 0.939 train_accuracy: 0.663 test_accuracy: 0.528
[1,    25] train_loss: 0.924 train_accuracy: 0.654 test_accuracy: 0.530
[1,    30] train_loss: 0.971 train_accuracy: 0.663 test_accuracy: 0.530
[1,    35] train_loss: 1.068 train_accuracy: 0.652 test_accuracy: 0.521
[1,    40] train_loss: 0.986 train_accuracy: 0.667 test_accuracy: 0.533
[1,    45] train_loss: 1.076 train_accuracy: 0.664 test_accuracy: 0.529
[1,    50] train_loss: 1.072 train_accuracy: 0.665 test_accuracy: 0.528
[1,    55] train_loss: 0.951 train_accuracy: 0.665 test_accuracy: 0.524
[1,    60] train_loss: 0.941 train_accuracy: 0.655 test_accuracy: 0.519
[1,    65] train_loss: 0.912 train_accuracy: 0.657 test_accuracy: 0.522
[1,    70] train_loss: 1.081 train_accuracy: 0.654 test_accuracy

In [None]:
# Dump each metric history to its own text file as a single ", "-separated line.
for out_name, history in (
    ('Accuracy_a_train.txt', a_train),   # training accuracy
    ('Accuracy_a_test.txt', a_test),     # test accuracy
    ('Accuracy_loss.txt', lossaaa),      # averaged training loss
):
    with open(out_name, 'w') as file:
        file.write(', '.join(map(str, history)))

In [None]:
# Unwrap the 0-dim tensors collected during training into plain Python numbers.
Inf_list = [value.item() for value in Inf]
Var_av_list = [value.item() for value in Var_all]
Generalization_Ratio_list = [value.item() for value in Generalization_Ratio_]

# One text file per statistic, values joined by ", " on a single line.
for out_name, history in (
    ('Inf_OOD.txt', Inf_list),
    ('Var_all_OOD.txt', Var_av_list),
    ('Generalization_Ratio_list.txt', Generalization_Ratio_list),
):
    with open(out_name, 'w') as file:
        file.write(', '.join(map(str, history)))

In [None]:
import time
import os  # Import the os module

# Directory that collects the saved models; created if it does not exist yet.
model_path = './Models/'
os.makedirs(model_path, exist_ok=True)

# Local time at minute resolution (YYYYMMDDHHMM) becomes the file-name prefix,
# so two saves within the same minute write to the same file.
rq = time.strftime('%Y%m%d%H%M')

# torch.save is given the module object itself, so the whole model is pickled
# rather than only its state_dict.
current_model_path = "{}{}_model.pkl".format(model_path, rq)
torch.save(net, current_model_path)
print("Saved model file: " + current_model_path)

In [None]:
# Best values logged during training: highest accuracies, lowest averaged loss.
for caption, reducer, history in (
    ("train:", max, a_train),
    ("test:", max, a_test),
    ("loss:", min, lossaaa),
):
    print(caption, reducer(history))

train: 0.7451
test: 0.5447
loss: 0.6319445610046387


In [None]:
trainset = torchvision.datasets.CIFAR10(root='./data', train=True, download=True, transform=transform)  # Load CIFAR-10 training dataset
testset = torchvision.datasets.CIFAR10(root='./data', train=False, download=False, transform=transform)  # Load CIFAR-10 test dataset
# Each loader yields its data in very large batches (50,000 / 10,000 samples);
# only the first batch of each loader is evaluated below.
trainloader = torch.utils.data.DataLoader(trainset, batch_size=50000, shuffle=True, num_workers=0)
t_loader = torch.utils.data.DataLoader(testset, batch_size=10000, shuffle=False, num_workers=0)

# Evaluate on whichever device the model's parameters live on instead of
# assuming CUDA (the model is only moved to the GPU when one is available).
eval_device = next(net.parameters()).device

# Pure inference: no_grad() keeps autograd from recording a graph for these
# very large batches, which only costs memory here.
with torch.no_grad():
    # ---- accuracy on the first training batch
    trainloader_iter = iter(trainloader)
    tl_image, tl_label = next(trainloader_iter)
    tl_image = tl_image.to(eval_device)
    tl_label = tl_label.to(eval_device)
    tl_imageoutputs = net(tl_image)  # [batch, 10]
    predict_y = torch.max(tl_imageoutputs, dim=1)[1]  # index of the largest logit per row
    accuracy = torch.eq(predict_y, tl_label).sum().item() / tl_label.size(0)
    print("train:", float(accuracy))

    # ---- accuracy on the first test batch
    t_data_iter = iter(t_loader)
    t_image, t_label = next(t_data_iter)
    t_image = t_image.to(eval_device)
    t_label = t_label.to(eval_device)
    t_imageoutputs = net(t_image)  # [batch, 10]
    predict = torch.max(t_imageoutputs, dim=1)[1]
    accuracy_t = torch.eq(predict, t_label).sum().item() / t_label.size(0)
    print("test:", float(accuracy_t))

Files already downloaded and verified
train: 0.73598
test: 0.5289


### **Network2: linear layers with activation layers (without GDP)**

In [None]:
import argparse
import os
import numpy as np
import torchvision.transforms as transforms
from torchvision.utils import save_image
from torch.utils.data import DataLoader
from torchvision import datasets
from torch.autograd import Variable
import torch.nn as nn
import torch

class LANet(nn.Module):
    """Fully connected classifier for 3x32x32 images.

    Layer widths: 3072 -> 1000 -> 500 -> 100 -> 50 -> 25 -> 20 -> 10.
    ReLU is applied after fc1, fc3, fc5 and fc6 only; fc2 and fc4 feed the
    next layer without a non-linearity, and fc7 returns the raw 10-way logits.
    """

    def __init__(self):
        super().__init__()

        self.fc1 = nn.Linear(3 * 32 * 32, 1000)
        self.fc2 = nn.Linear(1000, 500)
        self.fc3 = nn.Linear(500, 100)
        self.fc4 = nn.Linear(100, 50)
        self.fc5 = nn.Linear(50, 25)
        self.fc6 = nn.Linear(25, 20)
        self.fc7 = nn.Linear(20, 10)

    def forward(self, x):
        """Map a batch of images to logits.

        Args:
            x: tensor whose elements can be viewed as rows of 3*32*32 values.

        Returns:
            Tensor of shape (rows, 10) holding unnormalised class scores.
        """
        # Resolved through the `nn` import this class already needs, so the
        # model does not depend on a separate `F` alias being defined elsewhere.
        relu = nn.functional.relu

        x = x.view(-1, 3 * 32 * 32)  # flatten each image into one row
        x = relu(self.fc1(x))
        x = self.fc2(x)              # no activation after fc2
        x = relu(self.fc3(x))
        x = self.fc4(x)              # no activation after fc4
        x = relu(self.fc5(x))
        x = relu(self.fc6(x))
        return self.fc7(x)           # logits; CrossEntropyLoss applies the softmax

# Re-create the model from scratch together with the cross-entropy criterion.
net, loss_function = LANet(), nn.CrossEntropyLoss()

# Move both to the GPU when one is present; otherwise they stay on the CPU.
if torch.cuda.is_available():
    net, loss_function = net.cuda(), loss_function.cuda()

# Adam over all model parameters, created after the optional device move.
optimizer_L = torch.optim.Adam(params=net.parameters(), lr=1e-3)

In [None]:
import numpy as np
import torch
import torchvision
import torch.nn as nn
from matplotlib import pyplot as plt
import random

import torch.optim as optim
import torchvision.transforms as transforms
import time


################################################ reload
# Preprocessing: convert to tensor, then normalise every channel with
# mean 0.5 and std 0.5.
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize(mean=(0.5, 0.5, 0.5), std=(0.5, 0.5, 0.5)),
])

# ---- mini-batch loaders (batches of 40, reshuffled on every pass)
train_set = torchvision.datasets.CIFAR10(
    root='./data', train=True, download=True, transform=transform)
test_set = torchvision.datasets.CIFAR10(
    root='./data', train=False, download=True, transform=transform)

train_loader = torch.utils.data.DataLoader(
    train_set, batch_size=40, shuffle=True, num_workers=0)
testloader = torch.utils.data.DataLoader(
    test_set, batch_size=40, shuffle=True, num_workers=0)

# ---- fixed validation batch: first 10,000-sample batch of the test split, unshuffled
val_set = torchvision.datasets.CIFAR10(
    root='./data', train=False, download=False, transform=transform)
val_loader = torch.utils.data.DataLoader(
    val_set, batch_size=10000, shuffle=False, num_workers=0)
val_data_iter = iter(val_loader)
val_image, val_label = next(val_data_iter)

##################
# ---- fixed training batch: first 10,000-sample batch of the training split, unshuffled
tra_set = torchvision.datasets.CIFAR10(
    root='./data', train=True, download=False, transform=transform)
tra_loader = torch.utils.data.DataLoader(
    tra_set, batch_size=10000, shuffle=False, num_workers=0)
tra_data_iter = iter(tra_loader)
tra_image, tra_label = next(tra_data_iter)

##################
# Class names, given as a 10-entry tuple.
classes = ('plane', 'car', 'bird', 'cat',
           'deer', 'dog', 'frog', 'horse', 'ship', 'truck')

# Adam over the current model's parameters.
# NOTE(review): `optimizer_L` from the model-setup cell wraps the same
# parameters - confirm which of the two the training loop below steps.
optimizer = optim.Adam(params=net.parameters(), lr=1e-3)

########################################################

import torch
import torch.nn as nn
import torch.nn.functional as F
from collections import Counter, defaultdict
from itertools import combinations
import random

# Histories appended to by the training loop below.
a_train, a_test, lossaaa = [], [], []            # train accuracy / test accuracy / mean running loss
Inf, Var_all, Generalization_Ratio_ = [], [], []  # per-step Information / Variation / ratio tensors
dicide_action = []                               # reserved for decision actions

# Scalars carried from one optimisation step to the next.
loss_before = torch.tensor(30.0)   # loss of the previous step (starts at 30)
los = torch.tensor(30.0)           # current loss minus previous loss (starts at 30)
state_before = torch.tensor(0.0)   # previous state value

# Bookkeeping for the two actions A1 / A2.
dis_before_A1 = torch.tensor(0)    # running distance for action A1
dis_before_A2 = torch.tensor(0)    # running distance for action A2
dis = torch.tensor(1)              # distance increment
per = "N"                          # action taken on the previous step ("N" = none yet)

# Problem-size constants.
categrary_number = 10              # number of categories
tra_val_number = 2                 # training validation number

def _predict_classes(model, images):
    """Return the arg-max class index per sample, computed without autograd."""
    with torch.no_grad():
        return torch.max(model(images), dim=1)[1]


def _class_histogram(predictions, num_classes=10):
    """Count predictions per class into a CPU tensor of the default float dtype."""
    counts = torch.bincount(predictions, minlength=num_classes)
    # copy_ performs the device and dtype conversion in one shot.
    return torch.zeros(num_classes).copy_(counts[:num_classes])


def _class_probabilities(predictions, num_classes=10):
    """Histogram divided by n * (1 + 1e-6), where n is the number of predictions."""
    total = len(predictions)
    return _class_histogram(predictions, num_classes) / (total + total * 1e-6)


def _variation(p_in_12, p_in_34, p_ext_12, p_ext_34, weight=1.0 / 2):
    """Largest absolute per-class log-ratio term between in- and extracted distributions.

    Each distribution is weighted by `weight` and normalised by the weighted sum
    of all four before the log-ratio terms are formed.
    """
    denom = (p_in_12 * weight + p_in_34 * weight +
             p_ext_12 * weight + p_ext_34 * weight + 1e-30)
    in_1 = (p_in_12 * weight) / denom
    e_1 = (p_ext_12 * weight) / denom
    e_2 = (p_ext_34 * weight) / denom
    # NOTE(review): both terms use the e12 in-distribution share; the e34 share
    # was computed but never used in the original. Confirm this is intended.
    k_vs_e1 = (in_1 + 1e-30) * torch.log(in_1 / (e_1 + 1e-30) + 1e-30)
    k_vs_e2 = (in_1 + 1e-30) * torch.log(in_1 / (e_2 + 1e-30) + 1e-30)
    return torch.max(torch.max(torch.abs(k_vs_e1)), torch.max(torch.abs(k_vs_e2)))


def _abs_log_ratio(p, q):
    """|(p + eps) * log(p / (q + eps) + eps)| with eps = 1e-30."""
    return torch.abs((p + 1e-30) * torch.log(p / (q + 1e-30) + 1e-30))


def _information(hists_12, hists_34, pairs, normaliser):
    """Sum over ordered class pairs of min(e12 term, e34 term), divided by `normaliser`.

    NOTE(review): every chunk writes into the same slots, so only the last
    chunk (index 9) contributes to the result. This is kept exactly as in the
    original; confirm whether accumulation across chunks was intended.
    """
    n_pairs = len(pairs)
    terms = torch.zeros(n_pairs * 2)
    for c in range(10):
        p12 = hists_12[c] / 10
        p34 = hists_34[c] / 10
        for idx, (i, j) in enumerate(pairs):
            terms[idx] = torch.min(_abs_log_ratio(p12[i], p12[j]),
                                   _abs_log_ratio(p34[i], p34[j])).item()
            terms[idx + n_pairs] = torch.min(_abs_log_ratio(p12[j], p12[i]),
                                             _abs_log_ratio(p34[j], p34[i])).item()
    return torch.sum(terms) / normaliser


def _gdp_select_loss(candidate_losses, ratio_rose, loss_rose, prev_action,
                     score_a1, score_a2, step_size):
    """Pick which of the two candidate losses to optimise on this step.

    Args:
        candidate_losses: 1-D tensor ``[loss_A1, loss_A2]``.
        ratio_rose: True when the state difference is >= 0.
        loss_rose: True when the last loss difference is > 0.
        prev_action: ``"A1"``, ``"A2"`` or anything else (no previous action).
        score_a1, score_a2: running distances of the two actions.
        step_size: distance increment; doubled when ``loss_rose`` is False.

    Returns:
        ``(loss, new_score_a1, new_score_a2, action)``.

    The previous action's score goes up when ``ratio_rose == loss_rose`` and
    down otherwise, and the other action's score moves the opposite way. The
    higher score (ties go to A1) is sampled with weights 3:1 and keeps 3/4 of
    its score while the other keeps 1/4. With no previous action, A1 is
    favoured without any score update.
    """
    if prev_action in ("A1", "A2"):
        shift = step_size if loss_rose else step_size * 2
        raise_a1 = (ratio_rose == loss_rose) == (prev_action == "A1")
        if raise_a1:
            score_a1 = score_a1 + shift
            score_a2 = score_a2 - shift
        else:
            score_a1 = score_a1 - shift
            score_a2 = score_a2 + shift
        favour_a1 = bool(score_a1 >= score_a2)
    else:
        favour_a1 = True

    if favour_a1:
        weights, keep_a1, keep_a2, action = [3, 1], 3 / 4, 1 / 4, "A1"
    else:
        weights, keep_a1, keep_a2, action = [1, 3], 1 / 4, 3 / 4, "A2"
    # random.choices indexes the tensor, so the returned 0-d loss keeps its graph.
    chosen = random.choices(candidate_losses, weights=weights)[0]
    return chosen, score_a1 * keep_a1, score_a2 * keep_a2, action


# Loop-invariant work: move the fixed probe tensors to the GPU once.
e12_extracted_loader_image = e12_extracted_loader_image.cuda()
e12_extracted_loader_label = e12_extracted_loader_label.cuda()
e34_extracted_loader_image = e34_extracted_loader_image.cuda()
e34_extracted_loader_label = e34_extracted_loader_label.cuda()
e12_s_loader_image = e12_s_loader_image.cuda()
e12_s_loader_label = e12_s_loader_label.cuda()
e34_s_loader_image = e34_s_loader_image.cuda()
e34_s_loader_label = e34_s_loader_label.cuda()
s_test_image = s_test_image.cuda()
s_test_label = s_test_label.cuda()
s_tra_image = s_tra_image.cuda()
s_tra_label = s_tra_label.cuda()

all_combinations = list(combinations(range(10), 2))  # unordered class pairs
K = categrary_number * (categrary_number - 1)        # number of ordered class pairs
loss_before = loss_before.cuda()

for epoch in range(5):  # Loop over the dataset multiple times
    # Created before the e1234 iterator, as in the original, so shuffling order is unchanged.
    e3412_iter = iter(e3412_loader)
    running_loss = 0.0

    for step, (imgs, labels) in enumerate(e1234_loader):
        ### candidate losses for the two actions
        labels = labels.cuda()
        imgs = imgs.cuda()
        out_e1234 = net(imgs)
        loss_out_e1234 = loss_function(out_e1234, labels)  # A1

        e3412_imgs, e3412_labels = next(e3412_iter)
        e3412_imgs = e3412_imgs.cuda()
        e3412_labels = e3412_labels.cuda()
        out_e3412 = net(e3412_imgs)
        loss_out_e3412 = loss_function(out_e3412, e3412_labels)  # A2

        ### predicted classes on the fixed probe sets (no gradients needed)
        e12_extracted = _predict_classes(net, e12_extracted_loader_image)
        e34_extracted = _predict_classes(net, e34_extracted_loader_image)
        e12_inform = _predict_classes(net, e12_s_loader_image)
        e34_inform = _predict_classes(net, e34_s_loader_image)

        # Per-chunk class histograms (the predictions are split into 10 chunks).
        split_e12_inform = [_class_histogram(chunk) for chunk in torch.chunk(e12_inform, 10)]
        split_e34_inform = [_class_histogram(chunk) for chunk in torch.chunk(e34_inform, 10)]

        ############################### Variation x ###################################
        Variation_all = _variation(_class_probabilities(e12_inform),
                                   _class_probabilities(e34_inform),
                                   _class_probabilities(e12_extracted),
                                   _class_probabilities(e34_extracted))
        Var_all.append(Variation_all)

        ############################ Information ###################################
        Information = _information(split_e12_inform, split_e34_inform, all_combinations, K)
        Inf.append(Information)

        ############################ Generalization_Ratio ###################################
        Generalization_Ratio = Variation_all * (Information + 1.0) / Information
        Generalization_Ratio_.append(Generalization_Ratio)

        ############################ Generalization Decision Process (GDP) ###################################
        state_now = Generalization_Ratio
        state_before = state_before.cuda()
        state_dis = state_now - state_before
        candidate_losses = torch.cat((loss_out_e1234.unsqueeze(0), loss_out_e3412.unsqueeze(0)), 0)

        loss, dis_before_A1, dis_before_A2, per = _gdp_select_loss(
            candidate_losses,
            bool(state_dis >= 0.0),
            bool(los > 0.0),
            per,
            dis_before_A1,
            dis_before_A2,
            dis,
        )

        ####################################
        #### optimizer
        # Detach so the bookkeeping scalars do not keep earlier autograd graphs alive.
        los = loss.detach() - loss_before
        state_before = state_now
        optimizer_L.zero_grad()
        loss.backward()
        optimizer_L.step()
        loss_before = loss.detach()
        running_loss += loss.item()

        # Print statistics
        if step % 5 == 4:  # Report every 5 mini-batches
            with torch.no_grad():
                outputs = net(s_test_image)  # [batch, 10]
                predict_y = torch.max(outputs, dim=1)[1]
                accuracy = torch.eq(predict_y, s_test_label).sum().item() / s_test_label.size(0)
                a_test.append(float(accuracy))
                outputs_t = net(s_tra_image)  # [batch, 10]
                predict_y_t = torch.max(outputs_t, dim=1)[1]
                accuracy_t = torch.eq(predict_y_t, s_tra_label).sum().item() / s_tra_label.size(0)
                a_train.append(float(accuracy_t))
                lossaaa.append(float(running_loss / 5))
                print('[%d, %5d] train_loss: %.3f train_accuracy: %.3f test_accuracy: %.3f' %
                      (epoch + 1, step + 1, running_loss / 5, accuracy_t, accuracy))
                running_loss = 0.0

Files already downloaded and verified
Files already downloaded and verified
[1,     5] train_loss: 2.275 train_accuracy: 0.129 test_accuracy: 0.131
[1,    10] train_loss: 2.233 train_accuracy: 0.181 test_accuracy: 0.180
[1,    15] train_loss: 2.150 train_accuracy: 0.185 test_accuracy: 0.186
[1,    20] train_loss: 2.135 train_accuracy: 0.186 test_accuracy: 0.185
[1,    25] train_loss: 2.049 train_accuracy: 0.218 test_accuracy: 0.219
[1,    30] train_loss: 2.099 train_accuracy: 0.194 test_accuracy: 0.199
[1,    35] train_loss: 2.068 train_accuracy: 0.188 test_accuracy: 0.188
[1,    40] train_loss: 2.110 train_accuracy: 0.236 test_accuracy: 0.237
[1,    45] train_loss: 2.058 train_accuracy: 0.225 test_accuracy: 0.227
[1,    50] train_loss: 2.098 train_accuracy: 0.246 test_accuracy: 0.248
[1,    55] train_loss: 2.034 train_accuracy: 0.240 test_accuracy: 0.240
[1,    60] train_loss: 2.062 train_accuracy: 0.252 test_accuracy: 0.251
[1,    65] train_loss: 2.010 train_accuracy: 0.251 test_accu

In [None]:
# Dump each tracked series to its own text file as a single ", "-separated line.
for out_path, series in (
    ('Accuracy_a_train.txt', a_train),   # training accuracy
    ('Accuracy_a_test.txt', a_test),     # test accuracy
    ('Accuracy_loss.txt', lossaaa),      # mean running loss
):
    with open(out_path, 'w') as file:
        file.write(', '.join(str(value) for value in series))

In [None]:
# Unwrap the 0-d tensors collected during training into plain Python numbers.
Inf_list = [entry.item() for entry in Inf]
Var_av_list = [entry.item() for entry in Var_all]
Generalization_Ratio_list = [entry.item() for entry in Generalization_Ratio_]

# One file per metric, values joined by ", " on a single line.
for out_path, series in (
    ('Inf_OOD.txt', Inf_list),
    ('Var_all_OOD.txt', Var_av_list),
    ('Generalization_Ratio_list.txt', Generalization_Ratio_list),
):
    with open(out_path, 'w') as file:
        file.write(', '.join(str(value) for value in series))

In [None]:
import time
import os  # Import the os module

# Output directory for checkpoints; created on first use.
model_path = './Models/'
os.makedirs(model_path, exist_ok=True)

# Minute-resolution local timestamp (YYYYMMDDHHMM) used as the file-name prefix.
rq = time.strftime('%Y%m%d%H%M')

# Save the training results.
# The whole module object is saved (not just its state_dict), so loading it
# later requires the model's class definition to be importable.
current_model_path = f"{model_path}{rq}_model.pkl"
torch.save(net, current_model_path)
print("Saved model file: " + current_model_path)

In [None]:
# Report the best value reached by each tracked series:
# highest accuracies, lowest mean running loss.
for label, pick_best, series in (
    ("train:", max, a_train),
    ("test:", max, a_test),
    ("loss:", min, lossaaa),
):
    print(label, pick_best(series))

train: 0.5879
test: 0.5198
loss: 1.0879650712013245


In [None]:
# Final evaluation: accuracy of `net` on one full-size batch of each CIFAR-10 split.
trainset = torchvision.datasets.CIFAR10(root='./data', train=True, download=True, transform=transform)
testset = torchvision.datasets.CIFAR10(root='./data', train=False, download=False, transform=transform)
trainloader = torch.utils.data.DataLoader(trainset, batch_size=50000, shuffle=True, num_workers=0)
t_loader = torch.utils.data.DataLoader(testset, batch_size=10000, shuffle=False, num_workers=0)

# NOTE(review): `net` is evaluated in whatever mode it is currently in. If it
# contains dropout or batch-norm layers, call net.eval() first - confirm.

# Pure inference: disable autograd so no graph is built for these very large batches.
with torch.no_grad():
    trainloader_iter = iter(trainloader)
    tl_image, tl_label = next(trainloader_iter)  # one batch of up to 50000 training samples
    tl_image = tl_image.cuda()
    tl_label = tl_label.cuda()
    tl_imageoutputs = net(tl_image)  # [batch, 10]
    predict_y = torch.max(tl_imageoutputs, dim=1)[1]  # arg-max class per sample
    accuracy = torch.eq(predict_y, tl_label).sum().item() / tl_label.size(0)
    print("train:", float(accuracy))

    t_data_iter = iter(t_loader)
    t_image, t_label = next(t_data_iter)  # one batch of up to 10000 test samples
    t_image = t_image.cuda()
    t_label = t_label.cuda()
    t_imageoutputs = net(t_image)  # [batch, 10]
    predict = torch.max(t_imageoutputs, dim=1)[1]  # arg-max class per sample
    accuracy_t = torch.eq(predict, t_label).sum().item() / t_label.size(0)
    print("test:", float(accuracy_t))

Files already downloaded and verified
train: 0.56694
test: 0.501
