In [1]:
# Standard library
import csv  # Write the recorded losses to CSV files
import os  # Import the os module to operate the file path
import random  # Seed Python's built-in random number generator
import time # Used to record training time
from time import sleep

# Third-party
import matplotlib.pyplot as plt
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import torch.utils.data as Data
from PIL import Image  # Import the Image module in the PIL library for image processing
from torchvision import transforms  # Import the transforms module for data preprocessing
from tqdm import tqdm  # Progress bar module

In [2]:
# Run on the GPU when a usable CUDA runtime is detected; otherwise fall back to the CPU
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')

In [3]:
# Training hyperparameters (tune these before re-running the notebook)
EPOCH, BATCH_SIZE = 10, 32   # Number of passes over the training set, samples per mini-batch
LR = 1e-2                    # Learning rate handed to the optimizer

In [4]:
# Set the random number seeds
def setup_seed(seed):
    """Seed every random number generator this notebook can draw from.

    Seeds Python's ``random`` module, NumPy's legacy global generator and
    PyTorch (CPU and all CUDA devices), then asks cuDNN for deterministic
    kernels and turns off its auto-tuner, which may otherwise pick different
    algorithms from run to run.

    Args:
        seed: Integer seed shared by all generators.
    """
    random.seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)  # Safe to call when CUDA is unavailable
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False


SEED = 2809
setup_seed(SEED)

### Load the dataset

In [5]:
# Dataset of serialized tensors whose labels are stored in a separate txt file,
# one sample per line in the format "filename\t label"
class CustomDataset(Data.Dataset):
    """Map-style dataset pairing tensor files on disk with labels from a text file.

    Each non-blank line of ``label_file`` must look like ``"<filename>\\t <label>"``
    (a tab followed by a space separates the two fields). The label field is read
    one character at a time: every non-space character becomes one float entry of
    the label tensor, so each label component has to be a single digit.

    Args:
        data_dir: Directory containing the tensor files named in ``label_file``.
        label_file: Path of the label text file.
        transform: Optional callable applied to each loaded tensor in
            ``__getitem__``; ``None`` leaves the tensor unchanged.
    """

    def __init__(self, data_dir, label_file, transform=None):
        super().__init__()
        self.data_dir = data_dir
        self.label_file = label_file
        self.transform = transform
        # List of (tensor file path, label tensor) tuples, built once up front
        self.samples = self._load_samples()

    def _load_samples(self):
        """Parse the label file into a list of ``(data_path, label_tensor)`` tuples."""
        samples = []
        with open(self.label_file, 'r') as f:
            for line in f:
                line = line.strip()
                if not line:
                    # A blank (e.g. trailing) line carries no sample; skip it
                    # instead of failing on the tuple unpacking below.
                    continue
                data_name, label_str = line.split('\t ')
                # One float per non-space character of the label field
                label = torch.tensor([float(ch) for ch in label_str if ch != ' '])
                data_path = os.path.join(self.data_dir, data_name)
                samples.append((data_path, label))
        return samples

    def __len__(self):
        """Return the number of samples listed in the label file."""
        return len(self.samples)

    def __getitem__(self, index):
        """Load the tensor at ``index`` and return ``(data, label)``.

        Size-1 leading dimensions 0 and 1 are squeezed away (the stored tensors
        are expected to be shaped like [1, 1, 60], giving [60]), then
        ``transform`` is applied when one was supplied.
        """
        data_path, label = self.samples[index]
        # NOTE: torch.load unpickles the file; only point this at trusted data.
        data = torch.load(data_path)
        data = torch.squeeze(data, (0, 1))
        if self.transform is not None:
            data = self.transform(data)
        return data, label

In [6]:
# Locations of the serialized tensor samples and of their label text files
train_data_dir = './dataset/data_train/'
train_label_file = './dataset/labels_train.txt'
test_data_dir = './dataset/data_test/'
test_label_file = './dataset/labels_test.txt'

# One dataset instance per split (no preprocessing transform is used)
train_dataset = CustomDataset(train_data_dir, train_label_file, transform=None)
test_dataset = CustomDataset(test_data_dir, test_label_file, transform=None)

# Report how many samples each split contains
print("Size of train dataset:", len(train_dataset), sep="\n")
print("Size of test dataset:", len(test_dataset), sep="\n")

# Mini-batch loaders: training batches are reshuffled every epoch,
# test batches keep the order of the label file
train_loader = Data.DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True)
test_loader = Data.DataLoader(test_dataset, batch_size=BATCH_SIZE, shuffle=False)

Size of train dataset:
18000
Size of test dataset:
600


### Model

In [7]:
# Fully connected network: 60 inputs -> 30 -> 12 -> 6 outputs
class Net(torch.nn.Module):
    """Three-layer perceptron with ReLU hidden activations and a sigmoid output.

    The layers are exposed as ``l1``, ``l2`` and ``l3``; these attribute names
    also determine the keys of the model's ``state_dict``.
    """

    def __init__(self):
        super().__init__()
        self.l1 = nn.Linear(in_features=60, out_features=30)
        self.l2 = nn.Linear(in_features=30, out_features=12)
        self.l3 = nn.Linear(in_features=12, out_features=6)

    def forward(self, x):
        """Map a ``(..., 60)`` input to ``(..., 6)`` values in the range (0, 1)."""
        hidden = torch.relu(self.l1(x))
        hidden = torch.relu(self.l2(hidden))
        logits = self.l3(hidden)
        return torch.sigmoid(logits)

In [8]:
# Build the network and place its parameters on the selected device
model = Net()
model = model.to(device)
# Mean squared error between the sigmoid outputs and the label vectors
criterion = nn.MSELoss(reduction='mean')
# Adam over all trainable parameters, using the configured learning rate
optimizer = optim.Adam(params=model.parameters(), lr=LR)

### To train and test the model

In [9]:
# Records filled in during training. Later cells write them to CSV files and call
# .item() on every loss entry, so losses are stored as detached 0-dim tensors.
train_loss_list = []  # Average training loss of every epoch
test_loss_list = []   # Average test loss of every epoch (mean over test batches)
loss_list = []        # Training loss of every batch
time_list = []        # Seconds elapsed since start_time when each batch loss was computed

# Train the network and evaluate it on the test set after every epoch
start_time = time.time()
for epoch in range(EPOCH):
    model.train()
    train_loss = 0
    count = 0
    for datas, labels in tqdm(train_loader):
        datas = datas.to(device)
        labels = labels.to(device)
        # Forward pass and loss
        output = model(datas)
        loss = criterion(output, labels)
        time_list.append(time.time() - start_time)
        # Detach so the stored value does not keep the autograd graph alive
        batch_loss = loss.detach()
        loss_list.append(batch_loss)
        count += 1
        train_loss += batch_loss
        # Backward pass and parameter update
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
    train_loss_list.append(train_loss / count)  # Average training loss of this epoch
    # Note: this prints the loss of the last batch of the epoch, not the epoch average
    print("Epoch[{}/{}], loss:{:.4f}".format(epoch+1, EPOCH, loss.item()))

    # After each epoch, run the whole test set once and record the average batch loss.
    # Gradients are not needed here, so autograd is switched off for the evaluation.
    model.eval()
    test_loss = 0
    count = 0
    with torch.no_grad():
        for datas, labels in test_loader:
            output = model(datas.to(device))
            count += 1
            test_loss += criterion(output, labels.to(device))
    test_loss_list.append(test_loss / count)

100%|████████████████████████████████████████████████████████████████████████████████| 563/563 [02:59<00:00,  3.13it/s]


Epoch[1/10], loss:0.0026


100%|████████████████████████████████████████████████████████████████████████████████| 563/563 [00:12<00:00, 43.80it/s]


Epoch[2/10], loss:0.0000


100%|████████████████████████████████████████████████████████████████████████████████| 563/563 [00:12<00:00, 43.85it/s]


Epoch[3/10], loss:0.0000


100%|████████████████████████████████████████████████████████████████████████████████| 563/563 [00:12<00:00, 43.72it/s]


Epoch[4/10], loss:0.0000


100%|████████████████████████████████████████████████████████████████████████████████| 563/563 [00:13<00:00, 42.56it/s]


Epoch[5/10], loss:0.0000


100%|████████████████████████████████████████████████████████████████████████████████| 563/563 [00:13<00:00, 43.12it/s]


Epoch[6/10], loss:0.0000


100%|████████████████████████████████████████████████████████████████████████████████| 563/563 [00:13<00:00, 40.99it/s]


Epoch[7/10], loss:0.0000


100%|████████████████████████████████████████████████████████████████████████████████| 563/563 [00:14<00:00, 38.42it/s]


Epoch[8/10], loss:0.0000


100%|████████████████████████████████████████████████████████████████████████████████| 563/563 [00:14<00:00, 37.57it/s]


Epoch[9/10], loss:0.0000


100%|████████████████████████████████████████████████████████████████████████████████| 563/563 [00:13<00:00, 42.61it/s]


Epoch[10/10], loss:0.0000


### Record the loss value of the training process in a csv file

file name:"Net_MSE_ReLU_0.01_60_30_12_6.csv" 

Naming scheme: model name_loss function_activation function_learning rate_number of units in each layer

In [10]:
# Write the per-epoch average training and test losses to a CSV file
epoch_csv_path = "./epoch_train_test_param/Net_MSE_ReLU_0.01_60_30_12_6.csv"
# Create the output folder on a fresh checkout instead of failing in open()
os.makedirs(os.path.dirname(epoch_csv_path), exist_ok=True)

with open(epoch_csv_path, "w", newline='') as csvfile:
    writer = csv.writer(csvfile)

    # Header row
    writer.writerow(["epoch","train_loss","test_loss"])
    # One row per recorded epoch; iterating the lists themselves keeps this cell
    # valid even if EPOCH was edited after the training cell ran
    for epoch_number, (epoch_train_loss, epoch_test_loss) in enumerate(
            zip(train_loss_list, test_loss_list), start=1):
        writer.writerow([epoch_number, epoch_train_loss.item(), epoch_test_loss.item()])

In [11]:
# Write the per-batch (elapsed time, training loss) pairs to a CSV file
time_csv_path = "./time_loss_param/Net_MSE_ReLU_0.01_60_30_12_6.csv"
# Create the output folder on a fresh checkout instead of failing in open()
os.makedirs(os.path.dirname(time_csv_path), exist_ok=True)

with open(time_csv_path, "w", newline='') as csvfile:
    writer = csv.writer(csvfile)

    writer.writerow(["time","loss"])
    for elapsed, batch_loss_value in zip(time_list, loss_list):
        writer.writerow([elapsed, batch_loss_value.item()])

### Save the parameters of the model

In [12]:
# Save only the learned parameters (state_dict), creating the target folder if needed
model_param_path = "./model_param/Net_MSE_60_30_12_6_ReLU.pth"
os.makedirs(os.path.dirname(model_param_path), exist_ok=True)
torch.save(model.state_dict(), model_param_path)

### To test the model

In [13]:
# Exact-match accuracy on the test set: a sample counts as correct only when
# every rounded output component equals the corresponding label component.
model.eval()
count_right = 0
count = 0
# Inference only, so autograd bookkeeping is switched off
with torch.no_grad():
    for datas, labels in test_loader:
        output = model(datas.to(device))
        labels = labels.to(device)
        # One boolean per sample: True when the whole rounded vector matches
        exact_match = (torch.round(output) == labels).all(dim=1)
        count_right += int(exact_match.sum())
        count += output.size(0)

# accuracy
print(count_right / count)

0.9983333333333333
