In [1]:
from torch import nn
import torch


def _make_divisible(ch, divisor=8, min_ch=None):
    if min_ch is None:
        min_ch = divisor
    new_ch = max(min_ch, int(ch + divisor / 2) // divisor * divisor)
    if new_ch < 0.9 * ch:
        new_ch += divisor
    return new_ch


class ConvBNReLU(nn.Sequential):
    """Conv1d -> BatchNorm1d -> ReLU6, registered as sub-modules 0, 1 and 2."""

    def __init__(self, in_channel, out_channel, kernel_size=3, stride=1, groups=1):
        conv = nn.Conv1d(
            in_channel,
            out_channel,
            kernel_size,
            stride,
            (kernel_size - 1) // 2,  # padding: half the kernel, rounded down
            groups=groups,
            bias=False,  # no conv bias; the BatchNorm that follows has its own
        )
        norm = nn.BatchNorm1d(out_channel)
        activation = nn.ReLU6(inplace=True)
        # Positional registration keeps the state-dict keys "0.*", "1.*", "2.*".
        super().__init__(conv, norm, activation)


class InvertedResidual(nn.Module):
    """Bottleneck block: optional 1x1 expansion, depthwise conv, 1x1 projection + BN.

    The input is added back to the output only when ``stride == 1`` and the
    input and output channel counts are equal.
    """

    def __init__(self, in_channel, out_channel, stride, expand_ratio):
        super().__init__()
        expanded = in_channel * expand_ratio
        # A residual sum needs matching shapes on both sides.
        self.use_shortcut = stride == 1 and in_channel == out_channel

        # With expand_ratio == 1 the pointwise expansion stage is omitted.
        stages = [] if expand_ratio == 1 else [ConvBNReLU(in_channel, expanded, kernel_size=1)]
        # Depthwise convolution: one group per channel; carries the stride.
        stages.append(ConvBNReLU(expanded, expanded, stride=stride, groups=expanded))
        # Projection back to out_channel with BatchNorm but no activation.
        stages.append(nn.Conv1d(expanded, out_channel, kernel_size=1, bias=False))
        stages.append(nn.BatchNorm1d(out_channel))

        self.conv = nn.Sequential(*stages)

    def forward(self, x):
        """Apply the block, adding the input back when the shortcut is enabled."""
        out = self.conv(x)
        return x + out if self.use_shortcut else out


class MobileNetV2(nn.Module):
    """MobileNetV2-style classifier assembled from 1-D convolution blocks.

    Args:
        num_classes: number of outputs of the final linear layer.
        alpha: width multiplier applied to every channel count before rounding.
        round_nearest: channel counts are rounded to a multiple of this value.
        in_channels: number of channels of the input signal. Defaults to 1,
            the value that used to be hard-coded in the stem convolution.
    """

    def __init__(self, num_classes=40, alpha=1.0, round_nearest=8, in_channels=1):
        super(MobileNetV2, self).__init__()
        block = InvertedResidual
        input_channel = _make_divisible(32 * alpha, round_nearest)
        last_channel = _make_divisible(1280 * alpha, round_nearest)

        inverted_residual_setting = [
            # t: expand ratio, c: output channels (before alpha),
            # n: number of repeated blocks, s: stride of the first repeat
            # t, c, n, s
            [1, 16, 1, 1],
            [6, 24, 2, 2],
            [6, 32, 3, 2],
            [6, 64, 4, 2],
            [6, 96, 3, 1],
            [6, 160, 3, 2],
            [6, 320, 1, 1],
        ]

        features = []
        # Stem: strided 3-wide convolution from the raw input channels.
        features.append(ConvBNReLU(in_channels, input_channel, stride=2))
        for t, c, n, s in inverted_residual_setting:
            output_channel = _make_divisible(c * alpha, round_nearest)
            for i in range(n):
                # Only the first block of each stage applies the stage stride.
                stride = s if i == 0 else 1
                features.append(block(input_channel, output_channel, stride, expand_ratio=t))
                input_channel = output_channel
        # Head: 1x1 convolution up to the final feature width.
        features.append(ConvBNReLU(input_channel, last_channel, 1))
        self.features = nn.Sequential(*features)

        # Collapse the length dimension to a single value per channel.
        self.avgpool = nn.AdaptiveAvgPool1d(1)
        self.classifier = nn.Sequential(
            nn.Dropout(0.2),
            nn.Linear(last_channel, num_classes)
        )

        # Weight initialisation: Kaiming-normal (fan_out) for convolutions,
        # identity scale/shift for batch norm, small Gaussian for the classifier.
        for m in self.modules():
            if isinstance(m, nn.Conv1d):
                nn.init.kaiming_normal_(m.weight, mode='fan_out')
                if m.bias is not None:
                    nn.init.zeros_(m.bias)
            elif isinstance(m, nn.BatchNorm1d):
                nn.init.ones_(m.weight)
                nn.init.zeros_(m.bias)
            elif isinstance(m, nn.Linear):
                nn.init.normal_(m.weight, 0, 0.01)
                nn.init.zeros_(m.bias)

    def forward(self, x):
        """Map a ``(batch, in_channels, length)`` tensor to ``(batch, num_classes)`` logits."""
        x = self.features(x)
        x = self.avgpool(x)
        # (batch, channels, 1) -> (batch, channels)
        x = torch.flatten(x, 1)
        x = self.classifier(x)
        return x

In [2]:
import matplotlib
matplotlib.use('TkAgg')
import matplotlib.pyplot as plt
import numpy as np

def plot_matrix(conf_matrix, dev_list, save_path):
    """Draw a confusion matrix as an annotated heat map, save it, then show it.

    Args:
        conf_matrix: 2-D array of counts; rows are drawn on the "True Label"
            axis and columns on the "Predicted Label" axis.
        dev_list: tick labels, used for both axes.
        save_path: file the figure is written to before it is displayed.
    """
    plt.figure(figsize=(20, 16))
    plt.imshow(conf_matrix, cmap=plt.cm.Blues)
    plt.title('Confusion Matrix')
    plt.colorbar()

    # Entries above half of the largest count are annotated in white.
    half_max = conf_matrix.max() / 2.
    n_rows, n_cols = conf_matrix.shape[0], conf_matrix.shape[1]
    for row, col in np.ndindex(n_rows, n_cols):
        count = conf_matrix[row, col]
        text_colour = "black"
        if count > half_max:
            text_colour = "white"
        # imshow places columns along x and rows along y.
        plt.text(col, row, count,
                 ha="center", va="center",
                 color=text_colour, fontsize=6)

    positions = np.arange(len(dev_list))
    plt.xticks(positions, dev_list)
    plt.yticks(positions, dev_list)
    plt.xticks(rotation=90)
    plt.yticks(rotation=0)
    plt.xlabel('Predicted Label')
    plt.ylabel('True Label')
    plt.savefig(save_path)
    plt.show()

In [3]:
import os
from PIL import Image
import torch
import torch.nn as nn
import torch.optim as optim
from torchvision import transforms
from torch.utils.data import DataLoader
import numpy as np
from sklearn.metrics import f1_score
from sklearn.metrics import recall_score
from sklearn.metrics import confusion_matrix

class CustomDataset(torch.utils.data.Dataset):
    """Dataset of ``.npy`` arrays stored as ``data_dir/<category>/<file>``.

    Categories are the sorted sub-directory names of ``data_dir``; a sample's
    label is the index of its category in that sorted list.

    Args:
        data_dir: root directory containing one sub-directory per category.
        transform: optional callable applied to the PIL image built from each
            array. When ``None`` the PIL image is returned unchanged.
    """

    def __init__(self, data_dir, transform=None):
        self.data_dir = data_dir
        self.transform = transform
        # Only sub-directories are categories; a stray file at the top level
        # would otherwise make the os.listdir() call below raise.
        self.categories = sorted(
            entry for entry in os.listdir(data_dir)
            if os.path.isdir(os.path.join(data_dir, entry))
        )
        self.data = []  # list of (file_path, label) pairs
        # enumerate() yields the label directly instead of a list.index()
        # lookup for every single file.
        for label, category in enumerate(self.categories):
            category_dir = os.path.join(data_dir, category)
            self.data.extend(
                (os.path.join(category_dir, file), label)
                for file in sorted(os.listdir(category_dir))
            )

    def __getitem__(self, index):
        """Load sample ``index`` and return ``(image, label)``."""
        file_path, label = self.data[index]
        data = np.load(file_path)
        # NOTE(review): the uint8 cast truncates non-integer or out-of-range
        # values; confirm the stored arrays are already 0-255 image data.
        image = Image.fromarray(data.astype(np.uint8))
        # Use the transform given to this instance, not a module-level global.
        if self.transform is not None:
            image = self.transform(image)
        return image, label

    def __len__(self):
        """Number of samples found under ``data_dir``."""
        return len(self.data)
# ---------------------------------------------------------------------------
# Training script: builds the data loaders, trains MobileNetV2 for `epochs`
# epochs, checkpoints the weights with the best macro-F1 on the validation
# set, logs per-epoch metrics and finally plots the best confusion matrix.
# ---------------------------------------------------------------------------
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
print("using {} device.".format(device))

# Single-channel image -> tensor -> (x - 0.5) / 0.5 normalisation.
# Normalize expects sequences, hence the one-element tuples.
transform = transforms.Compose([transforms.Grayscale(num_output_channels=1),
                                transforms.ToTensor(),
                                transforms.Normalize((0.5,), (0.5,))])

train_dataset = CustomDataset("features/train_npy", transform=transform)
train_num = len(train_dataset)
dev_list = train_dataset.categories

batch_size = 32
train_loader = torch.utils.data.DataLoader(train_dataset,
                                           batch_size=batch_size, shuffle=True,
                                           num_workers=0)

validate_dataset = CustomDataset("features/val_npy", transform=transform)
val_num = len(validate_dataset)
validate_loader = torch.utils.data.DataLoader(validate_dataset,
                                              batch_size=batch_size, shuffle=False,
                                              num_workers=0)

print("using {} images for training, {} images for validation.".format(train_num, val_num))

# NOTE(review): num_classes is fixed at 27; presumably this equals
# len(dev_list) for the dataset in use - confirm.
net = MobileNetV2(num_classes=27)
net.to(device)
loss_function = nn.CrossEntropyLoss()
optimizer = optim.Adam(net.parameters(), lr=0.0001)

epochs = 20
# Every sample is flattened to a (1, signal_length) sequence for the 1-D network.
signal_length = 1500
save_path = './MobileNet_parameters.pth'
# torch.save does not create missing directories.
os.makedirs('pre_val_label', exist_ok=True)
best_f1 = 0.0
best_pre = None
best_val = None
train_accurate_list = []
val_accurate_list = []
f1_list = []
recall_list = []

num_train_batches = len(train_loader)

for epoch in range(epochs):
    # ---- training pass ----
    net.train()
    running_loss = 0.0
    train_acc = 0.0
    for step, data in enumerate(train_loader, start=0):
        images, labels = data
        images = images.reshape(images.shape[0], 1, signal_length).to(device)
        labels = labels.to(device)
        optimizer.zero_grad()
        outputs = net(images)
        predict_y = torch.max(outputs, dim=1)[1]
        train_acc += torch.eq(predict_y, labels).sum().item()
        loss = loss_function(outputs, labels)
        loss.backward()
        optimizer.step()

        batch_loss = loss.item()
        running_loss += batch_loss
        rate = (step + 1) / num_train_batches
        a = "*" * int(rate * 50)
        b = "." * int((1 - rate) * 50)
        print("\rtrain loss:{:^3.0f}%[{}->{}]{:.3f}".format(int(rate * 100), a, b, batch_loss), end="")
    print()
    train_accurate = train_acc / train_num
    train_accurate_list.append(train_accurate)

    # ---- validation pass ----
    net.eval()
    acc = 0.0
    # Collect per-batch results and concatenate once, keeping integer labels
    # on the CPU instead of growing a float tensor on the device.
    pre_batches = []
    val_batches = []
    with torch.no_grad():
        for val_data in validate_loader:
            val_images, val_labels = val_data
            val_images = val_images.reshape(val_images.shape[0], 1, signal_length)
            outputs = net(val_images.to(device))
            predict_y = torch.max(outputs, dim=1)[1].cpu()
            pre_batches.append(predict_y)
            val_batches.append(val_labels)
            acc += torch.eq(predict_y, val_labels).sum().item()
    pre = torch.cat(pre_batches)
    val = torch.cat(val_batches)
    val_accurate = acc / val_num
    val_accurate_list.append(val_accurate)
    f1 = f1_score(val.numpy(), pre.numpy(), average='macro')
    recall = recall_score(val.numpy(), pre.numpy(), average='macro')

    f1_list.append(f1)
    recall_list.append(recall)
    # Keep the weights and predictions of the epoch with the best macro-F1.
    if f1 > best_f1:
        best_f1 = f1
        best_pre = pre
        best_val = val
        torch.save(net.state_dict(), save_path)
        torch.save(best_pre, 'pre_val_label/best_pre_MobileNet.pt')
        torch.save(best_val, 'pre_val_label/best_val_MobileNet.pt')
    # Average over the number of batches (the last `step` index is one short of it).
    print('[epoch %d] train_loss: %.3f train_accuracy: %.3f val_accuracy: %.3f  recall: %.3f  f1: %.3f' %
          (epoch + 1, running_loss / num_train_batches, train_accurate, val_accurate, recall, f1))
    with open("MobileNet_result_npy.txt", 'a') as file:
        file.write("[epoch " + str(epoch + 1) + "]" + "  " + "train_accuracy:" + str(train_accurate) + "  " + "val_accuracy:" + str(val_accurate) + "  " + "recall:" + str(recall) + "  " + "f1:" + str(f1) + '\n')
print('Finished Training')
# A list so the written text contains the actual values, not "range(1, N)".
iterations = list(range(1, len(train_accurate_list) + 1))
with open("MobileNet_npy_plt_data.txt", 'a') as file:
    file.write("iterations:" + str(iterations) +
               "train_accurate_list:" + str(train_accurate_list) +
               "val_accurate_list:" + str(val_accurate_list) +
               "f1_list:" + str(f1_list) +
               "recall_list:" + str(recall_list) +
               "dev_list:" + str(dev_list) + '\n')
if best_pre is not None:
    # Fix the label set so the matrix always has one row/column per entry of
    # dev_list, even if some class never occurs in the validation results.
    conf_matrix = confusion_matrix(best_val.numpy(), best_pre.numpy(),
                                   labels=list(range(len(dev_list))))
    plot_matrix(conf_matrix, dev_list, "MobileNet_confusion_matrix_npy.png")
else:
    print("No epoch improved on the initial F1 score; confusion matrix skipped.")

using cuda:0 device.
using 185997 images for training, 46497 images for validation.
train loss:100%[**************************************************->]0.701
[epoch 1] train_loss: 1.154 train_accuracy: 0.663 val_accuracy: 0.669  recall: 0.627  f1: 0.600
train loss:100%[**************************************************->]0.990
[epoch 2] train_loss: 0.435 train_accuracy: 0.854 val_accuracy: 0.860  recall: 0.802  f1: 0.799
train loss:100%[**************************************************->]0.302
[epoch 3] train_loss: 0.314 train_accuracy: 0.891 val_accuracy: 0.800  recall: 0.757  f1: 0.753
train loss:100%[**************************************************->]0.189
[epoch 4] train_loss: 0.255 train_accuracy: 0.911 val_accuracy: 0.639  recall: 0.639  f1: 0.585
train loss:100%[**************************************************->]0.771
[epoch 5] train_loss: 0.214 train_accuracy: 0.924 val_accuracy: 0.661  recall: 0.653  f1: 0.635
train loss:100%[*********************************************