## Import Libraries

In [2]:
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [3]:
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import math
import seaborn as sns
import matplotlib.pyplot as plt
%matplotlib inline
import cv2
import csv
import time
import random
import matplotlib.image as img
# import warnings
import warnings
# filter warnings
warnings.filterwarnings('ignore')

import os

import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
import torch.optim as optim
from torch.utils.data import DataLoader, Dataset
from torch.utils.data import RandomSampler
from torch.utils.data import TensorDataset
from torchvision.utils import make_grid
import torchvision.models as models
import time
from PIL import Image
from tqdm import tqdm
import gc
from torch.utils.data import DataLoader
# from torchsummary import summary

## Import Dataset

In [4]:
class Musicdata(torch.utils.data.Dataset):
    def __init__(self, npz, mode='train'):
        npzfile = np.load(npz)
        self.mode = mode   
        self.x = npzfile['arr_0']
        self.x = [one.reshape(1,640,128) for one in self.x]
        self.y = npzfile['arr_1']

        
                    
    def __getitem__(self, index):
        data = torch.tensor(self.x[index], dtype = torch.float32)
        #data = transforms(data)
        if self.mode == 'test': 
            return data
        genre = [np.where(one == 1)[0][0] for one in self.y]
        genre = torch.tensor(int(genre[index]))
        return data, genre

    def __len__(self):
        return len(self.x)

In [5]:
dataset_train = Musicdata('/content/drive/MyDrive/shuffled_train.npz', mode='train')
dataset_val = Musicdata('/content/drive/My Drive/shuffled_valid.npz', mode='val')
# dataset_test = Musicdata('/content/drive/My Drive/test_arr.npz', mode='test')
from torch.utils.data import DataLoader

train_loader = DataLoader(dataset_train, batch_size=32, num_workers=8, shuffle=False)
val_loader = DataLoader(dataset_val, batch_size=32, num_workers=8, shuffle=False)
# test_loader = DataLoader(dataset_test, batch_size=128, shuffle=False)

## Building Model Architecture

## CNN(VGG-based) + LSTM
The following CNN architecture is based on VGG net.

In [None]:
# def configuration(number_of_block, initial_size=1, th_1=3, th_2=5):
#     cfg_list = []
#     c_out = initial_size
#     for i in range(number_of_block):
#         if i < th_1: # i=0,1,2,...,th_1 只有一層捲積
#             c_out = initial_size * 2**i
#             cfg_list += [int(c_out), "M"]
#         if i >= th_1 and i < th_2: # 
#             c_out *= 2
#             cfg_list += [int(c_out), int(c_out), "M"]
#         if i >= th_2: # i=th_2,...,number_of_block 只有一層捲積，開始變小
#             c_out /= 2
#             cfg_list += [int(c_out), "M"]
#     return cfg_list, int(c_out)

# def make_layers(cfg, batch_norm=False):
#     layers = []
#     in_channels = 1
#     for v in cfg:
#         if v == 'M':
#             layers += [nn.MaxPool2d(kernel_size=2, stride=2)]
#         else:
#             conv2d = nn.Conv2d(in_channels, v, kernel_size=3, padding=1)
#             if batch_norm:
#                 layers += [conv2d, nn.BatchNorm2d(v), nn.ReLU(inplace=True)]
#             else:
#                 layers += [conv2d, nn.ReLU(inplace=True)]
#             in_channels = v
#     return nn.Sequential(*layers)

In [None]:
# class VGG(nn.Module):
#     def __init__(self, features, number_of_block, c_out, hidden_size, num_layers):
#         super(VGG, self).__init__()
#         self.features = features
#         self.c_out = c_out
#         self.number_of_block = number_of_block
#         self.LSTM = nn.LSTM(c_out, hidden_size, num_layers, batch_first = True)
#         self.classifier = nn.Sequential(
#                 nn.Dropout(),
#                 nn.Linear(int(640*128/4**number_of_block)*hidden_size, 512),
#                 nn.ReLU(True),
#                 nn.Dropout(),
#                 nn.Linear(512, 512),
#                 nn.ReLU(True),
#                 nn.Linear(512, 8),
#         )
#         self.avgpool = nn.AdaptiveAvgPool2d((6, 6))
#         for m in self.modules():
#             if isinstance(m, nn.Conv2d):
#                 n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels
#                 m.weight.data.normal_(0, math.sqrt(2. / n))
#                 m.bias.data.zero_()

#     def forward(self, x):
#         x = self.features(x)
#         #print(x.shape) # batch_size, seq_length, hidden_size
#         x = x.view(-1, int(640*128/(4**self.number_of_block)), self.c_out)
#         x, _ = self.LSTM(x)
#         # print(x.shape) # batch_size, seq_length, hidden_size
#         x = x.reshape(x.size(0), -1)
#         x = self.classifier(x)
#         return x

## CNN(ResNet-based) + LSTM
The following architecture is based on ResNet.

### Define `Conv1` layer and `Bottleneck` class

In [6]:
def Conv1(in_planes, places, stride=2):
    return nn.Sequential(
        nn.Conv2d(in_channels=in_planes, out_channels=places, kernel_size=7, stride=stride, padding=3, bias=False),
        nn.BatchNorm2d(places),
        nn.ReLU(inplace=True),
        nn.MaxPool2d(kernel_size=3, stride=2, padding=1)
    )

class Bottleneck(nn.Module):
  '''
  in_places: size of input channel
  places: 進行前兩次捲機時的 output channel size
  stride: 第二次卷機時的 stride
  expansion: bottleneck 輸出的 channel 為 places*expansion
  downsampling: 是否要改變 H, W
  '''
  def __init__(self,in_places,places, stride=1, downsampling=False, expansion = 4):
        super(Bottleneck,self).__init__()
        self.expansion = expansion
        self.downsampling = downsampling

        self.bottleneck = nn.Sequential(
            # 第一次是為了改變 channel 的維度，H, W 不變
            nn.Conv2d(in_channels=in_places,out_channels=places,kernel_size=1,stride=1, bias=False),
            nn.BatchNorm2d(places),
            nn.ReLU(inplace=True),
            # 第二次是是做捲機，H, W 會變
            nn.Conv2d(in_channels=places, out_channels=places, kernel_size=3, stride=stride, padding=1, bias=False),
            nn.BatchNorm2d(places),
            nn.ReLU(inplace=True),
            # 第三次是為了改變 channel 的維度為 places*self.expansion，H, W 不變
            nn.Conv2d(in_channels=places, out_channels=places*self.expansion, kernel_size=1, stride=1, bias=False),
            nn.BatchNorm2d(places*self.expansion),
        )
        

        if self.downsampling:
            self.downsample = nn.Sequential(
                nn.Conv2d(in_channels=in_places, out_channels=places*self.expansion, kernel_size=1, stride=stride, bias=False),
                nn.BatchNorm2d(places*self.expansion)
            )
        self.relu = nn.ReLU(inplace=True)
  def forward(self, x):
        residual = x
        out = self.bottleneck(x)

        if self.downsampling:
            residual = self.downsample(x)

        out += residual
        out = self.relu(out)
        return out

### Define `ResNet` class

In [7]:
class RESTM(nn.Module):
    def __init__(self, blocks, initial_size, expansion=4, hidden_size=32, num_layers=2, fc_neural=512, fc_block=1):
        super(RESTM,self).__init__()
        self.expansion = expansion
        self.initial_size = initial_size

        self.conv1 = Conv1(in_planes=1, places=self.initial_size)
        set_size = self.initial_size
        self.layer1 = self.make_layer(in_places=set_size, places=set_size, block=blocks[0], stride=1, expansion=self.expansion)
        set_size *= self.expansion
        self.layer2 = self.make_layer(in_places=set_size, places=int(set_size/2), block=blocks[1], stride=2, expansion=self.expansion)
        set_size *= self.expansion
        self.layer3 = self.make_layer(in_places=int(set_size/2), places=int(set_size/4), block=blocks[2], stride=2, expansion=self.expansion)
        set_size *= self.expansion
        self.layer4 = self.make_layer(in_places=int(set_size/4), places=int(set_size/8), block=blocks[3], stride=2, expansion=self.expansion)
        set_size *= self.expansion

        self.avgpool = nn.AvgPool2d(2, stride=1)
        self.LSTM = nn.LSTM(int(set_size/8), hidden_size, num_layers, batch_first = True)
        
        self.fc = self.make_fc(hidden_size=hidden_size, fc_neural=fc_neural, fc_block=fc_block)
#         self.fc = nn.Sequential(
#                 nn.Dropout(),
#                 nn.Linear(3*19*hidden_size, fc_neural),
#                 nn.BatchNorm1d(fc_neural),
#                 nn.ReLU(True),
#                 nn.Dropout(),

#                 nn.Linear(fc_neural, 8),
#         )

        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu')
            elif isinstance(m, nn.BatchNorm2d):
                nn.init.constant_(m.weight, 1)
                nn.init.constant_(m.bias, 0)

    def make_layer(self, in_places, places, block, stride, expansion):
        layers = []
        layers.append(Bottleneck(in_places, places, stride, downsampling=True, expansion=expansion))
        for i in range(1, block):
            layers.append(Bottleneck(places*self.expansion, places, expansion=expansion))

        return nn.Sequential(*layers)
    
    def make_fc(self, hidden_size, fc_neural, fc_block):
        layers = [nn.Dropout(), nn.Linear(3*19*hidden_size, fc_neural), nn.BatchNorm1d(fc_neural), nn.ReLU(True), nn.Dropout()]
        for i in range(fc_block):
            layers += [nn.Linear(fc_neural, fc_neural), nn.BatchNorm1d(fc_neural), nn.ReLU(True), nn.Dropout()]
        layers += [nn.Linear(fc_neural, 8)]
        
        return nn.Sequential(*layers)


    def forward(self, x):
#         size before everything: bach size * 1 * 640 * 128
#         size after conv1:       bach size * initial size * 160 * 32
#         size after layer1:      bach size * setting      * 160 * 32
#         size after layer2:      bach size * setting      *  80 * 16
#         size after layer3:      bach size * setting      *  40 *  8
#         size after layer4:      bach size * setting      *  20 *  4
#         size after avgpooling:  bach size * setting      *  19 *  3
#         size after view:        bach size * 57 * setting
#         size after LSTM:        bach size * (57 * setting)

#         print("size before everything:", x.shape)
        x = self.conv1(x)
#         print("size after conv1", x.shape)
        x = self.layer1(x)
#         print("size after layer1", x.shape)
        x = self.layer2(x)
#         print("size after layer2",x.shape)
        x = self.layer3(x)
#         print("size after layer3",x.shape)
        x = self.layer4(x)
#         print("size after layer4",x.shape)
        x = self.avgpool(x)
#         print("size after average pooling",x.shape)
        #x = x.view(x.size(0), -1)
        x = x.view(x.size(0), int(3*19), -1)
#         print("size after view", x.shape)
        x, _ = self.LSTM(x)
        x = x.reshape(x.size(0), -1)
#         print("size after LSTM", x.shape)
        x = self.fc(x)
        return x

# model = ResNet([1, 2, 1, 1], initial_size=64)
# summary(model, (1,128,640))

### Define `configuration` function

In [8]:
def configuration(initial_size=64, blocks_1=1, blocks_2=1, blocks_3=1, blocks_4=1, expansion=4, hidden_size=32, num_layers=2, fc_neural=512, fc_block=1):
    block = [blocks_1, blocks_2, blocks_3, blocks_4]
    return RESTM(block, initial_size, expansion, hidden_size, num_layers, fc_neural, fc_block)

# model = configuration(1, 2, 3, 4, 5, 6, 7)
# summary(model, (1,128,640))

### Examples for RestNet

In [None]:
# def ResNet50():
#     return configuration(1, 3, 4, 6, 3)

# def ResNet101():
#     return configuration(1, 3, 4, 23, 3)

# def ResNet152():
#     return configuration(1, 3, 8, 36, 3)

# #  以下為範例
# input = torch.randn(64, 1, 128, 640)

# model = ResNet50()
# print(model)    
# out = model(input)
# print(out.shape)
# model = ResNet101()
# print(model) 
# out = model(input)
# print(out.shape)
# model = ResNet152()
# print(model) 
# out = model(input)
# print(out.shape)

## Criterion

In [9]:
criterion = nn.CrossEntropyLoss()
criterion = criterion.cuda()

In [10]:
def train(input_data, model, criterion, optimizer, output_disable = False):

    model.train()
    loss_list = []
    total_count = 0
    acc_count = 0
    pbar = tqdm(input_data, position = 0)
    for data in pbar:
        pbar.set_description("Training")
        music, genre = data[0].cuda(), data[1].cuda()
        optimizer.zero_grad()
        outputs = model(music)
        loss = criterion(outputs, genre) ##data type
        loss.backward()
        optimizer.step() 
        _, predicted = torch.max(outputs.data, 1)
        total_count += genre.size(0) 
        acc_count += (predicted == genre).sum()  
        loss_list.append(loss.item())
        

    acc = acc_count.to("cpu").detach().numpy() / total_count
    loss = sum(loss_list) / len(loss_list)
    return acc, loss

In [11]:
def val(input_data, model, criterion, output_disable = False):
    model.eval()
    
    loss_list = []
    total_count = 0
    acc_count = 0
    pbar = tqdm(input_data)
    with torch.no_grad():
        for data in pbar:
            pbar.set_description("Validation")
            music, genre = data[0].cuda(), data[1].cuda()
            outputs = model(music)
            loss = criterion(outputs, genre) ##data type
            _, predicted = torch.max(outputs.data, dim=1)
            total_count += genre.size(0)
            acc_count += (predicted == genre).sum()
            loss_list.append(loss.item())


    acc = acc_count.to("cpu").detach().numpy() / total_count
    loss = sum(loss_list) / len(loss_list)
    return acc, loss

## CNN(VGG-based) + LSTM training function
The following CNN architecture is based on VGG net.

In [None]:
# def RNN(parameters ,max_epochs = 3):    
#     learning_rate = parameters[0]
#     initial_size, th_1, th_2, number_of_block, hidden_size, num_layers = tuple(int(param) for param in parameters[1:])
#     print("Learning rate =", learning_rate, "Number of block =",number_of_block, "Hidden size =", hidden_size, "Number of Layer =", num_layers)
#     train_acc_list = []
#     train_loss_list = []
#     val_acc_list = []
#     val_loss_list = []
    
#     design, c_out = configuration(number_of_block, initial_size, th_1, th_2)
#     convolution = make_layers(design, batch_norm=True)
#     model = VGG(convolution, number_of_block, c_out, hidden_size, num_layers)
#     print(design)
#     model = model.cuda()
#     optimizer = torch.optim.Adam(params = model.parameters(), lr=learning_rate, betas=(0.9, 0.999), eps=1e-08, weight_decay=0, amsgrad=False)

#     for epoch in range(1, max_epochs + 1):
#         print('=' * 20, 'Epoch', epoch, '=' * 20)
#         train_acc, train_loss = train(train_loader, model, criterion, optimizer, True)
#         val_acc, val_loss = val(val_loader, model, criterion, True)

#         train_acc_list.append(train_acc)
#         train_loss_list.append(train_loss)
#         val_acc_list.append(val_acc)
#         val_loss_list.append(val_loss)

#         print('Train Acc: {:.6f} Train Loss: {:.6f}'.format(train_acc, train_loss))
#         print('  Val Acc: {:.6f}   Val Loss: {:.6f}'.format(val_acc, val_loss))
#     return val_acc

## CNN(ResNet-based) + LSTM Training Process
The following architecture is based on ResNet.

In [12]:
def RESTM_Train(parameters ,max_epochs = 3):    
    learning_rate = parameters[0]
    initial_size, blocks_1, blocks_2, blocks_3, blocks_4, expansion, hidden_size, num_layers, fc_neural, fc_block = tuple(int(param) for param in parameters[1:])
    
    print(" learning_rate =", learning_rate, "initial_size =", initial_size, "\n",\
          "blocks_1 =",blocks_1, "blocks_2 =",blocks_2, "blocks_3 =",blocks_3, "blocks_4 =",blocks_4, "\n", \
          "expansion =", expansion, "hidden_size =", hidden_size, "num_layers =", num_layers)
    
    train_acc_list = []
    train_loss_list = []
    val_acc_list = []
    val_loss_list = []
    
    model = configuration(initial_size, blocks_1, blocks_2, blocks_3, blocks_4, expansion, hidden_size, num_layers, fc_neural, fc_block)
    model = model.cuda()
    optimizer = torch.optim.Adam(params = model.parameters(), lr=learning_rate, betas=(0.9, 0.999), eps=1e-08, weight_decay=0, amsgrad=False)

    for epoch in range(1, max_epochs + 1):
        print('=' * 20, 'Epoch', epoch, '=' * 20)
        train_acc, train_loss = train(train_loader, model, criterion, optimizer, True)
        val_acc, val_loss = val(val_loader, model, criterion, True)

        train_acc_list.append(train_acc)
        train_loss_list.append(train_loss)
        val_acc_list.append(val_acc)
        val_loss_list.append(val_loss)

        print('Train Acc: {:.6f} Train Loss: {:.6f}'.format(train_acc, train_loss))
        print('  Val Acc: {:.6f}   Val Loss: {:.6f}'.format(val_acc, val_loss))
    return val_acc

In [13]:
def comparison(parameter, lower_bound, upper_bound, integer = False, mutation_rate = 0.02):
    new_parameter = parameter
    if parameter > upper_bound:
        new_parameter =  np.random.uniform(low = (lower_bound+upper_bound)/2, high = upper_bound)
    elif parameter < lower_bound:
        new_parameter =  np.random.uniform(low = lower_bound, high = (lower_bound+upper_bound)/2)
    else:
        if random.uniform(0,1) < mutation_rate:
            new_parameter = np.random.uniform(low = lower_bound, high = upper_bound)
            print("Mutation occur:", parameter, "-->", new_parameter)
    if integer:
        new_parameter = int(new_parameter)
    return new_parameter

In [14]:
p_num = 4  ##粒子個數
max_iteration = 10 ##迭代次數
score_ind = 11 ## 分數的 index
record = np.zeros((1, max_iteration), dtype='float')
boundary = [[1e-5, 5e-3],[1,64],[1,8],[1,16],[1,30],[1,16],[4,4],[1, 50], [1, 5], [32,256], [1,3]]
x = np.array([[1.5e-4, 59,  3,1,1,1,  4, 30, 1, 256, 1, 0.398750],
              [  5e-4, 40,  3,6,5,3,  4, 30, 1, 128, 2, 0.426250],
              [  1e-4, 48,  4,3,2,1,  4, 31, 2,  64, 3, 0.261250],
              [  5e-3, 20,  6,6,16,3, 4, 20, 2,  32, 1, 0.373750],
             ])
x = x.transpose()
for j in range(p_num):
    for k in range(score_ind):
        if k != 0:
            x[k,j] = comparison(x[k,j], boundary[k][0], boundary[k][1], True,  mutation_rate = 1)
    gc.collect()
    with torch.no_grad():
        torch.cuda.empty_cache()
    time.sleep(10)
    x[score_ind,j] = RESTM_Train(x[:score_ind,j].tolist())
            
            # x = np.array([[0.0005, 32, 3, 5, 4, 35, 2, 0.558750], 
#               [0.0003112003852983763, 44, 2, 5, 4, 35, 2, 0.523750],
#               [0.0007612228704013985, 26, 2, 6, 4, 39, 2, 0.513750],
#               [0.0006310624186537511, 33, 2, 6, 3, 46, 2, 0.475000]])

print(x)

Mutation occur: 59.0 --> 9.28349091705468
Mutation occur: 3.0 --> 3.802785009240509
Mutation occur: 1.0 --> 2.979880589021152
Mutation occur: 1.0 --> 1.7548820628863693
Mutation occur: 1.0 --> 6.321236139772816
Mutation occur: 4.0 --> 4.0
Mutation occur: 30.0 --> 34.206741906560794
Mutation occur: 1.0 --> 2.5883131438438585
Mutation occur: 256.0 --> 46.11230173382132
Mutation occur: 1.0 --> 2.815320191647121
 learning_rate = 0.00015 initial_size = 9 
 blocks_1 = 3 blocks_2 = 2 blocks_3 = 1 blocks_4 = 6 
 expansion = 4 hidden_size = 34 num_layers = 2


  0%|          | 0/200 [00:00<?, ?it/s]



Training: 100%|██████████| 200/200 [01:57<00:00,  1.71it/s]
Validation: 100%|██████████| 25/25 [00:03<00:00,  8.11it/s]


Train Acc: 0.121677 Train Loss: 2.200221
  Val Acc: 0.122500   Val Loss: 2.085360
Mutation occur: 40.0 --> 7.361439534318143
Mutation occur: 3.0 --> 5.369619206889515
Mutation occur: 6.0 --> 9.640898095086621
Mutation occur: 5.0 --> 9.096909983200336
Mutation occur: 3.0 --> 1.4746419308187755
Mutation occur: 4.0 --> 4.0
Mutation occur: 30.0 --> 3.491265734709537
Mutation occur: 1.0 --> 3.361479157287473
Mutation occur: 128.0 --> 56.24594116858338
Mutation occur: 2.0 --> 2.1338849836658436


  0%|          | 0/200 [00:00<?, ?it/s]

 learning_rate = 0.0005 initial_size = 7 
 blocks_1 = 5 blocks_2 = 9 blocks_3 = 9 blocks_4 = 1 
 expansion = 4 hidden_size = 3 num_layers = 3


Training: 100%|██████████| 200/200 [02:02<00:00,  1.63it/s]
Validation: 100%|██████████| 25/25 [00:03<00:00,  7.92it/s]


Train Acc: 0.131373 Train Loss: 2.178712
  Val Acc: 0.125000   Val Loss: 2.085954
Mutation occur: 48.0 --> 29.853994621922535
Mutation occur: 4.0 --> 1.7639283507914034
Mutation occur: 3.0 --> 7.9806647459310565
Mutation occur: 2.0 --> 19.690956696023804
Mutation occur: 1.0 --> 7.728049153510513
Mutation occur: 4.0 --> 4.0
Mutation occur: 31.0 --> 14.934986306696356
Mutation occur: 2.0 --> 3.051461205152754
Mutation occur: 64.0 --> 168.67586746997554
Mutation occur: 3.0 --> 2.785343355180209
 learning_rate = 0.0001 initial_size = 29 
 blocks_1 = 1 blocks_2 = 7 blocks_3 = 19 blocks_4 = 7 
 expansion = 4 hidden_size = 14 num_layers = 3


  0%|          | 0/200 [00:00<?, ?it/s]



Training: 100%|██████████| 200/200 [02:12<00:00,  1.51it/s]
Validation: 100%|██████████| 25/25 [00:04<00:00,  6.19it/s]


Train Acc: 0.128089 Train Loss: 2.192874
  Val Acc: 0.123750   Val Loss: 2.088036
Mutation occur: 20.0 --> 15.026381342167403
Mutation occur: 6.0 --> 3.0005316740270165
Mutation occur: 6.0 --> 3.642261494671878
Mutation occur: 16.0 --> 15.492503076100515
Mutation occur: 3.0 --> 5.4614645404019235
Mutation occur: 4.0 --> 4.0
Mutation occur: 20.0 --> 46.482215497731644
Mutation occur: 2.0 --> 4.503647067073385
Mutation occur: 32.0 --> 53.61367940199521
Mutation occur: 1.0 --> 1.887440740378979


  0%|          | 0/200 [00:00<?, ?it/s]

 learning_rate = 0.005 initial_size = 15 
 blocks_1 = 3 blocks_2 = 3 blocks_3 = 15 blocks_4 = 5 
 expansion = 4 hidden_size = 46 num_layers = 4


Training: 100%|██████████| 200/200 [02:04<00:00,  1.61it/s]
Validation: 100%|██████████| 25/25 [00:03<00:00,  7.22it/s]

Train Acc: 0.120895 Train Loss: 2.133294
  Val Acc: 0.125000   Val Loss: 2.080975
[[1.5000e-04 5.0000e-04 1.0000e-04 5.0000e-03]
 [9.0000e+00 7.0000e+00 2.9000e+01 1.5000e+01]
 [3.0000e+00 5.0000e+00 1.0000e+00 3.0000e+00]
 [2.0000e+00 9.0000e+00 7.0000e+00 3.0000e+00]
 [1.0000e+00 9.0000e+00 1.9000e+01 1.5000e+01]
 [6.0000e+00 1.0000e+00 7.0000e+00 5.0000e+00]
 [4.0000e+00 4.0000e+00 4.0000e+00 4.0000e+00]
 [3.4000e+01 3.0000e+00 1.4000e+01 4.6000e+01]
 [2.0000e+00 3.0000e+00 3.0000e+00 4.0000e+00]
 [4.6000e+01 5.6000e+01 1.6800e+02 5.3000e+01]
 [2.0000e+00 2.0000e+00 2.0000e+00 1.0000e+00]
 [1.2250e-01 1.2500e-01 1.2375e-01 1.2500e-01]]





In [15]:
def communication(i, iteration_per_communicate, local_number, receive_numbers, Gb, add, score_ind):
  write_name = str(i)+ str(local_number) +".csv"
  pd.DataFrame(Gb).to_csv(add+write_name)
  for receive_number in receive_numbers:
    patience = 0
    check = True
    while check:
      try:
        read_name = str(i)+ str(receive_number) +".csv"
        Gb_from_others = pd.read_csv(add+read_name)
        Gb_from_others = Gb_from_others.to_numpy().transpose()[1,]  
        print("receive from", receive_number, Gb_from_others)
        if Gb_from_others[score_ind] > Gb[score_ind]:
          Gb = Gb_from_others

        check = False
      except:
        time.sleep(1)
        patience += 1
        if patience > 240:
          print("break!!!!!! I loss my patience")
          break
  return Gb

In [22]:
w = 0.5
c1 = 2
c2 = 2
V = np.zeros((score_ind, p_num))
Pb = x
Gb = Pb[:, np.argmax(Pb[score_ind])].copy()

In [23]:
local_number = 3
receive_numbers = [1,2,4]
iteration_per_communicate = 1
add = '/content/drive/MyDrive/update/'

In [None]:
# for i in range(max_iteration):
#     print('-' * 20, 'Iteration', i+1, '-' * 20)
#     for j in range(p_num):
#         print('.' * 20, 'Particle', j+1, '.' * 20)
#         for k in range(score_ind):
#             V[k,j] = w * V[k,j] + c1 * random.uniform(0,1) * (Pb[k,j] - x[k,j]) + c2 * random.uniform(0,1) * (Gb[k] - x[k,j])  ##計算速度
#             x[k,j] += V[k,j]  ##更新位置
#             x[k,j] = comparison(x[k,j], boundary[k][0], boundary[k][1], (k != 0))
#         gc.collect()
#         with torch.no_grad():
#             torch.cuda.empty_cache()
#         time.sleep(10)
#         x[score_ind,j] = RESTM_Train(x[:score_ind,j].tolist())
#     for j in range(p_num): ##每個粒子的最好位置更新
#         if Pb[score_ind,j] < x[score_ind,j]:
#             Pb[:,j] = x[:,j]
#             print(Gb[score_ind], max(Pb[score_ind]))
#     if Gb[score_ind] < max(Pb[score_ind]): 
#         Gb = Pb[:, np.argmax(Pb[score_ind])].copy()
#     print(Gb)

In [21]:
Gb = communication(0, iteration_per_communicate, local_number, receive_numbers, Gb, add, score_ind)

receive from 1 [5.0000e-03 7.0000e+00 2.0000e+00 7.0000e+00 1.1000e+01 9.0000e+00
 4.0000e+00 4.5000e+01 1.0000e+00 8.0000e+01 2.0000e+00 2.4375e-01]
receive from 2 [5.000e-04 3.100e+01 3.000e+00 3.000e+00 1.000e+00 4.000e+00 4.000e+00
 1.200e+01 2.000e+00 2.330e+02 2.000e+00 1.975e-01]
receive from 4 [5.000e-03 2.900e+01 5.000e+00 4.000e+00 2.500e+01 3.000e+00 4.000e+00
 2.600e+01 1.000e+00 2.410e+02 1.000e+00 2.825e-01]


In [24]:
for i in range(1,(max_iteration+1)):
    print('-' * 20, 'Iteration', i+1, '-' * 20)
    for j in range(p_num):
        print('.' * 20, 'Particle', j+1, '.' * 20)
        for k in range(score_ind):
            V[k,j] = w * V[k,j] + c1 * random.uniform(0,1) * (Pb[k,j] - x[k,j]) + c2 * random.uniform(0,1) * (Gb[k] - x[k,j])  ##計算速度
            x[k,j] += V[k,j]  ##更新位置
            x[k,j] = comparison(x[k,j], boundary[k][0], boundary[k][1], (k != 0))
        gc.collect()
        with torch.no_grad():
            torch.cuda.empty_cache()
        time.sleep(10)
        x[score_ind,j] = RESTM_Train(x[:score_ind,j].tolist())
    for j in range(p_num): ##每個粒子的最好位置更新
        if Pb[score_ind,j] < x[score_ind,j]:
            Pb[:,j] = x[:,j]
            print(Gb[score_ind], max(Pb[score_ind]))
    if Gb[score_ind] < max(Pb[score_ind]): 
        Gb = Pb[:, np.argmax(Pb[score_ind])].copy()
    # Gb = np.random.uniform(low = -100, high = 100, size = 12)
    if i % iteration_per_communicate == 0:
        Gb = communication(i, iteration_per_communicate, local_number, receive_numbers, Gb, add, score_ind)
    print(i,Gb)

-------------------- Iteration 2 --------------------
.................... Particle 1 ....................


  0%|          | 0/200 [00:00<?, ?it/s]

 learning_rate = 0.000703372254392943 initial_size = 7 
 blocks_1 = 4 blocks_2 = 2 blocks_3 = 2 blocks_4 = 6 
 expansion = 4 hidden_size = 16 num_layers = 2


Training: 100%|██████████| 200/200 [02:01<00:00,  1.65it/s]
Validation: 100%|██████████| 25/25 [00:03<00:00,  7.53it/s]


Train Acc: 0.124805 Train Loss: 2.179096
  Val Acc: 0.145000   Val Loss: 2.080452
.................... Particle 2 ....................


  0%|          | 0/200 [00:00<?, ?it/s]

 learning_rate = 0.0005 initial_size = 7 
 blocks_1 = 5 blocks_2 = 9 blocks_3 = 9 blocks_4 = 1 
 expansion = 4 hidden_size = 3 num_layers = 3


Training: 100%|██████████| 200/200 [02:04<00:00,  1.61it/s]
Validation: 100%|██████████| 25/25 [00:03<00:00,  7.60it/s]


Train Acc: 0.128089 Train Loss: 2.174431
  Val Acc: 0.125000   Val Loss: 2.082404
.................... Particle 3 ....................


  0%|          | 0/200 [00:00<?, ?it/s]

 learning_rate = 0.00017516021309850488 initial_size = 9 
 blocks_1 = 5 blocks_2 = 7 blocks_3 = 13 blocks_4 = 3 
 expansion = 4 hidden_size = 10 num_layers = 3


Training: 100%|██████████| 200/200 [02:02<00:00,  1.63it/s]
Validation: 100%|██████████| 25/25 [00:03<00:00,  7.34it/s]


Train Acc: 0.123710 Train Loss: 2.189720
  Val Acc: 0.125000   Val Loss: 2.083004
.................... Particle 4 ....................


  0%|          | 0/200 [00:00<?, ?it/s]

 learning_rate = 0.003841260731636701 initial_size = 7 
 blocks_1 = 4 blocks_2 = 5 blocks_3 = 13 blocks_4 = 2 
 expansion = 4 hidden_size = 4 num_layers = 2


Training: 100%|██████████| 200/200 [02:01<00:00,  1.65it/s]
Validation: 100%|██████████| 25/25 [00:03<00:00,  7.98it/s]


Train Acc: 0.172818 Train Loss: 2.068175
  Val Acc: 0.240000   Val Loss: 1.935196
receive from 1 [5.0000e-03 4.0000e+00 2.0000e+00 1.0000e+00 1.0000e+01 6.0000e+00
 4.0000e+00 3.8000e+01 1.0000e+00 2.5300e+02 1.0000e+00 3.1625e-01]
receive from 2 [4.30762155e-03 2.70000000e+01 6.00000000e+00 3.00000000e+00
 1.10000000e+01 3.00000000e+00 4.00000000e+00 3.60000000e+01
 1.00000000e+00 2.35000000e+02 1.00000000e+00 3.07500000e-01]
receive from 4 [5.0000e-03 2.9000e+01 5.0000e+00 4.0000e+00 2.5000e+01 3.0000e+00
 4.0000e+00 2.6000e+01 1.0000e+00 2.4100e+02 1.0000e+00 2.9375e-01]
1 [5.0000e-03 4.0000e+00 2.0000e+00 1.0000e+00 1.0000e+01 6.0000e+00
 4.0000e+00 3.8000e+01 1.0000e+00 2.5300e+02 1.0000e+00 3.1625e-01]
-------------------- Iteration 3 --------------------
.................... Particle 1 ....................
Mutation occur: 1.8904137638032203 --> 14.33445237463976


  0%|          | 0/200 [00:00<?, ?it/s]

 learning_rate = 0.0013346760917578896 initial_size = 11 
 blocks_1 = 3 blocks_2 = 6 blocks_3 = 13 blocks_4 = 14 
 expansion = 4 hidden_size = 11 num_layers = 2


Training: 100%|██████████| 200/200 [02:07<00:00,  1.57it/s]
Validation: 100%|██████████| 25/25 [00:03<00:00,  7.23it/s]


Train Acc: 0.143728 Train Loss: 2.131756
  Val Acc: 0.213750   Val Loss: 1.965908
.................... Particle 2 ....................
Mutation occur: 46.86929779785205 --> 42.965737748163164


  0%|          | 0/200 [00:00<?, ?it/s]

 learning_rate = 0.0036886729641906904 initial_size = 2 
 blocks_1 = 2 blocks_2 = 2 blocks_3 = 9 blocks_4 = 10 
 expansion = 4 hidden_size = 42 num_layers = 1


Training: 100%|██████████| 200/200 [02:00<00:00,  1.66it/s]
Validation: 100%|██████████| 25/25 [00:02<00:00,  8.46it/s]


Train Acc: 0.195027 Train Loss: 2.073359
  Val Acc: 0.318750   Val Loss: 1.861409
.................... Particle 3 ....................


  0%|          | 0/200 [00:00<?, ?it/s]

 learning_rate = 0.002853595213989329 initial_size = 25 
 blocks_1 = 1 blocks_2 = 4 blocks_3 = 5 blocks_4 = 6 
 expansion = 4 hidden_size = 49 num_layers = 1


Training: 100%|██████████| 200/200 [01:54<00:00,  1.74it/s]
Validation: 100%|██████████| 25/25 [00:03<00:00,  7.69it/s]


Train Acc: 0.239443 Train Loss: 1.962845
  Val Acc: 0.208750   Val Loss: 2.135234
.................... Particle 4 ....................


  0%|          | 0/200 [00:00<?, ?it/s]

 learning_rate = 0.004226602931440847 initial_size = 2 
 blocks_1 = 3 blocks_2 = 4 blocks_3 = 6 blocks_4 = 2 
 expansion = 4 hidden_size = 13 num_layers = 1


Training: 100%|██████████| 200/200 [01:52<00:00,  1.78it/s]
Validation: 100%|██████████| 25/25 [00:03<00:00,  8.21it/s]


Train Acc: 0.177823 Train Loss: 2.074627
  Val Acc: 0.236250   Val Loss: 1.953572
receive from 1 [5.0000e-03 4.0000e+00 2.0000e+00 1.0000e+00 1.0000e+01 6.0000e+00
 4.0000e+00 3.8000e+01 1.0000e+00 2.5300e+02 1.0000e+00 3.1625e-01]
receive from 2 [5.0000e-03 4.0000e+00 2.0000e+00 1.0000e+00 1.0000e+01 6.0000e+00
 4.0000e+00 3.8000e+01 1.0000e+00 2.5300e+02 1.0000e+00 3.1625e-01]
receive from 4 [2.52460787e-03 2.20000000e+01 1.00000000e+00 6.00000000e+00
 1.60000000e+01 1.00000000e+00 4.00000000e+00 4.50000000e+01
 1.00000000e+00 1.93000000e+02 1.00000000e+00 3.25000000e-01]
2 [2.52460787e-03 2.20000000e+01 1.00000000e+00 6.00000000e+00
 1.60000000e+01 1.00000000e+00 4.00000000e+00 4.50000000e+01
 1.00000000e+00 1.93000000e+02 1.00000000e+00 3.25000000e-01]
-------------------- Iteration 4 --------------------
.................... Particle 1 ....................


  0%|          | 0/200 [00:00<?, ?it/s]

 learning_rate = 0.003673331148959778 initial_size = 22 
 blocks_1 = 4 blocks_2 = 5 blocks_3 = 19 blocks_4 = 1 
 expansion = 4 hidden_size = 34 num_layers = 2


Training: 100%|██████████| 200/200 [02:02<00:00,  1.64it/s]
Validation: 100%|██████████| 25/25 [00:03<00:00,  6.77it/s]


Train Acc: 0.201282 Train Loss: 2.052613
  Val Acc: 0.236250   Val Loss: 2.112919
.................... Particle 2 ....................
 learning_rate = 0.0034255070354100225 initial_size = 29 
 blocks_1 = 1 blocks_2 = 1 blocks_3 = 12 blocks_4 = 13 
 expansion = 4 hidden_size = 40 num_layers = 2


  0%|          | 0/200 [00:00<?, ?it/s]



Training:  94%|█████████▎| 187/200 [01:57<00:08,  1.56it/s]

KeyboardInterrupt: ignored

In [20]:
!rm /content/drive/MyDrive/update/*

rm: cannot remove '/content/drive/MyDrive/update/*': No such file or directory
