In [None]:
import os
import time

import numpy as np
import torch
import torch.nn as nn
from imblearn.over_sampling import SMOTE
from imblearn.over_sampling import BorderlineSMOTE
from torchvision import transforms

import mydatasets
import ResNet56

def loadData():
    """Build the CIFAR-10 train and test loaders (batch size 128).

    The training split is augmented with a random crop and a random
    horizontal flip. The test split is only converted to tensors, so that
    evaluation is deterministic and is not distorted by random augmentation.

    Returns:
        tuple: ``(train_loader, test_loader)``.
    """
    cifar10path = '.'
    train_transforms = transforms.Compose([
        transforms.RandomCrop(32, padding=4),  # random crop after padding 4 pixels
        transforms.RandomHorizontalFlip(),     # random horizontal flip
        transforms.ToTensor(),
    ])
    # No random augmentation at evaluation time.
    test_transforms = transforms.Compose([
        transforms.ToTensor(),
    ])
    # NOTE(review): the meaning of the positional flags of MyCifar10 is defined in
    # `mydatasets`, which is not visible here; the arguments are passed unchanged.
    train_dataset = mydatasets.MyCifar10(cifar10path, True, train_transforms, True)
    train_loader = mydatasets.MyDataLoader(train_dataset, 128)
    test_dataset = mydatasets.MyCifar10(cifar10path, False, test_transforms)
    test_loader = mydatasets.MyDataLoader(test_dataset, 128)
    return (train_loader, test_loader)



def train(trainLoader, model, lossFunction, optimizer, device, data_new, synthetic_start=5000):
    """Run one training epoch, replacing every class-1 sample by a row of ``data_new``.

    Each sample whose label equals 1 is overwritten, in iteration order, by
    ``data_new[synthetic_start]``, ``data_new[synthetic_start + 1]``, ... before
    the batch is fed to the model. The cursor restarts at ``synthetic_start`` on
    every call (i.e. every epoch).

    Args:
        trainLoader: iterable yielding ``(data, label)`` batches.
        model: network to optimize; it is switched to training mode.
        lossFunction: callable ``loss = lossFunction(pred, label)``.
        optimizer: optimizer bound to ``model``'s parameters.
        device: device the batches are moved to.
        data_new: indexable collection of replacement samples (e.g. the array
            returned by the ``smote*`` helpers).
        synthetic_start: index of the first row of ``data_new`` used as a
            replacement. Defaults to 5000, the offset that was previously
            hard-coded.

    Returns:
        tuple: ``(mean loss per batch, fraction of correctly classified samples)``.

    Raises:
        IndexError: if the loader contains more class-1 samples than there are
            rows in ``data_new`` from ``synthetic_start`` onwards.
    """
    model.train()
    tloss, totalBatch = 0.0, 0
    correct, totalSize = 0, 0
    cursor = synthetic_start
    pool_size = len(data_new)
    for batch, (data, label) in enumerate(trainLoader):
        totalBatch += 1
        # Substitute class-1 samples with rows from the replacement pool.
        for i in range(label.shape[0]):
            if label[i] == 1:
                if cursor >= pool_size:
                    raise IndexError(
                        f'data_new has {pool_size} rows but replacement row {cursor} was requested'
                    )
                data[i] = torch.as_tensor(data_new[cursor], dtype=data.dtype)
                cursor += 1

        # convert
        data, label = data.to(device), label.to(device)
        label = label.to(torch.int64)
        # calculate
        pred = model(data)
        loss = lossFunction(pred, label)
        # optimize
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        # stats
        tloss += loss.item()
        totalSize += label.shape[0]
        correct += (pred.argmax(1) == label).type(torch.float).sum().item()
        # progress report every 10 batches
        if (batch+1) % 10 == 0:
            nowTime = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime())
            print(f'[{nowTime}]    Batch: {batch+1:>4}, Loss:{loss.item():>7.6f}, AvgAcc:{100*correct/totalSize:>6.4f}%')
    return (tloss/totalBatch, correct/totalSize)


def test(testLoader, model, lossFunction, device):
    model.eval()
    tloss, totalBatch = 0, 0
    correct = 0
    totalCount = len(testLoader)
    with torch.no_grad():
        for batch, (data, label) in enumerate(testLoader):
            totalBatch += 1
            # convert
            data, label = data.to(device), label.to(device)
            label = label.to(torch.int64)
            # calculate
            pred = model(data)
            loss = lossFunction(pred, label)
            # add
            tloss += loss.item()
            correct += (pred.argmax(1) == label).type(torch.float).sum().item()
    return (tloss/totalBatch, correct/totalCount)

def del_tensor_ele(arr, index):
    """Return a copy of ``arr`` with the entry at ``index`` along dim 0 removed.

    Args:
        arr: tensor with at least one dimension.
        index: position along dim 0 to drop. Negative values count from the
            end, as in normal Python indexing.

    Returns:
        A new tensor with one fewer entry along dim 0. If ``index`` is out of
        range, an unchanged copy of ``arr`` is returned.
    """
    length = arr.shape[0]
    if index < 0:
        # Without normalization, arr[index+1:] would select the wrong slice
        # (for index == -1 it would be the whole tensor).
        index += length
    if index < 0 or index >= length:
        return arr.clone()
    return torch.cat((arr[:index], arr[index + 1:]), dim=0)


def main():
    """Train ResNet-56 for 64 epochs on the oversampled data and log every epoch.

    The learning rate starts at 0.1 and is divided by 10 at epochs 32 and 48.
    After each epoch the train/test loss and accuracy are appended to
    ``./result.txt`` and the model weights are saved to ``./saves/model.pth``.
    """
    device = 'cuda' if torch.cuda.is_available() else 'cpu'
    print(f'Using {device} device')

    trainLoader, testLoader = loadData()

    model = ResNet56.resnet56().to(device)
    lossFunction = nn.CrossEntropyLoss()
    lr = 1e-1
    optimizer = torch.optim.SGD(model.parameters(), lr, momentum=0.9, weight_decay=1e-4)
    data_new = smote1(trainLoader)  # choose the oversampling method used to fill in data
    #data_new = smote(trainLoader)
    #data_new = smote2(trainLoader)

    # torch.save does not create missing directories.
    os.makedirs('./saves', exist_ok=True)

    print("Begin Training")
    for epoch in range(64):
        if epoch in (32, 48):
            lr /= 10
            # Changing the local variable alone has no effect: the optimizer
            # keeps its own copy of the learning rate in each parameter group.
            for group in optimizer.param_groups:
                group['lr'] = lr

        print(f"Epoch:{epoch+1:>3}, learning rate = {lr}")
        trainLoss, trainAcc = train(trainLoader, model, lossFunction, optimizer, device,data_new)
        testLoss, testAcc = test(testLoader, model, lossFunction, device)
        with open('./result.txt', 'a') as f:
            nowTime = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime())
            f.write(f'[{nowTime}] Epoch{epoch+1:>3d}\n')
            # Accuracies are returned as ratios; scale them to match the '%' sign.
            f.write(f'    Train: Loss {trainLoss:>7.6f}, Acc {100*trainAcc:>6.4f}%\n')
            f.write(f'    Test:  Loss {testLoss :>7.6f}, Acc {100*testAcc :>6.4f}%\n')
        torch.save(model.state_dict(), './saves/model.pth')
    print("Done")


# Entry point: run the whole training pipeline when this cell/script is executed directly.
if __name__ == '__main__':
    main()
#batch = 390
#print(batch *128)

Using cpu device
Begin Training
Epoch:  1, learning rate = 0.1
[2022-05-10 05:40:19]    Batch:   10, Loss:5.065450, AvgAcc:10.6250%
[2022-05-10 05:40:28]    Batch:   20, Loss:2.432364, AvgAcc:11.4062%
[2022-05-10 05:40:36]    Batch:   30, Loss:2.360449, AvgAcc:11.9531%
[2022-05-10 05:40:45]    Batch:   40, Loss:2.323648, AvgAcc:11.6602%
[2022-05-10 05:40:53]    Batch:   50, Loss:2.263470, AvgAcc:11.5469%
[2022-05-10 05:41:01]    Batch:   60, Loss:2.504783, AvgAcc:11.4844%
[2022-05-10 05:41:10]    Batch:   70, Loss:2.281576, AvgAcc:11.3393%
[2022-05-10 05:41:18]    Batch:   80, Loss:2.285307, AvgAcc:11.2598%
[2022-05-10 05:41:26]    Batch:   90, Loss:2.296317, AvgAcc:11.3281%
[2022-05-10 05:41:35]    Batch:  100, Loss:2.399196, AvgAcc:11.4922%
[2022-05-10 05:41:43]    Batch:  110, Loss:2.295704, AvgAcc:11.5838%
[2022-05-10 05:41:52]    Batch:  120, Loss:2.267356, AvgAcc:11.9076%
[2022-05-10 05:42:00]    Batch:  130, Loss:2.301147, AvgAcc:12.0072%
[2022-05-10 05:42:09]    Batch:  140, Lo

In [1]:
def smote(trainLoader, delete_num=4000):
    """Oversample class 1 against class 0 with plain SMOTE.

    The loader is traversed once. The first ``delete_num`` class-1 samples are
    discarded to create an artificial imbalance. Class-1 samples are collected
    as the minority set only from batches that start after that budget is used
    up, so any class-1 samples left in the batch where the budget reaches zero
    are dropped as well. Every class-0 sample is collected as the majority set.
    Samples of other classes are ignored.

    Args:
        trainLoader: iterable yielding ``(data, label)`` batches of tensors.
        delete_num: number of leading class-1 samples to discard (default 4000).

    Returns:
        numpy array of shape ``(n_resampled, *sample_shape)``. Rows are the
        class-0 samples, then the retained class-1 samples, followed by the
        rows SMOTE adds (presumably appended at the end; verify against the
        imbalanced-learn version in use).

    Raises:
        ValueError: if no class-0 or no class-1 samples were collected.
    """
    minority_chunks = []
    majority_chunks = []
    remaining = delete_num
    for data, label in trainLoader:
        flat_label = label.reshape(-1)
        minority_mask = flat_label == 1
        if remaining > 0:
            # Only the count matters here: discarded samples are never used again.
            remaining -= min(remaining, int(minority_mask.sum()))
        else:
            minority_chunks.append(data[minority_mask].numpy())
        majority_chunks.append(data[flat_label == 0].numpy())

    n_minority = sum(len(chunk) for chunk in minority_chunks)
    n_majority = sum(len(chunk) for chunk in majority_chunks)
    if n_minority == 0 or n_majority == 0:
        raise ValueError(
            f'need samples of both classes, got {n_majority} of class 0 and {n_minority} of class 1'
        )

    # Convert: stack majority first, then minority, and flatten each sample.
    samples = np.concatenate(majority_chunks + minority_chunks)
    sample_shape = samples.shape[1:]
    features = samples.reshape(len(samples), -1)
    labels = np.concatenate((np.zeros(n_majority), np.ones(n_minority)))

    # Define the SMOTE model; random_state acts as the random seed.
    smo = SMOTE(random_state=42)
    X_smo, y_smo = smo.fit_resample(features, labels)

    # Restore the per-sample shape instead of assuming 10000 x 3 x 32 x 32.
    return X_smo.reshape((-1,) + sample_shape)

In [91]:
def smote1(trainLoader, delete_num=4000):
    """Oversample class 1 against class 0 with Borderline-SMOTE (``borderline-1``).

    The loader is traversed once. The first ``delete_num`` class-1 samples are
    discarded to create an artificial imbalance. Class-1 samples are collected
    as the minority set only from batches that start after that budget is used
    up, so any class-1 samples left in the batch where the budget reaches zero
    are dropped as well. Every class-0 sample is collected as the majority set.
    Samples of other classes are ignored.

    Args:
        trainLoader: iterable yielding ``(data, label)`` batches of tensors.
        delete_num: number of leading class-1 samples to discard (default 4000).

    Returns:
        numpy array of shape ``(n_resampled, *sample_shape)``. Rows are the
        class-0 samples, then the retained class-1 samples, followed by the
        rows the sampler adds (presumably appended at the end; verify against
        the imbalanced-learn version in use).

    Raises:
        ValueError: if no class-0 or no class-1 samples were collected.
    """
    minority_chunks = []
    majority_chunks = []
    remaining = delete_num
    for data, label in trainLoader:
        flat_label = label.reshape(-1)
        minority_mask = flat_label == 1
        if remaining > 0:
            # Only the count matters here: discarded samples are never used again.
            remaining -= min(remaining, int(minority_mask.sum()))
        else:
            minority_chunks.append(data[minority_mask].numpy())
        majority_chunks.append(data[flat_label == 0].numpy())

    n_minority = sum(len(chunk) for chunk in minority_chunks)
    n_majority = sum(len(chunk) for chunk in majority_chunks)
    if n_minority == 0 or n_majority == 0:
        raise ValueError(
            f'need samples of both classes, got {n_majority} of class 0 and {n_minority} of class 1'
        )

    # Convert: stack majority first, then minority, and flatten each sample.
    samples = np.concatenate(majority_chunks + minority_chunks)
    sample_shape = samples.shape[1:]
    features = samples.reshape(len(samples), -1)
    labels = np.concatenate((np.zeros(n_majority), np.ones(n_minority)))

    # Define the sampler; random_state acts as the random seed.
    smo = BorderlineSMOTE(random_state=42, kind="borderline-1")
    X_smo, y_smo = smo.fit_resample(features, labels)

    # Restore the per-sample shape instead of assuming 10000 x 3 x 32 x 32.
    return X_smo.reshape((-1,) + sample_shape)

In [2]:
def smote2(trainLoader, delete_num=4000):
    """Oversample class 1 against class 0 with Borderline-SMOTE (``borderline-2``).

    The loader is traversed once. The first ``delete_num`` class-1 samples are
    discarded to create an artificial imbalance. Class-1 samples are collected
    as the minority set only from batches that start after that budget is used
    up, so any class-1 samples left in the batch where the budget reaches zero
    are dropped as well. Every class-0 sample is collected as the majority set.
    Samples of other classes are ignored.

    Args:
        trainLoader: iterable yielding ``(data, label)`` batches of tensors.
        delete_num: number of leading class-1 samples to discard (default 4000).

    Returns:
        numpy array of shape ``(n_resampled, *sample_shape)``. Rows are the
        class-0 samples, then the retained class-1 samples, followed by the
        rows the sampler adds (presumably appended at the end; verify against
        the imbalanced-learn version in use).

    Raises:
        ValueError: if no class-0 or no class-1 samples were collected.
    """
    minority_chunks = []
    majority_chunks = []
    remaining = delete_num
    for data, label in trainLoader:
        flat_label = label.reshape(-1)
        minority_mask = flat_label == 1
        if remaining > 0:
            # Only the count matters here: discarded samples are never used again.
            remaining -= min(remaining, int(minority_mask.sum()))
        else:
            minority_chunks.append(data[minority_mask].numpy())
        majority_chunks.append(data[flat_label == 0].numpy())

    n_minority = sum(len(chunk) for chunk in minority_chunks)
    n_majority = sum(len(chunk) for chunk in majority_chunks)
    if n_minority == 0 or n_majority == 0:
        raise ValueError(
            f'need samples of both classes, got {n_majority} of class 0 and {n_minority} of class 1'
        )

    # Convert: stack majority first, then minority, and flatten each sample.
    samples = np.concatenate(majority_chunks + minority_chunks)
    sample_shape = samples.shape[1:]
    features = samples.reshape(len(samples), -1)
    labels = np.concatenate((np.zeros(n_majority), np.ones(n_minority)))

    # Define the sampler; random_state acts as the random seed.
    smo = BorderlineSMOTE(random_state=42, kind="borderline-2")
    X_smo, y_smo = smo.fit_resample(features, labels)

    # Restore the per-sample shape instead of assuming 10000 x 3 x 32 x 32.
    return X_smo.reshape((-1,) + sample_shape)

In [5]:
import random
from sklearn.neighbors import NearestNeighbors
import numpy as np
class Smote:
    """Minimal SMOTE oversampler for a 2-D array of minority-class samples.

    Args:
        samples: array of shape ``(n_samples, n_attrs)``.
        N: amount of oversampling in percent; ``int(N / 100)`` synthetic rows
            are generated per input row, so values below 100 produce no rows.
        k: number of nearest neighbours considered for each sample.
    """

    def __init__(self, samples, N=10, k=5):
        self.n_samples, self.n_attrs = samples.shape
        self.N = N
        self.k = k
        self.samples = samples
        self.newindex = 0  # next free row of self.synthetic

    def over_sampling(self):
        """Generate and return the synthetic samples.

        Returns:
            Array of shape ``(n_samples * int(N / 100), n_attrs)``.

        Raises:
            ValueError: if fewer than two samples are available, since a
                sample then has no neighbour to interpolate towards.
        """
        if self.n_samples < 2:
            raise ValueError('Smote needs at least two samples to interpolate between')
        N = int(self.N / 100)
        self.synthetic = np.zeros((self.n_samples * N, self.n_attrs))
        # Restart the write cursor so the method can be called more than once.
        self.newindex = 0
        # A sample cannot have more than n_samples - 1 neighbours other than itself.
        n_neighbors = min(self.k, self.n_samples - 1)
        neighbors = NearestNeighbors(n_neighbors=n_neighbors).fit(self.samples)
        print ('neighbors',neighbors)
        # Querying without X returns the neighbours of every fitted point and
        # excludes the point itself, so no synthetic row is a plain copy of
        # its source sample caused by self-matching.
        nn_indices = neighbors.kneighbors(return_distance=False)
        for i, nnarray in enumerate(nn_indices):
            self._populate(N, i, nnarray)
        return self.synthetic

    # For each minority-class sample, pick N of its nearest neighbours (with
    # replacement) and generate N synthetic samples.
    def _populate(self, N, i, nnarray):
        """Write ``N`` synthetic rows interpolated between sample ``i`` and its neighbours.

        Args:
            N: number of synthetic rows to generate for this sample.
            i: index of the source sample in ``self.samples``.
            nnarray: indices of the neighbours of sample ``i``.
        """
        for _ in range(N):
            # Choose among the neighbours actually supplied rather than assuming k of them.
            nn = random.randint(0, len(nnarray) - 1)
            dif = self.samples[nnarray[nn]] - self.samples[i]
            gap = random.random()  # position along the segment, in [0, 1)
            self.synthetic[self.newindex] = self.samples[i] + gap * dif
            self.newindex += 1
# Demo: oversample a tiny 6x3 array. With N=100, int(N/100) == 1, so one
# synthetic row is generated per input row. No random seed is set, so the
# printed values differ from run to run.
a=np.array([[1,2,3],[4,5,6],[2,3,1],[2,1,2],[2,3,4],[2,3,4]])
s=Smote(a,N=100)
print( s.over_sampling())


neighbors NearestNeighbors()
[[1.12140022 2.12140022 2.75719956]
 [2.34428963 3.34428963 4.34428963]
 [1.98260712 2.98260712 1.03478575]
 [2.         2.03516405 3.03516405]
 [2.         3.         4.        ]
 [2.         3.         4.        ]]
