# In [None]:  (notebook cell marker left over from the export)
import torch
import pickle
import numpy as np
from torch.utils.data import Dataset, DataLoader
from sklearn.linear_model import LogisticRegression
from sklearn.feature_selection import RFE
import torch.nn as nn
from sklearn.metrics import f1_score, classification_report

def _load_pickle(path):
    """Return the object deserialized from the pickle file at *path*."""
    # NOTE: unpickling can execute arbitrary code; only load trusted files.
    with open(path, 'rb') as handle:
        return pickle.load(handle)


# Raw train / validation / test splits, read from the working directory.
train_data_raw = _load_pickle('train.pkl')
valid_data_raw = _load_pickle('valid.pkl')
test_data_raw = _load_pickle('test.pkl')
def data_processing(raw_data, index, n_classes=12):
    """Drop rows of excluded classes and renumber the remaining labels.

    The class label is read from the second-to-last element of each row.
    Rows whose label is listed in *index* (or is not one of
    ``0 .. n_classes - 1``) are dropped. The surviving labels are shifted so
    that they become consecutive values ``0 .. K - 1``, preserving the
    ascending order of the original labels.

    Args:
        raw_data: Iterable of mutable rows that provide ``.copy()``
            (e.g. lists or 1-D NumPy arrays).
        index: Collection of class labels to remove.
        n_classes: Number of classes in the original labelling.

    Returns:
        A new list holding copies of the kept rows with renumbered labels.
        The rows of *raw_data* are left untouched, so calling this function
        repeatedly on the same input always yields the same result.
    """
    excluded = set(index)
    kept_classes = [c for c in range(n_classes) if c not in excluded]
    # Amount to subtract from each surviving label to close the gaps.
    offset = {old: old - new for new, old in enumerate(kept_classes)}

    new_data = []
    for row in raw_data:
        label = row[-2]
        if label in offset:
            # Work on a copy: the in-place update used to corrupt the
            # caller's data and made a second call remap labels again.
            row = row.copy()
            row[-2] = label - offset[label]
            new_data.append(row)
    return new_data
# Class labels to drop before training; an empty list keeps every class.
index = []
train_data_raw1 = data_processing(train_data_raw, index)
valid_data_raw1 = data_processing(valid_data_raw, index)
# Every row ends with two non-feature columns; for train/valid the
# second-to-last column is used as the class label.
train_data = torch.tensor(train_data_raw1, dtype=torch.float)[:, :-2]
valid_data = torch.tensor(valid_data_raw1, dtype=torch.float)[:, :-2]
train_label = torch.tensor(train_data_raw1, dtype=torch.long)[:, -2]
valid_label = torch.tensor(valid_data_raw1, dtype=torch.long)[:, -2]
test_data = torch.tensor(test_data_raw, dtype=torch.float)[:, :-2]
# NOTE(review): the test label is read from the LAST column (-1) while
# train/valid use column -2, and the test rows are not passed through
# data_processing -- confirm this matches the layout of test.pkl.
test_label = torch.tensor(test_data_raw, dtype=torch.long)[:, -1]
estimator = LogisticRegression()  # logistic regression
# n_features_to_select is keyword-only in scikit-learn >= 1.0; passing it
# positionally raises a TypeError there.
selector = RFE(estimator, n_features_to_select=400, step=300)
selector = selector.fit(train_data, train_label)
# Columns flagged True in support_ are the features RFE kept.
train_data = train_data[:, selector.support_]
valid_data = valid_data[:, selector.support_]
test_data = test_data[:, selector.support_]

class My_dataset(Dataset):
    """Map-style dataset that pairs each sample with its label.

    Args:
        data: Indexable collection of samples.
        label: Indexable collection of labels, aligned with *data*.
    """

    def __init__(self, data, label):
        # The previous ``super(My_dataset).__init__()`` created an unbound
        # super object, so the parent initializer was never actually run.
        super().__init__()
        self.data = data
        self.label = label

    def __getitem__(self, index):
        """Return the ``(sample, label)`` pair stored at *index*."""
        return self.data[index], self.label[index]

    def __len__(self):
        """Return the number of samples."""
        return len(self.data)

def make_weights_for_balanced_classes(images, nclasses):
    """Return one sampling weight per sample, inversely proportional to class size.

    The base weight of class ``i`` is ``N / count[i]`` where ``N`` is the
    total number of samples. Three classes carry an extra hand-tuned factor:
    class 3 is divided by 15, class 1 is multiplied by 3 and class 9 is
    multiplied by 8. A class with no samples gets weight 0 instead of
    raising ``ZeroDivisionError``.

    The per-class weights are printed as a side effect.

    Args:
        images: Sized, re-iterable collection of integer class labels
            (one per sample), each in ``0 .. nclasses - 1``.
        nclasses: Total number of classes.

    Returns:
        A list with the weight of each sample, in the order of *images*.
    """
    count = [0] * nclasses
    for item in images:
        count[item] += 1

    N = float(sum(count))
    weight_per_class = [0.] * nclasses
    for i, class_count in enumerate(count):
        if class_count == 0:
            # Absent class: nothing to sample, and N / 0 would crash.
            continue
        base = N / float(class_count)
        if i == 3:
            weight_per_class[i] = base / 15.0
        elif i == 1:
            weight_per_class[i] = base * 3
        elif i == 9:
            weight_per_class[i] = base * 8.0
        else:
            weight_per_class[i] = base

    print(weight_per_class)
    return [weight_per_class[val] for val in images]
# Training split: batches are drawn through a weighted sampler built from
# the per-sample class weights (12 classes).
train_dataset1 = My_dataset(train_data, train_label)
weights = torch.DoubleTensor(
    make_weights_for_balanced_classes(train_dataset1.label, 12)
)
sampler = torch.utils.data.sampler.WeightedRandomSampler(
    weights=weights,
    num_samples=len(weights),
)
train_dataloader1 = DataLoader(
    dataset=train_dataset1,
    batch_size=1024,
    sampler=sampler,
)

# Validation and test splits: one sample per batch, in stored order.
valid_dataset1 = My_dataset(valid_data, valid_label)
valid_dataloader1 = DataLoader(dataset=valid_dataset1, batch_size=1, shuffle=False)
test_dataset1 = My_dataset(test_data, test_label)
test_dataloader1 = DataLoader(dataset=test_dataset1, batch_size=1, shuffle=False)

class FocalLoss(nn.Module):
    """Multi-class focal loss, ``(1 - p_t) ** gamma * CE``, evaluated per sample.

    ``p_t`` is the softmax probability the model assigns to the target class.
    The modulating factor is computed for every sample individually and only
    then reduced; applying it to the batch-mean cross-entropy (as was done
    before) does not down-weight individual easy examples.

    Args:
        weight: Optional per-class weight tensor applied to the cross-entropy
            term (the probability ``p_t`` is always computed unweighted).
        reduction: ``'sum'`` sums the per-sample losses; any other value
            averages them, so ``forward`` always returns a scalar.
        gamma: Focusing parameter; ``0`` gives plain cross-entropy.
        eps: Stored for interface compatibility; not used by ``forward``.
    """

    def __init__(self, weight=None, reduction='mean', gamma=0, eps=1e-7):
        super(FocalLoss, self).__init__()
        self.gamma = gamma
        self.eps = eps
        self.reduction = reduction
        # reduction='none' keeps one loss value per sample so the focal
        # factor can be applied before the batch is reduced.
        self.ce = torch.nn.CrossEntropyLoss(weight=weight, reduction='none')

    def forward(self, input, target):
        """Return the reduced focal loss for logits *input* and class indices *target*."""
        weighted_ce = self.ce(input, target)
        if self.ce.weight is None:
            ce = weighted_ce
        else:
            # p_t must come from the unweighted negative log-likelihood.
            ce = torch.nn.functional.cross_entropy(input, target, reduction='none')
        p = torch.exp(-ce)
        loss = (1 - p) ** self.gamma * weighted_ce
        if self.reduction == 'sum':
            return loss.sum()
        return loss.mean()

class My_module(nn.Module):
    """Two-layer classifier: BatchNorm -> Linear -> Dropout -> BatchNorm -> LeakyReLU -> Linear.

    Args:
        n_class: Number of output classes.
        in_features: Width of the input feature vector.
        hidden: Width of the hidden layer.
    """

    def __init__(self, n_class, in_features=400, hidden=256):
        super(My_module, self).__init__()
        self.bn1 = nn.BatchNorm1d(in_features)
        self.fc1 = nn.Linear(in_features, hidden)
        self.fc2 = nn.Linear(hidden, n_class)
        self.bn2 = nn.BatchNorm1d(hidden)
        self.act1 = nn.LeakyReLU()
        self.loss = FocalLoss(gamma=2)

    def forward(self, x, y=None):
        """Return the loss when targets *y* are given, otherwise the logits.

        Args:
            x: Input batch whose last dimension is the feature width.
            y: Optional tensor of target class indices.
        """
        x = self.bn1(x)
        x = self.fc1(x)
        # Functional dropout defaults to training=True; tie it to the module
        # mode so eval() really disables it and predictions are deterministic.
        x = torch.nn.functional.dropout(x, p=0.5, training=self.training)
        x = self.bn2(x)
        x = self.act1(x)
        x = self.fc2(x)
        if y is not None:
            return self.loss(x, y)
        return x
# Classifier instance plus module-level bookkeeping for the training run.
network1 = My_module(n_class=12)
# train() appends the periodically logged batch losses to train_losses.
train_losses, test_losses = [], []
test_counter = [len(train_dataloader1.dataset) * i for i in range(21)]
# max_acc holds the best validation accuracy so far; test() updates it.
epochs, max_acc = 500, 0
def train(epochs, network, train_dataloader, valid_dataloader, lr):
    """Train *network* with SGD (momentum 0.9) and validate after every epoch.

    Every 40th batch the current loss is printed and appended to the
    module-level ``train_losses`` list. After each epoch the training
    accuracy is printed and ``test`` is run on *valid_dataloader*.

    The training accuracy is measured with a second forward pass made right
    after the optimizer step, while the network is still in training mode.

    Args:
        epochs: Number of passes over *train_dataloader*.
        network: Model returning a loss for ``network(data, target)`` and
            logits for ``network(data)``.
        train_dataloader: Loader yielding ``(data, target)`` batches.
        valid_dataloader: Loader passed on to ``test``.
        lr: Learning rate for the optimizer.

    Raises:
        ValueError: If *train_dataloader* yields no batches.
    """
    optimizer = torch.optim.SGD(network.parameters(), lr=lr, momentum=0.9)
    for epoch in range(epochs):
        batch_preds = []
        batch_targets = []
        network.train()
        for batch_idx, (data, target) in enumerate(train_dataloader):
            optimizer.zero_grad()
            loss = network(data, target)
            # Parallel execution yields a set of losses; they must be
            # averaged into a scalar before backward().
            loss = loss.mean()
            loss.backward()
            optimizer.step()
            # This pass only produces predictions, so skip the autograd graph.
            with torch.no_grad():
                output = network(data)
            preds = torch.argmax(output, dim=-1)
            if batch_idx % 40 == 0:
                print('Train Epoch: {}[{}/{}({:.0f}%)]\tloss:{:.6f}'.format(
                    epoch, batch_idx * len(data), len(train_dataloader.dataset),
                    100. * batch_idx / len(train_dataloader), loss.item()))
                train_losses.append(loss.item())
            # Collect per-batch arrays and join once per epoch; repeated
            # np.append re-copies everything gathered so far on every batch.
            batch_preds.append(preds.detach().cpu().numpy())
            batch_targets.append(target.detach().cpu().numpy())

        if not batch_preds:
            raise ValueError('train_dataloader yielded no batches')
        all_pred1 = np.concatenate(batch_preds, axis=0)
        all_target1 = np.concatenate(batch_targets, axis=0)

        acc = (all_pred1 == all_target1).mean()
        print('train: %.4f' % (acc))
        test(epoch, network, valid_dataloader)

def test(epoch, network, valid_dataloader):
    """Evaluate *network* on *valid_dataloader* and checkpoint the best model.

    Prints the accuracy and a scikit-learn classification report. When the
    accuracy exceeds the module-level ``max_acc``, that global is updated and
    the model's state dict is saved to ``best_model.pth``.

    Args:
        epoch: Index of the current epoch (not used by this function).
        network: Model returning logits for ``network(data)``.
        valid_dataloader: Loader yielding ``(data, target)`` batches.

    Returns:
        A NumPy array with the predicted class of every sample, in loader
        order.

    Raises:
        ValueError: If *valid_dataloader* yields no batches.
    """
    global max_acc
    batch_preds = []
    batch_targets = []
    network.eval()
    with torch.no_grad():
        for data, target in valid_dataloader:
            output = network(data)
            # argmax is taken on the raw logits: log_softmax is monotonic,
            # so applying it first would not change the winning class.
            preds = torch.argmax(output, dim=-1)
            # Collect per-batch arrays and join once; repeated np.append is
            # quadratic in the number of batches.
            batch_preds.append(preds.detach().cpu().numpy())
            batch_targets.append(target.detach().cpu().numpy())

    if not batch_preds:
        raise ValueError('valid_dataloader yielded no batches')
    all_pred = np.concatenate(batch_preds, axis=0)
    all_target = np.concatenate(batch_targets, axis=0)

    t = classification_report(all_target, all_pred)
    acc = (all_pred == all_target).mean()
    if acc > max_acc:
        max_acc = acc
        print("save model, and max acc is: %.4f" %(acc))
        torch.save(network.state_dict(), 'best_model.pth')
    print('Testing Accuracy : %.4f' % (acc))
    print(t)
    return all_pred
# Learning rate handed to the optimizer inside train(); start the run.
lr = 0.01
train(
    epochs=epochs,
    network=network1,
    train_dataloader=train_dataloader1,
    valid_dataloader=valid_dataloader1,
    lr=lr,
)
