In [None]:
import torch
from torch.utils.data import Dataset,DataLoader,TensorDataset,Subset
import pandas as pd
import numpy as np
from torch.nn import Module,ReLU,Linear
from torch import optim
import torch.nn.functional as F
import matplotlib.pyplot as plt

In [None]:
# Read the CSV at ./data/data_process/data.csv into a DataFrame.
data=pd.read_csv('./data/data_process/data.csv',encoding='utf-8')
# Bare expression: the notebook renders the frame as this cell's output.
data

In [None]:
# Show whether PyTorch can see a CUDA device (the cell output is True or False).
torch.cuda.is_available()

In [None]:
#torch.cuda.get_device_name(0)

In [None]:
# Use the first CUDA device when one is visible to PyTorch, otherwise the CPU.
dev = "cuda:0" if torch.cuda.is_available() else "cpu"

# torch.device handle built from the name chosen above.
device = torch.device(dev)

In [None]:
class Data(Dataset):
    """Row-shuffled, min-max scaled, in-memory dataset built from a DataFrame.

    Column 0 of ``data`` is used as the label and every other column as a
    feature. Rows are shuffled once with ``np.random.permutation``, each
    feature column is rescaled to [0, 1] with its own min and max, and the
    result is stored as one float32 tensor laid out as
    ``[feature_1, ..., feature_k, label]``.

    Args:
        data: object exposing ``.values`` as a 2-D numeric array
            (e.g. a ``pandas.DataFrame``).
        device: where the tensor is stored. When omitted, "cuda:0" is used
            if CUDA is available and "cpu" otherwise.

    Attributes kept for inspection: ``ds_demo`` (shuffled float64 rows),
    ``y_raw`` / ``x_raw`` (label / feature views of ``ds_demo``),
    ``max_array`` / ``min_array`` (per-column extrema), ``data_shape`` and
    ``data`` (the final tensor).
    """
    def __init__(self,data,device=None):
        super().__init__()
        if device is None:
            device=torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
        # Force float64 up front: with an integer table the scaled features
        # would otherwise be written into an integer buffer and truncated.
        self.ds_demo=np.random.permutation(np.asarray(data.values,dtype=np.float64))
        self.y_raw=self.ds_demo[:,0]
        self.x_raw=self.ds_demo[:,1:]
        self.max_array=self.ds_demo.max(axis=0)
        self.min_array=self.ds_demo.min(axis=0)
        self.data_shape=self.ds_demo.shape
        # Per-feature range; a constant column has range 0, which would give
        # 0/0 = NaN, so divide by 1 instead and let the column scale to 0.
        feature_min=self.min_array[1:]
        feature_span=self.max_array[1:]-feature_min
        feature_span[feature_span==0]=1.0
        self.data=np.empty_like(self.ds_demo)
        # Features go first, the label moves to the last column.
        self.data[:,:-1]=(self.x_raw-feature_min)/feature_span
        self.data[:,-1]=self.y_raw
        self.data=torch.tensor(self.data,dtype=torch.float32).to(device)
    def __len__(self):
        """Number of rows in the dataset."""
        return len(self.ds_demo)
    def __getitem__(self,index):
        """Return ``(features, label)`` for ``index`` as slices of the stored tensor."""
        return self.data[index,0:-1],self.data[index,-1]

# Wrap the loaded table in the Data dataset defined above.
ds=Data(data)

# Mini-batch size for training is 16.
bs=16

# Rows [0, 15000) form the training split; the remaining rows are validation.
ds_train=Subset(ds,range(15000))
ds_valid=Subset(ds,range(len(ds_train),len(ds)))

# Mini-batch loaders: only the training loader reshuffles on every pass,
# and validation uses batches twice as large.
dl_train=DataLoader(dataset=ds_train,shuffle=True,batch_size=bs)
dl_valid=DataLoader(dataset=ds_valid,batch_size=2*bs)

# Loaders that yield each split as a single batch.
dl_train_fullsize=DataLoader(dataset=ds_train,batch_size=len(ds_train))
dl_valid_fullsize=DataLoader(dataset=ds_valid,batch_size=len(ds_valid))

In [None]:
class Company_Classifier(Module):
    """Fully connected classifier: in_features -> 64 -> 32 -> 16 -> 8 -> 4 -> num_classes.

    Every hidden linear layer is followed by a ReLU. ``forward`` returns the
    raw output of the last linear layer (logits) with no final activation:
    ``torch.nn.functional.cross_entropy`` applies log-softmax itself, so
    squashing the scores through a sigmoid first confines them to (0, 1) and
    prevents the softmax from ever becoming confident. ``argmax`` over the
    logits picks the same class as ``argmax`` over their sigmoid.

    Args:
        in_features: width of the input vector (default 70).
        num_classes: number of output scores (default 2).
    """
    def __init__(self,in_features=70,num_classes=2):
        super().__init__()
        # Layer attribute names l1..l6 / r1..r5 are kept so state_dict keys stay stable.
        self.l1=Linear(in_features,64)
        self.r1=ReLU()
        self.l2=Linear(64,32)
        self.r2=ReLU()
        self.l3=Linear(32,16)
        self.r3=ReLU()
        self.l4=Linear(16,8)
        self.r4=ReLU()
        self.l5=Linear(8,4)
        self.r5=ReLU()
        self.l6=Linear(4,num_classes)
    def forward(self,x):
        """Map a ``(..., in_features)`` tensor to ``(..., num_classes)`` logits."""
        hidden_stack=(
            (self.l1,self.r1),
            (self.l2,self.r2),
            (self.l3,self.r3),
            (self.l4,self.r4),
            (self.l5,self.r5),
        )
        for linear,activation in hidden_stack:
            x=activation(linear(x))
        # No activation here: the loss function expects unnormalised scores.
        return self.l6(x)

In [None]:
# Instantiate the classifier and move its parameters to the selected device.
model=Company_Classifier().to(device)

def Accuracy(preds,yb):
    """Accuracy metric: fraction of rows whose top-scoring class equals the target.

    Args:
        preds: tensor of per-class scores; the class axis is the last one.
        yb: tensor of target class indices, one per row of ``preds``.

    Returns:
        A 0-d float32 tensor holding the mean of the per-row matches.
    """
    predicted_class=preds.argmax(dim=-1)
    is_hit=predicted_class.eq(yb)
    return is_hit.float().mean()

# Train for 40 epochs.
epochs=40

# Per-epoch metric histories. Entries are plain Python floats so they carry no
# autograd graph or device memory and can be plotted directly.
loss_history=[]
valid_loss_history=[]
train_loss_history=[]

accuracy_history=[]
valid_accuracy_history=[]
train_accuracy_history=[]

loss_func=F.cross_entropy

# Adam optimiser over all model parameters.
opt=optim.Adam(model.parameters(),lr=1e-4)

def _evaluate(loader):
    """Return ``(mean loss, accuracy)`` of ``model`` over every sample in ``loader``.

    Runs under ``torch.no_grad()`` so no autograd graph is built. Batch
    results are weighted by batch size, so a short final batch does not skew
    the mean. Returns ``(nan, nan)`` when the loader yields no samples.
    """
    loss_sum=0.0
    hit_sum=0.0
    n_seen=0
    with torch.no_grad():
        for xb,yb in loader:
            xb=xb.to(device)
            yb=yb.to(device)
            scores=model(xb)
            n_batch=yb.shape[0]
            loss_sum+=loss_func(scores,yb.long()).item()*n_batch
            hit_sum+=Accuracy(scores,yb).item()*n_batch
            n_seen+=n_batch
    if n_seen==0:
        return float("nan"),float("nan")
    return loss_sum/n_seen,hit_sum/n_seen

for epoch in range(epochs):
    model.train()
    for xb,yb in dl_train:
        Input=xb.to(device)
        Target=yb.to(device)
        Output=model(Input)
        # cross_entropy needs integer class indices; the dataset stores labels as float32.
        loss=loss_func(Output,Target.long())

        loss.backward()
        opt.step()
        opt.zero_grad()

    model.eval()
    # Evaluate on both splits once the epoch's updates are done.
    valid_loss,valid_accuracy=_evaluate(dl_valid)
    train_loss,train_accuracy=_evaluate(dl_train)
    valid_loss_history.append(valid_loss)
    train_loss_history.append(train_loss)
    valid_accuracy_history.append(valid_accuracy)
    train_accuracy_history.append(train_accuracy)

    print("epoch:",epoch)
    print("train_loss:",train_loss)
    print("valid_loss:",valid_loss)
    print("train_accuracy:",train_accuracy)
    print("valid_accuracy:",valid_accuracy)


In [None]:
print("result")
print()

def _as_floats(history):
    """Return ``history`` as a list of plain floats.

    Entries may be Python numbers or one-element tensors; ``float()`` accepts
    both, including tensors that require grad or live on a CUDA device, which
    matplotlib cannot convert on its own.
    """
    return [float(value) for value in history]

# Two stacked panels: loss on top, accuracy below.
fig,(ax1,ax2)=plt.subplots(2,1,figsize=(10,5))
fig.subplots_adjust(wspace=0.2,hspace=0.5)

# x defaults to 0..len-1, i.e. the epoch index.
ax1.plot(_as_floats(train_loss_history),label='train_loss')
ax1.plot(_as_floats(valid_loss_history),color='orange',label='valid_loss')
ax1.set_xlabel('epochs')
ax1.set_ylabel('loss')
ax1.legend()

ax2.plot(_as_floats(train_accuracy_history),label='train_accuracy')
ax2.plot(_as_floats(valid_accuracy_history),color='orange',label='valid_accuracy')
ax2.set_xlabel('epochs')
ax2.set_ylabel('accuracy')
ax2.legend()

#print(valid_loss_history)
#print(valid_accuracy_history)