In [4]:
import pandas as pd
import torch
from torch.utils.data import Dataset,DataLoader
from sklearn.preprocessing import  LabelEncoder


# Load the sonar CSV (it has no header row) and pull out NumPy arrays:
# columns 0-59 are used as the features, column 60 as the class label.
data = pd.read_csv("data/sonar_dataset.csv", header=None)
X, y = data.iloc[:, 0:60].values, data.iloc[:, 60].values

# Map the class labels to integer codes. fit() learns the set of distinct
# labels in y; transform() then replaces each label with its integer code.
encoder = LabelEncoder().fit(y)
y = encoder.transform(y)

# Convert both arrays to float32 tensors; the labels are reshaped into a
# single column so that each sample carries a length-1 target.
X = torch.tensor(X, dtype=torch.float32)
y = torch.tensor(y, dtype=torch.float32).reshape(-1, 1)

# Serve shuffled mini-batches of 16 (features, label) pairs and print the
# first batch only, as a quick sanity check.
loader = DataLoader(list(zip(X, y)), batch_size=16, shuffle=True)
for X_batch, y_batch in loader:
    print(X_batch, y_batch)
    break

tensor([[2.6500e-02, 4.4000e-02, 1.3700e-02, 8.4000e-03, 3.0500e-02, 4.3800e-02,
         3.4100e-02, 7.8000e-02, 8.4400e-02, 7.7900e-02, 3.2700e-02, 2.0600e-01,
         1.9080e-01, 1.0650e-01, 1.4570e-01, 2.2320e-01, 2.0700e-01, 1.1050e-01,
         1.0780e-01, 1.1650e-01, 2.2240e-01, 6.8900e-02, 2.0600e-01, 2.3840e-01,
         9.0400e-02, 2.2780e-01, 5.8720e-01, 8.4570e-01, 8.4670e-01, 7.6790e-01,
         8.0550e-01, 6.2600e-01, 6.5450e-01, 8.7470e-01, 9.8850e-01, 9.3480e-01,
         6.9600e-01, 5.7330e-01, 5.8720e-01, 6.6630e-01, 5.6510e-01, 5.2470e-01,
         3.6840e-01, 1.9970e-01, 1.5120e-01, 5.0800e-02, 9.3100e-02, 9.8200e-02,
         5.2400e-02, 1.8800e-02, 1.0000e-02, 3.8000e-03, 1.8700e-02, 1.5600e-02,
         6.8000e-03, 9.7000e-03, 7.3000e-03, 8.1000e-03, 8.6000e-03, 9.5000e-03],
        [2.0100e-02, 2.6000e-03, 1.3800e-02, 6.2000e-03, 1.3300e-02, 1.5100e-02,
         5.4100e-02, 2.1000e-02, 5.0500e-02, 1.0970e-01, 8.4100e-02, 9.4200e-02,
         1.2040e-01, 4.2000

In [6]:
from sklearn.model_selection import train_test_split
import torch.nn as nn
import torch.optim as optim

# Prepare the train/test split and the training DataLoader.
# Everything below is stochastic (split, weight initialisation, batch order),
# so seed both scikit-learn's split and torch's global RNG; otherwise the
# reported accuracy changes on every run of the notebook.
SEED = 42
torch.manual_seed(SEED)
X_train, X_test, y_train, y_test = train_test_split(
    X, y, train_size=0.7, shuffle=True, random_state=SEED
)

loader = DataLoader(list(zip(X_train, y_train)), shuffle=True, batch_size=16)

# Build the model: 60 inputs -> 60 -> 30 -> 1, with ReLU between the linear
# layers and a final Sigmoid so the output is a probability for BCELoss.
model = nn.Sequential(
    nn.Linear(60, 60),
    nn.ReLU(),
    nn.Linear(60, 30),
    nn.ReLU(),
    nn.Linear(30, 1),
    nn.Sigmoid()
)

# Train with plain SGD on binary cross-entropy.
n_epochs = 200
loss_fn = nn.BCELoss()
optimizer = optim.SGD(model.parameters(), lr=0.1)
model.train()
for epoch in range(n_epochs):
    for X_batch, y_batch in loader:  # one optimisation step per mini-batch
        y_pred = model(X_batch)
        loss = loss_fn(y_pred, y_batch)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

# Estimate accuracy on the held-out test set.
model.eval()
# No gradients are needed for evaluation; skip building the autograd graph.
with torch.no_grad():
    y_pred = model(X_test)
# Round the predicted probabilities to 0/1 and compare with the labels.
acc = (y_pred.round() == y_test).float().mean()
acc = float(acc)
print("Model accuracy: %.2f%%" % (acc*100))

Model accuracy: 69.84%
