In [4]:
import pandas as pd
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split

data = pd.read_csv("data/sonar_dataset.csv", header=None)
X = data.iloc[:, 0:60].values
y = data.iloc[:, 60].values

encoder = LabelEncoder()
encoder.fit(y)
y = encoder.transform(y)

X = torch.tensor(X,dtype=torch.float32)
y = torch.tensor(y,dtype=torch.float32).reshape(-1,1)

#将数据按照7：3拆分成训练集和测试集
X_train, X_test, y_train, y_test = train_test_split(X, y, train_size=0.7, shuffle=True) #也可以使用trainset, testset = random_split(dataset, [0.7, 0.3])来拆分训练集和测试集

loader = DataLoader(list(zip(X_train,y_train)), shuffle=True, batch_size=16)

model = nn.Sequential(
    nn.Linear(60,60),
    nn.ReLU(),
    nn.Linear(60,30),
    nn.ReLU(),
    nn.Linear(30,1),
    nn.Sigmoid()
)

n_epochs = 200
loss_fn = nn.BCELoss()
optimizer = optim.SGD(model.parameters(), lr=0.1)#创建了一个随机梯度下降（SGD）优化器
model.train()
for epoch in range(n_epochs):
    for X_batch, y_batch in loader:
        y_pred = model(X_batch)
        loss = loss_fn(y_pred, y_batch)
        optimizer.zero_grad() #将之前的梯度信息归零，以准备计算新一轮的梯度
        loss.backward() #反向传播计算梯度
        optimizer.step() #使用优化器根据计算得到的梯度来更新模型的参数

model.eval()#将模型切换到评估模式（evaluation mode）
y_pred = model(X_test)
acc = (y_pred.round() == y_test).float().mean()
acc = float(acc)
print("Model accuracy: %.2f" % (acc*100))

Model accuracy: 57.14


In [5]:
#使用Dataset创建数据集,继承Dataset类，并重写init，len，getitem函数
from torch.utils.data import Dataset
class SonarDataset(Dataset):
    def __init__(self, X, y):
        self.X = torch.tensor(X, dtype=torch.float32)
        self.y = torch.tensor(y, dtype=torch.float32)

    def __len__(self):
        return len(self.X)

    def __getitem__(self, idx):
        features = self.X[idx]
        target = self.y[idx]
        return features, target

In [7]:
#使用Dataset以及结合DataLoader完成数据初始化
dataset = SonarDataset(X_train, y_train)
loader = DataLoader(dataset, shuffle=True, batch_size=16)

# create model
model = nn.Sequential(
    nn.Linear(60, 60),
    nn.ReLU(),
    nn.Linear(60, 30),
    nn.ReLU(),
    nn.Linear(30, 1),
    nn.Sigmoid()
)

# Train the model
n_epochs = 200
loss_fn = nn.BCELoss()
optimizer = optim.SGD(model.parameters(), lr=0.1)
model.train()
for epoch in range(n_epochs):
    for X_batch, y_batch in loader:
        y_pred = model(X_batch)
        loss = loss_fn(y_pred, y_batch)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

# evaluate accuracy after training
model.eval()
y_pred = model(torch.tensor(X_test, dtype=torch.float32))
y_test = torch.tensor(y_test, dtype=torch.float32)
acc = (y_pred.round() == y_test).float().mean()#.round() 操作后，将预测的浮点数值四舍五入为最接近的整数。这是因为在二元分类问题中，模型的输出一般是一个介于 0 到 1 之间的浮点数，代表属于正类的概率。通过将其四舍五入，可以将它转换为二元标签（0 或 1）
acc = float(acc)
print("Model accuracy: %.2f%%" % (acc*100))

  self.X = torch.tensor(X, dtype=torch.float32)
  self.y = torch.tensor(y, dtype=torch.float32)


Model accuracy: 73.02%


  y_pred = model(torch.tensor(X_test, dtype=torch.float32))
  y_test = torch.tensor(y_test, dtype=torch.float32)
