<a href="https://colab.research.google.com/github/SparKgod1/Skills-and-Expertise/blob/master/Linear_Regression/Ordinary_Least_Squares.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# 数据集

## 导入数据集

In [1]:
from sklearn.datasets import load_iris
# NOTE(review): despite its name, `diabetes` holds the Iris dataset returned by load_iris,
# not a diabetes dataset. The name is kept because the following cells reference it.
# as_frame=True is requested so that .data and .target can be inspected with pandas
# methods such as .describe() and .head().
diabetes = load_iris(as_frame=True)
# Summary statistics of the feature columns (rendered as the cell output).
diabetes.data.describe()

Unnamed: 0,sepal length (cm),sepal width (cm),petal length (cm),petal width (cm)
count,150.0,150.0,150.0,150.0
mean,5.843333,3.057333,3.758,1.199333
std,0.828066,0.435866,1.765298,0.762238
min,4.3,2.0,1.0,0.1
25%,5.1,2.8,1.6,0.3
50%,5.8,3.0,4.35,1.3
75%,6.4,3.3,5.1,1.8
max,7.9,4.4,6.9,2.5


In [2]:
# Preview the first rows of the feature table.
diabetes.data.head()

Unnamed: 0,sepal length (cm),sepal width (cm),petal length (cm),petal width (cm)
0,5.1,3.5,1.4,0.2
1,4.9,3.0,1.4,0.2
2,4.7,3.2,1.3,0.2
3,4.6,3.1,1.5,0.2
4,5.0,3.6,1.4,0.2


In [3]:
# Summary statistics of the target column (the labels that are regressed on later).
diabetes.target.describe()

count    150.000000
mean       1.000000
std        0.819232
min        0.000000
25%        0.000000
50%        1.000000
75%        2.000000
max        2.000000
Name: target, dtype: float64

In [4]:
# Preview the first target values.
diabetes.target.head()

0    0
1    0
2    0
3    0
4    0
Name: target, dtype: int64

## 分割测试集和训练集

In [5]:
import torch
from torch.utils.data import DataLoader, TensorDataset
from sklearn.model_selection import train_test_split

# Hold out 20% of the samples for testing; random_state fixes the seed of the split.
X_train, X_test, y_train, y_test = train_test_split(
    diabetes.data.values,
    diabetes.target.values,
    test_size=0.2,
    random_state=42,
)

# Convert each NumPy split into a float32 tensor (features and targets alike).
X_train, X_test, y_train, y_test = (
    torch.tensor(split, dtype=torch.float32)
    for split in (X_train, X_test, y_train, y_test)
)

# Mini-batch size shared by both loaders.
batch_size = 8

# Pair features with targets so they can be indexed and batched together.
train_dataset = TensorDataset(X_train, y_train)
test_dataset = TensorDataset(X_test, y_test)

# Batched loaders: training batches are shuffled, test batches keep their order.
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)

# 线性回归

In [7]:
import torch.nn as nn
import torch.optim as optim

class LinearRegression(nn.Module):
  """A single affine layer mapping `input_size` features to `output_size` outputs."""

  def __init__(self, input_size, output_size):
    """Create the underlying nn.Linear layer.

    Args:
      input_size: number of input features per sample.
      output_size: number of values predicted per sample.
    """
    super().__init__()
    self.linear = nn.Linear(in_features=input_size, out_features=output_size)

  def forward(self, x):
    """Apply the linear layer to `x` and return the result."""
    prediction = self.linear(x)
    return prediction

# Seed PyTorch's global RNG before anything random happens, so that the initial
# weights and the order of the shuffled training batches are repeatable when the
# notebook is re-run from a fresh kernel.
torch.manual_seed(42)

# Instantiate the model: one input per feature column, a single output value.
input_size = X_train.shape[1]
output_size = 1
model = LinearRegression(input_size, output_size)

# Loss function (mean squared error) and optimizer (plain SGD, learning rate 1e-2).
criterion = nn.MSELoss()
optimizer = optim.SGD(model.parameters(), lr=1e-2)

# Hyperparameters
num_epochs = 30

# Train the model
for epoch in range(num_epochs):
  model.train()  # put the model in training mode
  total_loss = 0

  # Iterate over the train_loader built in the data-split cell instead of creating a
  # new DataLoader with a hard-coded batch size every epoch. This keeps the batches
  # consistent with the len(train_loader) used to average the loss below.
  for batch_X, batch_y in train_loader:
    # Forward pass. Targets are reshaped to a column so they match the
    # (batch, output_size) shape produced by the model.
    batch_y = batch_y.view(-1, 1)
    outputs = model(batch_X)
    loss = criterion(outputs, batch_y)

    # Backward pass and parameter update
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()

    total_loss += loss.item()

  # Report the mean per-batch loss for this epoch
  print(f'Epoch [{epoch+1}/{num_epochs}], Loss: {total_loss/len(train_loader):.4f}')

# Evaluate the model on the test set
model.eval()  # put the model in evaluation mode
with torch.no_grad():
    # Reshape the targets to a column so they compare element-wise with the predictions.
    y_test = y_test.view(-1, 1)
    y_pred = model(X_test)
    test_loss = criterion(y_pred, y_test)

    # Turn the continuous output into a label: round, then clamp into the range 0..2.
    y_pred_rounded = y_pred.round().clamp(min=0, max=2)
    # Number of positions where the rounded prediction equals the target.
    num_common_elements = (y_pred_rounded == y_test).sum()
    # Fraction of test samples predicted correctly.
    accuracy = num_common_elements.item() / len(y_test)

    print(f'Test Loss: {test_loss.item():.4f}')
    print(f'Accuracy: {accuracy:.4f}\ntotal: {len(y_test)}\nright_num: {num_common_elements.item()}')


Epoch [1/30], Loss: 0.2850
Epoch [2/30], Loss: 0.0786
Epoch [3/30], Loss: 0.0827
Epoch [4/30], Loss: 0.0620
Epoch [5/30], Loss: 0.0583
Epoch [6/30], Loss: 0.0571
Epoch [7/30], Loss: 0.0607
Epoch [8/30], Loss: 0.0578
Epoch [9/30], Loss: 0.0579
Epoch [10/30], Loss: 0.0769
Epoch [11/30], Loss: 0.0665
Epoch [12/30], Loss: 0.0547
Epoch [13/30], Loss: 0.0616
Epoch [14/30], Loss: 0.0589
Epoch [15/30], Loss: 0.0686
Epoch [16/30], Loss: 0.0537
Epoch [17/30], Loss: 0.0621
Epoch [18/30], Loss: 0.0638
Epoch [19/30], Loss: 0.0549
Epoch [20/30], Loss: 0.0686
Epoch [21/30], Loss: 0.0579
Epoch [22/30], Loss: 0.0729
Epoch [23/30], Loss: 0.0621
Epoch [24/30], Loss: 0.0701
Epoch [25/30], Loss: 0.0776
Epoch [26/30], Loss: 0.0674
Epoch [27/30], Loss: 0.0658
Epoch [28/30], Loss: 0.0601
Epoch [29/30], Loss: 0.0671
Epoch [30/30], Loss: 0.0654
Test Loss: 0.0382
Accuracy: 1.0000
total: 30
right_num: 30
