Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
25 changes: 4 additions & 21 deletions .devcontainer/cpu/Dockerfile
Original file line number Diff line number Diff line change
@@ -1,23 +1,6 @@
#FROM elainasuki/ros:ros2-humble-full-v3
FROM elainasuki/ros:ros2-humble-full-0614

ARG USERNAME=Elaina

ARG USER_GID=$USER_UID

ARG GROUP_NAME=wheel
RUN apt-get update \
&& apt-get install -y sudo vim nautilus
USER ${USERNAME}
#安装前置依赖
FROM ubuntu:22.04
RUN apt-get update\
&& apt-get install -y git python3 python3-pip
RUN pip install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cpu

# 安装yolo
RUN pip install imutils ultralytics openvino
#安装其他依赖

RUN pip install jupyter d2l==0.17.6

USER root
RUN echo "source /opt/ros/humble/setup.bash" >> /home/${USERNAME}/.bashrc
ENV PYTHONPATH="/home/Elaina/yolo:${PYTHONPATH}"
RUN pip install pandas openpyxl matplotlib
2 changes: 1 addition & 1 deletion .devcontainer/cpu/build.bash
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ echo "脚本目录: $SCRIPT_DIR"
echo "父目录: $PARENT_DIR"

# 设置默认 tag
TAG="pytorch"
TAG="pytorch_cpu"

# 从外部传入的 IMAGE_REPO(格式:ghcr.io/user/repo 或 docker.io/user/repo)
IMAGE_REPO="elainasuki/other"
Expand Down
2 changes: 1 addition & 1 deletion .devcontainer/devcontainer.json
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
"name": "information-task-container",
"dockerComposeFile": "docker-compose.yml",
"service": "gpu-service",
"workspaceFolder": "/home/Elaina/pytorch",
"workspaceFolder": "/pytorch",
"shutdownAction": "stopCompose",
"customizations": {
"vscode": {
Expand Down
34 changes: 9 additions & 25 deletions .devcontainer/docker-compose.yml
Original file line number Diff line number Diff line change
@@ -1,10 +1,7 @@
version: '3'
services:
pytorch-service:
# build:
# context: ..
# dockerfile: .devcontainer/Dockerfile
image: elainasuki/other:pytorch
image: elainasuki/other:pytorch_cpu
container_name: information-cpu-container
environment:
- DISPLAY=${DISPLAY}
Expand All @@ -14,21 +11,13 @@ services:
- TERM=xterm-256color
volumes:
- /tmp/.X11-unix:/tmp/.X11-unix
- ./..:/home/Elaina/pytorch
- /dev:/dev
network_mode: host
pid: "host" # 添加 pid 命名空间共享
ipc: "host" # 添加 ipc 命名空间共享
privileged: true
- ./..:/pytorch/
entrypoint: [ '/bin/bash' ]
stdin_open: true
tty: true
user: "Elaina"
# runtime: "nvidia"
working_dir: "/home/Elaina/pytorch" # 指定默认工作目录
working_dir: "/pytorch" # 指定默认工作目录
gpu-service:
# build:
# context: ..
# dockerfile: .devcontainer/Dockerfile
image: elainasuki/other:pytorch_gpu
container_name: information-task-container
environment:
Expand All @@ -39,14 +28,9 @@ services:
- TERM=xterm-256color
volumes:
- /tmp/.X11-unix:/tmp/.X11-unix
- ./..:/home/Elaina/pytorch
- /dev:/dev
network_mode: host
pid: "host" # 添加 pid 命名空间共享
ipc: "host" # 添加 ipc 命名空间共享
privileged: true
stdin_open: true
tty: true
user: "Elaina"
- ./..:/pytorch
runtime: "nvidia"
working_dir: "/home/Elaina/pytorch" # 指定默认工作目录
tty: true
stdin_open: true
entrypoint: [ '/bin/bash' ]
working_dir: "/pytorch" # 指定默认工作目录
46 changes: 4 additions & 42 deletions .devcontainer/gpu/Dockerfile
Original file line number Diff line number Diff line change
@@ -1,44 +1,6 @@
# 基础镜像:ROS 2 Humble(官方桌面版,基于Ubuntu 22.04)
FROM elainasuki/ros:ros2-humble-full-0614
FROM pytorch/pytorch:2.9.0-cuda13.0-cudnn9-runtime

# 修复未定义变量
ARG USERNAME=Elaina
ARG USER_UID=1000
ARG USER_GID=$USER_UID
RUN pip3 install pandas openpyxl matplotlib

# 设置非交互模式+pip缓存目录(避免占用根目录空间)
ENV DEBIAN_FRONTEND=noninteractive
ENV PIP_CACHE_DIR=/tmp/pip-cache

# 1. 精简基础依赖(仅保留必需项)+ 清理缓存
RUN apt-get update && apt-get install -y --no-install-recommends \
wget \
ca-certificates \
python3-pip \
&& apt-get clean \
&& rm -rf /var/lib/apt/lists/* /tmp/* /var/tmp/*

# 2. 仅安装CUDA运行时(极致精简)
RUN wget https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2204/x86_64/cuda-keyring_1.1-1_all.deb \
&& dpkg -i cuda-keyring_1.1-1_all.deb \
&& apt-get update \
&& apt-get install -y --no-install-recommends \
cuda-runtime-12-1 \
libcudnn8=8.9.2.26-1+cuda12.1 \
&& apt-get clean \
&& rm -rf /var/lib/apt/lists/* /tmp/* /var/tmp/* \
&& rm -f cuda-keyring_1.1-1_all.deb

# 3. 配置环境变量(修复未定义问题)
ENV PATH=/usr/local/cuda-12.1/bin:${PATH}
ENV LD_LIBRARY_PATH=/usr/local/cuda-12.1/lib64:${LD_LIBRARY_PATH:-/usr/local/lib}

# 4. 优化PyTorch安装(节省空间)
RUN pip3 install --no-cache-dir --upgrade pip \
&& pip3 install --no-cache-dir --ignore-installed sympy \
&& pip3 install --no-cache-dir torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cu121 \
&& rm -rf $PIP_CACHE_DIR /tmp/* /var/tmp/*
#安装pandas 和openpyxl
RUN pip3 install pandas openpyxl
# 5. 配置ROS 2环境
RUN echo "source /opt/ros/humble/setup.bash" >> ~/.bashrc
RUN apt-get update &&\
apt-get install -y git
4 changes: 4 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
.~*
model/
*.png
*.pyc
3 changes: 3 additions & 0 deletions .vscode/settings.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
{
"python.analysis.typeCheckingMode": "standard"
}
Binary file removed data/.~task2.xlsx
Binary file not shown.
25 changes: 25 additions & 0 deletions src/infer.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
from visual import *
from model import *
from load_data import *
def infer():
    """Evaluate the saved MLP checkpoint on the validation split.

    Loads the checkpoint stored at ``/pytorch/model/best_transformer.pth``,
    runs the model on the validation tensors returned by
    ``DataBatcher.getValBatches()``, then prints the first target, the first
    prediction and the MSE loss over the returned validation tensors.

    The forward pass runs in eval mode and under ``torch.no_grad()`` so no
    autograd graph is built while only predictions are needed.
    """
    # Initialise parameters.
    model_path = '/pytorch/model/best_transformer.pth'
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

    # Load the model weights from the checkpoint dictionary.
    model = MLPModel(input_size=8, hidden_size=24, output_size=2, device=device)
    checkpoint = torch.load(model_path, map_location=device, weights_only=True)
    model.load_state_dict(checkpoint['model_state_dict'])
    model.eval()  # inference mode for any mode-dependent layers

    # Load the data.
    data_path = '/pytorch/data/task2.xlsx'
    batcher = DataBatcher(file_path=data_path, val_ratio=0.2, batch_size=16, device=device)
    val_inputs, val_outputs = batcher.getValBatches()

    # Run inference and compute the mean squared error without tracking gradients.
    loss_fn = torch.nn.MSELoss()
    with torch.no_grad():
        output = model(val_inputs)
        loss = loss_fn(output, val_outputs)

    # Compare the first entry of the targets with the first entry of the predictions.
    print("真实值:", val_outputs[0])
    print("预测值:", output[0])
    print(f"验证集均方误差: {loss.item():.4f}")


if __name__ == "__main__":
    infer()
10 changes: 5 additions & 5 deletions src/load_data.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,13 +17,13 @@ def LoadTask2Data(file_path)->tuple[torch.Tensor, torch.Tensor]:
outputs_tensor = torch.tensor(outputs, dtype=torch.float32)
return inputs_tensor, outputs_tensor
class DataBatcher:
def __init__(self, file_path: str, val_ratio: float = 0.2, batch_size: int = 32, shuffle: bool = True):
def __init__(self, file_path: str, val_ratio: float = 0.2, batch_size: int = 32, shuffle: bool = True,device: torch.device = torch.device('cpu')):
"""
数据批处理工具类,支持划分验证集和生成批次数据
"""
self.batch_size = batch_size
self.shuffle = shuffle

self.device = device
# 加载原始数据
self.inputs, self.outputs = LoadTask2Data(file_path)

Expand Down Expand Up @@ -81,16 +81,16 @@ def _splitAndCreateBatches(self, val_ratio: float):

def getTrainBatches(self):
"""获取训练集批次张量 (输入, 输出)"""
return self.train_inputs, self.train_outputs
return self.train_inputs.to(self.device), self.train_outputs.to(self.device)

def getValBatches(self):
"""获取验证集批次张量 (输入, 输出)"""
return self.val_inputs, self.val_outputs
return self.val_inputs.to(self.device), self.val_outputs.to(self.device)


if __name__ == "__main__":
# 测试代码
file_path = os.path.join("/home/Elaina/pytorch/data", "task2.xlsx")
file_path = os.path.join("/pytorch/data", "task2.xlsx")

# 初始化批处理工具
batcher = DataBatcher(file_path, val_ratio=0.2, batch_size=16)
Expand Down
19 changes: 19 additions & 0 deletions src/model.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
import torch.nn as nn
import os,torch
class MLPModel(nn.Module):
    """Simple multi-layer perceptron: Linear -> ReLU -> Linear.

    Args:
        input_size: Number of input features.
        hidden_size: Width of the hidden layer.
        output_size: Number of output features.
        device: Device the parameters are moved to at construction time.
    """

    def __init__(self, input_size, hidden_size, output_size, device=torch.device('cpu')):
        super().__init__()
        self.linear1 = nn.Linear(input_size, hidden_size)
        self.relu = nn.ReLU()
        self.linear2 = nn.Linear(hidden_size, output_size)
        # Device requested at construction; kept for callers that read it.
        self.device = device
        # Move the freshly created parameters to the target device right away.
        self.to(device)

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        """Run the forward pass.

        Args:
            x: Input tensor; it is moved to the device the weights currently
                live on before the first linear layer is applied.

        Returns:
            The output of the second linear layer.

        Raises:
            AssertionError: If ``x`` is not a ``torch.Tensor``.
        """
        assert isinstance(x, torch.Tensor), "输入必须是torch.Tensor类型"
        # Follow the weights rather than the construction-time ``self.device``:
        # the module may have been moved with ``.to(...)`` after construction.
        x = x.to(self.linear1.weight.device)
        return self.linear2(self.relu(self.linear1(x)))
32 changes: 32 additions & 0 deletions src/train.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
from visual import *
from model import *
from load_data import *
def train():
    """Train the MLP on the task-2 spreadsheet and checkpoint/plot the loss.

    Builds the model, loads the training tensors through ``DataBatcher``,
    restores an existing checkpoint through ``SaveAndVisual.loadModel`` when
    one is present in the model directory, then runs 600 optimisation steps
    with Adam and an MSE loss. After every step the loss is handed to
    ``SaveAndVisual.updateVisualization``; the final curve is written by
    ``finalizeVisualization``.
    """
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    model = MLPModel(input_size=8, hidden_size=24, output_size=2, device=device)
    data_path = '/pytorch/data/task2.xlsx'
    model_path = '/pytorch/model/'
    batcher = DataBatcher(file_path=data_path, val_ratio=0.2, batch_size=16, device=device)
    # Only the training tensors are used below, so the validation tensors are
    # not fetched (fetching them would copy them to the device for nothing).
    train_inputs, train_outputs = batcher.getTrainBatches()

    # Squared-error loss.
    loss_fn = torch.nn.MSELoss()
    optimizer = torch.optim.Adam(model.parameters(), lr=0.001)

    visual = SaveAndVisual(model_dir=model_path, loss_img_path=model_path + 'loss_curve.png')
    num_epoch = 600
    visual.loadModel(model, optimizer, device)

    # Nothing inside the loop switches the model out of training mode, so the
    # mode is set once up front instead of on every epoch.
    model.train()
    for epoch in range(num_epoch):
        optimizer.zero_grad()
        outputs = model(train_inputs)
        loss = loss_fn(outputs, train_outputs)
        loss.backward()
        optimizer.step()
        visual.updateVisualization(epoch, loss.item())
    visual.finalizeVisualization()


if __name__ == "__main__":
    train()
86 changes: 86 additions & 0 deletions src/visual.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,86 @@
# visual.py
import matplotlib.pyplot as plt
import os,torch
class SaveAndVisual:
    """Model manager: saves/loads checkpoints and plots the training loss live.

    Args:
        model_dir: Directory where ``best_transformer.pth`` is stored; it is
            created if it does not exist.
        loss_img_path: File path the final loss curve image is saved to.
    """

    def __init__(self, model_dir='models', loss_img_path='loss_curve.png'):
        self.model_dir = model_dir
        self.loss_img_path = loss_img_path
        self.epoch_losses = []   # loss recorded for every epoch
        self.epoch_indices = []  # epoch index matching each recorded loss
        # Set by loadModel(); kept as None until then so that
        # updateVisualization() can skip checkpointing instead of raising.
        self.model = None
        self.optimizer = None
        self.min_loss = float('inf')  # best (lowest) loss seen so far
        self.loop_count = 0           # calls since the last plot refresh
        self._init_visualization()
        self._init_model_dir()

    def _init_model_dir(self):
        """Create the checkpoint directory if it is missing."""
        os.makedirs(self.model_dir, exist_ok=True)

    def _init_visualization(self):
        """Create the figure, axes and the (initially empty) loss line."""
        plt.ion()  # interactive mode so the plot can refresh during training
        self.fig, self.ax = plt.subplots(figsize=(10, 6))
        self.ax.set_xlabel("Epoch")
        self.ax.set_ylabel("Loss")
        self.ax.set_title("Training Loss (per Epoch)")
        self.line, = self.ax.plot([], [], label="Epoch Loss")
        self.ax.legend()

    def loadModel(self, model: torch.nn.Module, optimizer, device):
        """Register the model/optimizer and restore a checkpoint if one exists.

        Args:
            model: Module whose state is restored and later checkpointed.
            optimizer: Optimizer whose state is restored and later checkpointed.
            device: ``map_location`` used when reading the checkpoint.

        Returns:
            The loss stored in the checkpoint, or ``float('inf')`` when no
            checkpoint file is present.
        """
        model_path = os.path.join(self.model_dir, 'best_transformer.pth')
        self.model = model
        self.optimizer = optimizer
        if not os.path.exists(model_path):
            return float('inf')
        checkpoint = torch.load(model_path, map_location=device, weights_only=True)
        model.load_state_dict(checkpoint['model_state_dict'])
        optimizer.load_state_dict(checkpoint['optimizer_state_dict'])
        print(f"发现现有模型,其损失为: {checkpoint['loss']:.4f}")
        # Only a lower loss than the stored one will overwrite the checkpoint.
        self.min_loss = checkpoint['loss']
        return checkpoint['loss']

    def saveModel(self, model, optimizer, epoch, loss):
        """Write a checkpoint (epoch, model state, optimizer state, loss)."""
        model_path = os.path.join(self.model_dir, 'best_transformer.pth')
        torch.save({
            'epoch': epoch,
            'model_state_dict': model.state_dict(),
            'optimizer_state_dict': optimizer.state_dict(),
            'loss': loss,
        }, model_path)

    def updateVisualization(self, epoch, loss):
        """Record one epoch's loss and, every 10th call, refresh plot and checkpoint.

        Args:
            epoch: Zero-based epoch index (printed as ``epoch + 1``).
            loss: Loss value for this epoch.
        """
        # NOTE(review): the nesting below was reconstructed from a copy of the
        # file whose indentation was lost; confirm that the checkpoint check
        # and the redraw belong inside the every-10-calls branch.
        self.epoch_losses.append(loss)
        self.epoch_indices.append(epoch)
        self.loop_count += 1
        if self.loop_count % 10 == 0:
            self.loop_count = 0
            print(f" 训练损失(第{epoch+1}轮):{loss:.4f}")
            # Push the accumulated data into the plotted line.
            self.line.set_data(self.epoch_indices, self.epoch_losses)
            self.ax.relim()            # recompute the data limits
            self.ax.autoscale_view()   # rescale the view to the new limits
            if loss < self.min_loss:
                self.min_loss = loss
                # Without a prior loadModel() there is nothing to checkpoint.
                if self.model is not None and self.optimizer is not None:
                    self.saveModel(self.model, self.optimizer, epoch, loss)
            plt.draw()
            plt.pause(0.01)

    def finalizeVisualization(self):
        """Save the final loss curve to ``loss_img_path`` and show it."""
        plt.ioff()  # leave interactive mode
        self.ax.set_title("Training Loss (Final)")
        # Save this object's own figure rather than whichever figure happens
        # to be current in pyplot.
        self.fig.savefig(self.loss_img_path)
        plt.show()
if __name__ == "__main__":
    # Manual smoke test: plot a small red curve with labels, legend and grid,
    # then show it (expected to open a window when a GUI backend is available).
    sample_x = [1, 2, 3, 4]
    sample_y = [1, 4, 9, 16]
    plt.figure(figsize=(6, 4))
    plt.plot(sample_x, sample_y, 'r-', label='test')
    plt.xlabel('X')
    plt.ylabel('Y')
    plt.title('GUI')
    plt.legend()
    plt.grid(True)
    plt.show()