pytorch相关环境

In [4]:
import torch
import torch.nn as nn
from torch.utils.data import Dataset,DataLoader,random_split
from torch.utils.tensorboard import SummaryWriter
print(torch.cuda.get_device_name(torch.cuda.current_device()))
print("CUDA version:", torch.version.cuda)
print("PyTorch version:", torch.__version__)


NVIDIA GeForce RTX 3070 Laptop GPU
CUDA version: 12.4
PyTorch version: 2.6.0+cu124


数据处理相关环境

In [36]:
import pandas as pd
import os
import csv
from tqdm import tqdm
import numpy as np
import math

In [69]:
device = 'cuda'

config = {    
    'seed': 5201314,      # Your seed number, you can pick your lucky number. :)
    'select_all': False,   # Whether to use all features.
    'valid_ratio': 0.2,   # validation_size = train_size * valid_ratio
    'n_epochs': 3000,     # Number of epochs.            
    'batch_size': 256, 
    'learning_rate': 1e-5,              
    'early_stop': 400,    # If model has not improved for this many consecutive epochs, stop training.     
    'save_path': './model.ckpt' , # Your model will be saved here.
    'feature':[0,1,2,3,4]
}
#固定随机数种子
def same_seed(seed):
    torch.backends.cudnn.deterministic=True
    torch.backends.cudnn.benchmark = False
    np.random.seed(seed)
    torch.manual_seed(seed)
    if torch.cuda.is_available():
        torch.cuda.manual_seed_all(seed)
#划分训练集 验证集
def train_valid_split(data_set,valid_ratio,seed):
    valid_set_size = int(len(data_set) * valid_ratio) #防止乘ratio为小数
    train_set_size = len(data_set) - valid_set_size
    train_set,vaild_set = random_split(data_set,[train_set_size,valid_set_size],generator=torch.Generator().manual_seed(seed))
    return np.array(train_set),np.array(vaild_set)

#测试集测试
def predict(test_loader,model,device):
    model.eval() #切换模型至评估模式
    preds = []

    for x in tqdm(test_loader): #tqdm用于显示进度条
        x = x.to(device) #指定数据计算位置，cpu,gpu?
        
        with torch.no_grad():
            pred = model(x) 
            preds.append(pred.detach().cpu()) #把数从gpu迁移至cpu

    preds = torch.cat(preds, dim=0).numpy()
    return preds

#分割标签并选择特征
def select_feat(train_data,valid_data,test_data,select_all = True):
    #提取标签（真实输出）
    y_train,y_valid = train_data[:,-1],valid_data[:,-1]
    #提取输入,测试集没有输出值
    x_train,x_valid,x_test = train_data[:,:-1],valid_data[:,:-1],test_data

    if select_all: #选择全部特征作为输入
        feat_index = list(range(x_train.shape[1]))
    else:
        feat_index = list(range(1,x_train.shape[1])) #后续加config

    return x_train[:,feat_index],x_valid[:,feat_index],x_test[:,feat_index],y_train,y_valid

#print(list((range(5))))

用于获取数据集中数据信息

In [70]:
class COVID19Dataset(Dataset): #重写某些方法，继承并保持某些方法
    def __init__(self,x,y=None): #将数据转换成pytorch格式
        if y is None:
            self.y = y
        else:
            self.y = torch.FloatTensor(y)
        
        self.x = torch.FloatTensor(x)

    def __getitem__(self, index): #根据是否有结果返回数据,重写方法
        if self.y is None:
            return self.x[index]
        else:
            return self.x[index],self.y[index]
        
    def __len__(self):
        return len(self.x)

定义网络结构

In [114]:
class My_Model(nn.Module):
    def __init__(self, inputdims):
        super(My_Model,self).__init__() #继承方法并让系统知道这是pytorch的网络结构方便后续优化
        self.layers = nn.Sequential(
            nn.Linear(inputdims,128),
            nn.ReLU(),
            nn.Linear(64,32),
            nn.ReLU(),
            nn.Linear(32,16),
            nn.ReLU(),
            nn.Linear(16,1),
        )

    def forward(self,data):
        data = self.layers(data).squeeze(1)
        return data
        

In [115]:
same_seed(config['seed'])

train_data,test_data = pd.read_csv('./covid.train.csv').values,pd.read_csv('./covid.test.csv').values
'''
print(len(train_data) )
#print(train_data.type())
for i in range(len(train_data)):
    print(train_data[i,0])
'''
#划分数据集
train_data,valid_data = train_valid_split(train_data,config['valid_ratio'],config['seed'])
#读入数据
x_train,x_valid,x_test,y_train,y_valid = select_feat(train_data,valid_data,test_data,select_all=config['select_all'])

'''for i in range(len(x_train)):
    print(x_train[i,0])
'''
train_dataset,valid_dataset,test_dataset = COVID19Dataset(x_train,y_train),COVID19Dataset(x_valid,y_valid),COVID19Dataset(x_test)
#对数据进行打包，打乱
train_loader = DataLoader(train_dataset,batch_size=config['batch_size'],shuffle=True,pin_memory=True)
valid_loader = DataLoader(valid_dataset, batch_size=config['batch_size'], shuffle=True, pin_memory=True)
test_loader = DataLoader(test_dataset, batch_size=config['batch_size'], shuffle=False, pin_memory=True)

'''for x,y in train_loader:
    for i in range(len(x)):
        print(x[i][0])
    print(x.shape)
'''


'for x,y in train_loader:\n    for i in range(len(x)):\n        print(x[i][0])\n    print(x.shape)\n'

训练器

In [116]:
def trainer(train_loader:DataLoader,valid_loader:DataLoader,model:My_Model,config,device):
    
    criterion = nn.MSELoss(reduction='mean')
    optimizer = torch.optim.SGD(model.parameters(),lr=config['learning_rate'],momentum=0.9)
    writer = SummaryWriter()

    n_epochs,best_loss,step,early_stop_count = config['n_epochs'],math.inf,0,0

    for epoch in range(n_epochs):
        model.train()
        loss_record = []
        train_pbar = tqdm(train_loader, position=0, leave=True) #显示训练进度

        for x,y in train_pbar:
            optimizer.zero_grad()
            x,y = x.to(device),y.to(device)
            pred = model(x)
            loss = criterion(pred,y)

            loss.backward()
            optimizer.step()

            step = step+1
            loss_record.append(loss)

            train_pbar.set_description(f'Epoch[{epoch+1}/{n_epochs}]') #进度条前缀说明
            train_pbar.set_postfix({'loss':loss.detach().item()}) #显示动态指标
            

        mean_train_loss = sum(loss_record) / len(loss_record)
        writer.add_scalar('train_loss',mean_train_loss,step)

        model.eval()
        loss_record = []
        for x,y in valid_loader:
            x,y = x.to(device),y.to(device)
            with torch.no_grad():
                pred = model(x)
                loss = criterion(pred,y)

            loss_record.append(loss.item())

        mean_valid_loss = sum(loss_record) / len(loss_record)
        print()
        print(f'Epoch [{epoch+1}/{n_epochs}]: Train loss: {mean_train_loss:.4f}, Valid loss: {mean_valid_loss:.4f}\n')
        writer.add_scalar('valid_loss',mean_valid_loss,step)

        if mean_valid_loss < best_loss:
            best_loss = mean_valid_loss
            torch.save(model.state_dict(),config['save_path'])
            print('Saving model')
            early_stop_count = 0
        else:
            early_stop_count+=1


        if early_stop_count >= config['early_stop']:
            print('stop train')
            return

In [117]:
model = My_Model(x_train.shape[1]).to(device)
trainer(train_loader,valid_loader,model,config,device)

  0%|          | 0/9 [00:00<?, ?it/s]


RuntimeError: mat1 and mat2 shapes cannot be multiplied (256x128 and 64x32)

In [104]:
%reload_ext tensorboard
%tensorboard --logdir=./runs/

Reusing TensorBoard on port 6006 (pid 1788), started 18:03:42 ago. (Use '!kill 1788' to kill it.)

In [109]:
def save_pred(preds, file):
    with open(file, 'w') as fp:
        writer = csv.writer(fp)
        writer.writerow(['id', 'tested_positive'])
        for i, p in enumerate(preds):
            writer.writerow([i, p])

test_model = My_Model(inputdims=x_train.shape[1]).to(device)
test_model.load_state_dict(torch.load(config['save_path']))
preds = predict(test_loader,model,device)
save_pred(preds,'pred.csv')

100%|██████████| 5/5 [00:00<00:00, 511.04it/s]
