In [1]:
import pandas as pd
from pathlib import Path 
import torch
from fastai.vision.all import *
from fastai.text.all import *
from fastai.collab import *
from fastai.tabular.all import *
import fastai.learner as learner
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

In [2]:
path_data = Path('data')
path = path_data/'训练验证数据集'
path_train = path/'训练数据集'
path_train_data = [path_train/f'信号类型{i}训练集.txt' for i in range(1,13)]
names = ['ts','f_a','f_b','f_c','f_d','cat']

In [3]:
df_train = pd.concat([pd.read_csv(o,sep='\s+',header=None,names=names) for o in path_train_data])

In [4]:
path_val = path/'验证数据集'
path_val_data = [path_val/f'验证集{i}.txt' for i in range(1,4)]
df_val = pd.concat([pd.read_csv(o,sep='\s+',header=None,names=names) for o in path_val_data])
#df_val_1 = pd.read_csv(path_val_data[0],sep='\s+',header=None,names=names)
#df_val_2 = pd.read_csv(path_val_data[1],sep='\s+',header=None,names=names)
#df_val_3 = pd.read_csv(path_val_data[2],sep='\s+',header=None,names=names)

In [5]:
cont_names = ['f_a','f_b','f_c','f_d']
y_names = 'cat'
procs = [Normalize]
batch_size = bs = 512
cbs=[ProgressCallback(),ShowGraphCallback(),TrackerCallback(),MCDropoutCallback()]

In [6]:
splits = RandomSplitter(valid_pct=0.2)(range_of(df_train))

In [7]:
to = TabularPandas(df_train, procs=procs,
                   cont_names = cont_names,
                   y_names=y_names,
                   splits=splits)

In [8]:
X_train, y_train = to.train.xs, to.train.ys.values.ravel()
X_test, y_test = to.valid.xs, to.valid.ys.values.ravel()

In [65]:
X_train

Unnamed: 0,f_a,f_b,f_c,f_d
190634,0.136722,0.241937,0.150956,1.254089
17133,0.110435,-0.016344,0.265848,-0.494801
75230,0.124922,0.018271,-1.187741,-0.455112
49980,0.138072,-0.034983,-0.095241,0.706120
28387,0.117004,0.183358,-0.620462,1.074847
...,...,...,...,...
49153,0.135996,0.050224,-2.079180,0.716363
163923,0.076839,0.146080,1.554280,1.183673
39857,0.065272,0.209985,0.703874,1.082529
77141,0.107696,0.156731,-0.517880,1.207999


In [66]:
def data_iter(batch_size, features, labels):
    num_examples = len(features)
    indices = list(range(num_examples))
    #random.shuffle(indices)
    for i in range(0, num_examples, batch_size):
        j = torch.LongTensor(
            indices[i:min(i + batch_size, num_examples)])  # 类型转换为long和tensor
        yield torch.tensor(np.array(features.iloc[j]),dtype=torch.float32), torch.tensor(labels[j])

In [86]:
train_iter = data_iter(batch_size,X_train,y_train)
test_iter = data_iter(batch_size,X_test,y_test)

In [78]:
X_test

Unnamed: 0,f_a,f_b,f_c,f_d
70691,0.179794,-1.968096,1.715333,-1.215611
86072,0.138099,-1.909517,0.717209,-1.291148
125083,0.065468,0.111465,0.041193,0.905847
75841,0.164295,-0.050959,0.612576,-0.514006
82912,0.110098,-0.144153,-2.205356,-0.621551
...,...,...,...,...
123344,0.168046,0.066200,-0.921027,-0.492241
101028,0.063013,0.156731,-1.131321,1.152946
75681,0.069335,0.015609,-0.584558,1.215680
140764,0.184909,-1.882890,0.694641,-1.259141


In [80]:
net = nn.Sequential(nn.Linear(4,64),nn.GELU(),
                    nn.Linear(64,128),nn.GELU(),
                    nn.Linear(128,12))

In [47]:
def evaluate_accuracy(data_iter, net,device=torch.device('cuda' if torch.cuda.is_available() else 'cpu')):
    acc_sum, n = 0.0, 0
    for X, y in data_iter:
        X.to(device)
        y.to(device)
        y = y-1
        net.eval() 
        acc_sum += (net(X.to(device)).argmax(dim=1) == y.to(device)).float().sum().item()
        net.train()
        n += y.shape[0]
    return acc_sum / n

In [38]:
def train(net, train_iter, test_iter, loss, optimizer, device, num_epochs):
    net = net.to(device)
    print("training on ", device)
    batch_count = 0
    for epoch in range(num_epochs):
        train_l_sum, train_acc_sum, n, start = 0.0, 0.0, 0, time.time()
        for X, y in train_iter:
            X = X.to(device)
            y = y-1
            y = y.to(device)
            y_hat = net(X)
            l = loss(y_hat, y.long())
            optimizer.zero_grad()
            l.backward()
            optimizer.step()
            train_l_sum += l.cpu().item()
            train_acc_sum += (y_hat.argmax(dim=1) == y).sum().cpu().item()
            n += y.shape[0]
            batch_count += 1
        test_acc = evaluate_accuracy(test_iter, net)
        print('epoch %d, loss %.4f, train acc %.3f, test acc %.3f, time %.1f sec'
              % (epoch + 1, train_l_sum / batch_count, train_acc_sum / n, test_acc, time.time() - start))

In [81]:
loss = nn.CrossEntropyLoss()
optimizer = optim.Adam(net.parameters(), lr=0.01)

In [82]:
device=torch.device('cuda' if torch.cuda.is_available() else 'cpu')
acc_sum, n = 0.0, 0
X, y =next(test_iter)
y = y-1
net.eval() 
acc_sum += (net(X.to(device)).argmax(dim=1) == y.to(device)).float().sum().item()
net.train()
n += y.shape[0]
acc_sum/n

StopIteration: 

In [48]:
evaluate_accuracy(test_iter, net)

ZeroDivisionError: float division by zero

In [40]:
train(net,train_iter,test_iter,loss=loss,optimizer=optimizer,
      device=torch.device('cuda' if torch.cuda.is_available() else 'cpu'),
      num_epochs=5)

training on  cuda
epoch 1, loss 0.2165, train acc 0.908, test acc 0.941, time 10.7 sec


ZeroDivisionError: float division by zero

In [105]:
def accuracy_round(inp, targ, axis=-1):
    "Compute accuracy with `targ` when `pred` is bs * n_classes"
    pred,targ = flatten_check(inp, targ)
    return (pred.round_() == targ).float().mean()


In [None]:
learn.lr_find()

In [None]:
lr=1e-3
learn.fit(6,lr=lr,cbs=cbs)

In [None]:
def get_pred(learn,df):
    tst_dl = learn.dls.test_dl(df) 
    return learn.get_preds(dl=tst_dl)


In [None]:
def get_acc(pred):
    pred[0].round_().clamp_(1,12)
    return (pred[0] == pred[1]).float().mean()

In [None]:
pred_1 = get_pred(df_val_1)
pred_2 = get_pred(df_val_2)
pred_3 = get_pred(df_val_3)
pred_t = get_pred(df_train)

In [None]:
get_acc(pred_1),get_acc(pred_2),get_acc(pred_3),get_acc(pred_t)

In [None]:
df_val.corr()