In [22]:
import torch
from torch.nn import (
    Module,
    Linear,
    ReLU,
    Conv2d,
    Sequential,
    MaxPool2d,
    Flatten,
    Dropout,
    AdaptiveAvgPool2d,
    functional
)

In [None]:
class Inception(Module):
    def __init__(self,in_channels , c1,c2,c3,c4):
        super().__init__()
        self.p_1 = Conv2d(in_channels,c1,kernel_size=1)
        self.p_2 = Conv2d(in_channels,c2[0],kernel_size=1)
        self.p_3 = Conv2d(c2[0],c2[1],kernel_size=3,padding=1)
        self.p_4 = Conv2d(in_channels,c3[0],kernel_size=1)
        self.p_5 = Conv2d(c3[0],c3[1],kernel_size=5,padding=2)
        self.p_6 = MaxPool2d(kernel_size=3,stride=1,padding=1)
        self.p_7 = Conv2d(in_channels,c4,kernel_size=1)
    def forward(self,input):
        p1 = functional.relu(self.p_1(input))
        p2 = functional.relu(self.p_3(functional.relu(self.p_2(input))))
        p3 = functional.relu(self.p_5(functional.relu(self.p_4(input))))
        p4 = functional.relu(self.p_7(functional.relu(self.p_6(input))))
        return torch.concat((p1,p2,p3,p4),dim=1)
    
b1 = Sequential(
    Conv2d(1,64,kernel_size=7,stride=2,padding=3),
    ReLU(),
    MaxPool2d(kernel_size=3,stride=2,padding=1)

)
b2 = Sequential(
    Conv2d(64,64,kernel_size=1),
    ReLU(),
    Conv2d(64,192,kernel_size=3,padding=1),
    MaxPool2d(kernel_size=3,stride=2,padding=1)
    )
b3 = Sequential(
    Inception(192,64,(96,128),(16,32),32),
    Inception(256,128,(128,192),(32,96),64),
    MaxPool2d(kernel_size=3,stride=2,padding=1)
)
b4 = Sequential(
    Inception(480,192,(96,208),(16,48),64),
    Inception(512,160,(112,224),(24,64),64),
    Inception(512,128,(128,256),(24,64),64),
    Inception(512,112,(144,288),(32,64),64),
    Inception(528,256,(160,320),(32,128),128),
    MaxPool2d(kernel_size=3,stride=2,padding=1)
)
b5 = Sequential(
    Inception(832,256,(160,320),(32,128),128),
    Inception(832,384,(192,384),(48,128),128),
    AdaptiveAvgPool2d((1,1)),
    Flatten()
)
net = Sequential(
    b1,
    b2,
    b3,
    b4,
    b5,
    Linear(1024,10)
)


In [24]:
def init_net_parpmter(layer):
    if isinstance(layer,(Linear,Conv2d)):
        torch.nn.init.kaiming_normal_(layer.weight,mode="fan_in",nonlinearity='relu')
net.apply(init_net_parpmter)
def get_device(chose_device = 0 ):
    return f'cuda:{chose_device}' if torch.cuda.is_available() else "cpu"

device = get_device()

net.to(device)
from torchvision import transforms
import torchvision

trans = transforms.Compose(
    [
        transforms.ToTensor(),
        transforms.Resize((96,96))
    ]
)
mninst_train = torchvision.datasets.FashionMNIST(
    root='FashionMINIST',
    train=True,
    download=False,
    transform=trans
    )
mninst_text = torchvision.datasets. FashionMNIST(
    root="FashionMINIST",
    train=False,
    download=False,
    transform=trans
)


len(mninst_train),len(mninst_text)
from torch.utils.data import DataLoader

def get_dataloader(dataset,mode,batch_size=128):
    
    return DataLoader(
        dataset=dataset,
        shuffle= ('train' == mode),
        drop_last= ('train' == mode),
        batch_size=batch_size
    )

train_dataloader = get_dataloader(mninst_train,'train')
test_dataloader = get_dataloader(mninst_text,'test')
n_epoch = 10
lossfunction = torch.nn.CrossEntropyLoss()
optimer = getattr(torch.optim,'Adam')(net.parameters(),lr=0.0001,weight_decay=1e-4)
from tqdm.auto import  tqdm
def val(val_dataloader , model , device):
    model.eval()
    with torch.no_grad():
        acc = 0
        run =0 
        for val_feature , val_label in tqdm(val_dataloader):
            val_feature = val_feature.to(device)
            run +=1
            val_label = val_label.to(device)

            y_predict = net(val_feature)
            max_index = torch.argmax(y_predict,1)

            acc += (max_index == val_label).float().mean().item()

    return acc/run * 100



net.train()
for epoch in tqdm(range(n_epoch)):
    acc = 0
    run =0 
    for train_feature,train_label in tqdm(train_dataloader):
        run += 1
        train_feature = train_feature.to(device)
        train_label = train_label.to(device)
        y_hat = net(train_feature)

        max_index = torch.argmax(y_hat,1)
        optimer.zero_grad()
        loss = lossfunction(y_hat,train_label).to(device)
        loss.backward()
        optimer.step()
        acc += (max_index == train_label).float().mean().item()
    
    print("train:",acc/run*100)

    accuracy = val(test_dataloader,net,device)
    print("val:",accuracy)


  from .autonotebook import tqdm as notebook_tqdm
100%|██████████| 468/468 [00:38<00:00, 12.21it/s]


train: 78.88788728632478


100%|██████████| 79/79 [00:02<00:00, 36.59it/s]
 10%|█         | 1/10 [00:40<06:04, 40.49s/it]

val: 82.47626582278481


100%|██████████| 468/468 [00:38<00:00, 12.15it/s]


train: 86.88902243589743


100%|██████████| 79/79 [00:02<00:00, 33.89it/s]
 20%|██        | 2/10 [01:21<05:25, 40.70s/it]

val: 87.91534810126582


100%|██████████| 468/468 [00:39<00:00, 11.89it/s]


train: 88.78872863247864


100%|██████████| 79/79 [00:02<00:00, 32.20it/s]
 30%|███       | 3/10 [02:03<04:48, 41.21s/it]

val: 88.16257911392405


100%|██████████| 468/468 [00:39<00:00, 11.91it/s]


train: 89.58333333333334


100%|██████████| 79/79 [00:02<00:00, 36.00it/s]
 40%|████      | 4/10 [02:44<04:07, 41.31s/it]

val: 89.7745253164557


100%|██████████| 468/468 [00:39<00:00, 11.86it/s]


train: 90.7134748931624


100%|██████████| 79/79 [00:02<00:00, 32.02it/s]
 50%|█████     | 5/10 [03:26<03:27, 41.54s/it]

val: 88.5185917721519


100%|██████████| 468/468 [00:39<00:00, 11.96it/s]


train: 91.40124198717949


100%|██████████| 79/79 [00:02<00:00, 32.83it/s]
 60%|██████    | 6/10 [04:08<02:46, 41.54s/it]

val: 90.5557753164557


100%|██████████| 468/468 [00:39<00:00, 11.94it/s]


train: 92.05061431623932


100%|██████████| 79/79 [00:02<00:00, 31.16it/s]
 70%|███████   | 7/10 [04:49<02:04, 41.61s/it]

val: 91.1689082278481


100%|██████████| 468/468 [00:39<00:00, 11.88it/s]


train: 92.84354967948718


100%|██████████| 79/79 [00:02<00:00, 33.47it/s]
 80%|████████  | 8/10 [05:31<01:23, 41.65s/it]

val: 91.36669303797468


100%|██████████| 468/468 [00:39<00:00, 11.93it/s]


train: 92.73671207264957


100%|██████████| 79/79 [00:02<00:00, 33.65it/s]
 90%|█████████ | 9/10 [06:13<00:41, 41.63s/it]

val: 91.69303797468355


100%|██████████| 468/468 [00:39<00:00, 11.83it/s]


train: 93.45786591880342


100%|██████████| 79/79 [00:02<00:00, 35.12it/s]
100%|██████████| 10/10 [06:54<00:00, 41.50s/it]

val: 90.84256329113924





In [26]:
x = torch.rand(size=(1,1,96,96))

for index,layer in enumerate(net):
    x = layer(x)
    print(layer.__class__.__name__ , 'shape',x.shape)


RuntimeError: Input type (torch.FloatTensor) and weight type (torch.cuda.FloatTensor) should be the same or input should be a MKLDNN tensor and weight is a dense tensor