In [1]:
%matplotlib inline

In [2]:
import random
import numpy as np
import pandas as pd

import torch
from torch import nn
import torch.nn.functional as F
from torch.utils.data import DataLoader
import torchvision as tv
from torchvision import transforms

In [3]:
import sys
sys.path.append("..")
from dataset import GenderDataset
from model.gender import GenderNet

In [4]:
def setup_seed(seed):
    import os
    os.environ['PYTHONHASHSEED'] = str(seed) # 为了禁止hash随机化，使得实验可复现
    torch.manual_seed(seed) # 为CPU设置随机种子
    torch.cuda.manual_seed_all(seed) # 为当前GPU设置随机种子
    torch.cuda.manual_seed_all(seed) # 为所有GPU设置随机种子
    np.random.seed(seed) # # Numpy设置随机种子
    random.seed(seed) # Python设置随机种子
    # torch.backends.cudnn.deterministic = True
    print('Setting up seed finished!')
    
setup_seed(1)

Setting up seed finished!


In [10]:
# Data
train_set = GenderDataset('data/gender_train.txt', transform=None)
train_loader = DataLoader(dataset=train_set, batch_size=32, shuffle=True)

valid_set = GenderDataset('data/gender_test.txt', transform=None)
valid_loader = DataLoader(dataset=valid_set, batch_size=32, shuffle=False)

In [11]:
# device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
device = "cpu"
print(f'Using device {device}')

Using device cpu


In [12]:
def train(epoch, model):
    correct = 0
    total = 0
    epoch_loss = 0.
    running_loss = 0.
    model.train()
    if epoch == 0:
        print(model)
    for batch_idx, data in enumerate(train_loader, 0):
        inputs, target = data
        inputs, target = inputs.to(device), target.to(device)  # 转移到cuda，且在同一块显卡上
        optimizer.zero_grad()
        # forward + backard + update
        # outputs = base_model(inputs)
        outputs = model(inputs)
        loss = criterion(outputs, target)
        loss.backward()
        optimizer.step()

        running_loss += loss.item()
        epoch_loss += loss.item()
        # if batch_idx % 1 == 1:
        _, predicted = torch.max(outputs.data, dim=1)
        total += target.size(0)
        correct += (predicted == target).sum().item()

        print('[%d, %5d] loss: %.3f' % (epoch+1, batch_idx+1, running_loss/2000))
        print(f'Accuracy on train set: {round(100*correct/total, 2)}% [{correct}/{total}]')
        running_loss = 0.0
    return epoch_loss/2000, model

In [13]:
def valid(epoch, model):
    correct = 0
    total = 0
    print('Validating...')
    model.eval() # 模式转换为测试模式，主要会对BN层和dropout层根据模式进行调整 仍然计算梯度，但是不反向传播
    with torch.no_grad(): # 不会计算梯度，但是对BN层和dropout层并未进行调整 
        for data in valid_loader:
            inputs, target = data
            inputs, target = inputs.to(device), target.to(device)
            outputs = model(inputs)
            _, predicted = torch.max(outputs.data, dim=1)
            total += target.size(0)
            correct += (predicted == target).sum().item()
    print('----------------------------------------------------------------------------')
    print(f'Accuracy on valid set: {round(100*correct/total, 2)}% [{correct}/{total}]')
    print('----------------------------------------------------------------------------')
    print('----------------------------------------------------------------------------')

In [17]:
model = GenderNet()
model.to(device);

optimizer = torch.optim.Adam(model.parameters(), lr=0.01, weight_decay=1e-3)
scheduler = torch.optim.lr_scheduler.MultiStepLR(optimizer, [10, 20], 0.1)
criterion = nn.CrossEntropyLoss()

epoch_loss = []
for epoch in range(100):
    current_loss, model = train(epoch, model)
    valid(epoch, model)
    epoch_loss.append(current_loss)

GenderNet(
  (fc): Linear(in_features=2, out_features=2, bias=True)
)
[1,     1] loss: 0.001
Accuracy on train set: 46.88% [15/32]
[1,     2] loss: 0.001
Accuracy on train set: 51.56% [33/64]
[1,     3] loss: 0.001
Accuracy on train set: 55.21% [53/96]
[1,     4] loss: 0.000
Accuracy on train set: 51.56% [66/128]
[1,     5] loss: 0.000
Accuracy on train set: 52.5% [84/160]
Validating...
----------------------------------------------------------------------------
Accuracy on valid set: 50.0% [20/40]
----------------------------------------------------------------------------
----------------------------------------------------------------------------
[2,     1] loss: 0.000
Accuracy on train set: 46.88% [15/32]
[2,     2] loss: 0.001
Accuracy on train set: 43.75% [28/64]
[2,     3] loss: 0.001
Accuracy on train set: 46.88% [45/96]
[2,     4] loss: 0.001
Accuracy on train set: 46.88% [60/128]
[2,     5] loss: 0.000
Accuracy on train set: 50.0% [80/160]
Validating...
----------------------

In [None]:
plt.pl