In [None]:
import os, sys, glob, argparse
import pandas as pd
import numpy as np
# from tqdm import tqdm

import cv2, time
# from PIL import Image
# from sklearn.model_selection import train_test_split, StratifiedKFold, KFold

import torch
# Seed PyTorch's global RNG so weight initialisation and shuffling start from a fixed state.
torch.manual_seed(0)
# NOTE(review): cuDNN determinism is disabled and the autotuner (benchmark mode) is
# enabled below, so GPU runs may not be bit-for-bit reproducible despite the seed —
# confirm that speed is preferred over reproducibility here.
torch.backends.cudnn.deterministic = False
torch.backends.cudnn.benchmark = True

import torchmetrics
import torchvision.models as models
# import torchvision.transforms as transforms
# import torchvision.datasets as datasets
import torch.nn as nn
# import torch.nn.functional as F
# import torch.optim as optim
# from torch.autograd import Variable
from torch.utils.data.dataset import Dataset

import albumentations as A


# Pick the compute device once: CUDA when a GPU is visible, otherwise the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

In [None]:
# Display the accelerator name. Guarded so the cell does not raise on a
# CPU-only machine, where torch.cuda.get_device_name() is unavailable.
torch.cuda.get_device_name(0) if torch.cuda.is_available() else "cpu"

In [None]:
# Load the dataset: collect image paths and sort them so the ordering is stable.
train_path = sorted(glob.glob("./data/sample/*/*"))
test_path = sorted(glob.glob("./data/test/*"))

# Labels are assigned purely by position: 480 zeros followed by 2400 ones.
# NOTE(review): this assumes the sorted train_path lists exactly 480 class-0
# images first and 2400 class-1 images after them — TODO confirm against the
# directory layout (nothing here checks len(train_path) == len(train_label)).
train_label = [np.int64(0)] * 480 + [np.int64(1)] * 2400

# Custom dataset with an in-memory image cache.
# Decoded images are stored in DATA_CACHE keyed by file path, so each file is
# read from disk at most once per kernel session.
DATA_CACHE = {}


class XunFeiDataset(Dataset):
    """Image dataset built from parallel lists of file paths and labels.

    Args:
        img_path: sequence of image file paths.
        img_label: sequence of labels, indexed in parallel with ``img_path``.
        transform: optional callable invoked as ``transform(image=img)`` that
            returns a mapping with an ``"image"`` entry (the albumentations
            calling convention). ``None`` disables transformation.
    """

    def __init__(self, img_path, img_label, transform=None):
        self.img_path = img_path
        self.img_label = img_label
        self.transform = transform

    def __getitem__(self, index):
        """Return ``(image, label)`` for ``index``.

        The image is returned with its axes permuted from (H, W, C) to
        (C, H, W); the label is wrapped in a tensor.

        Raises:
            FileNotFoundError: if the image cannot be read from disk.
        """
        path = self.img_path[index]
        img = DATA_CACHE.get(path)
        if img is None:
            img = cv2.imread(path)
            # cv2.imread signals failure by returning None instead of raising;
            # fail loudly here and never cache a failed read.
            if img is None:
                raise FileNotFoundError(
                    "cv2.imread could not read image: {}".format(path)
                )
            DATA_CACHE[path] = img
        # NOTE(review): cv2.imread yields BGR channel order, while the
        # Normalize statistics used with this dataset are the ImageNet RGB
        # values — confirm whether a BGR->RGB conversion is intended.
        if self.transform is not None:
            img = self.transform(image=img)["image"]
        img = img.transpose([2, 0, 1])
        return img, torch.from_numpy(np.array(self.img_label[index]))

    def __len__(self):
        """Number of samples, i.e. the number of image paths."""
        return len(self.img_path)

# Full labelled dataset with the training-time augmentation pipeline:
# random 90-degree rotation, resize to 256, random 224 crop, random flip and
# brightness/contrast jitter, then normalisation with the ImageNet statistics.
dataset = XunFeiDataset(
    img_path=train_path,
    img_label=train_label,
    transform=A.Compose([
        A.RandomRotate90(),
        A.Resize(256, 256),
        A.RandomCrop(224, 224),
        A.HorizontalFlip(p=0.5),
        A.RandomBrightnessContrast(p=0.5),
        A.Normalize(mean=(0.485, 0.456, 0.406), std=(0.229, 0.224, 0.225)),
    ]),
)

# 95 % / 5 % train/validation split with a fixed generator seed.
# Both subsets are drawn from the same `dataset` object, so the validation
# subset goes through the same random augmentations as the training subset.
train_dataset, val_dataset = torch.utils.data.random_split(
    dataset,
    [0.95, 0.05],
    torch.Generator().manual_seed(42),
)

# Training loader: shuffled mini-batches of 32, loaded in the main process.
train_loader = torch.utils.data.DataLoader(
    dataset=train_dataset,
    batch_size=32,
    shuffle=True,
    num_workers=0,
    pin_memory=False,
)

# Validation loader: same batch size, fixed order.
val_loader = torch.utils.data.DataLoader(
    dataset=val_dataset,
    batch_size=32,
    shuffle=False,
    num_workers=0,
    pin_memory=False,
)

# Test loader. The test images have no labels, so every sample gets a
# placeholder label of 0. The pipeline keeps the random crop / flip /
# brightness-contrast steps, so two passes over this loader can see
# differently augmented inputs.
test_loader = torch.utils.data.DataLoader(
    dataset=XunFeiDataset(
        img_path=test_path,
        img_label=[0 for _ in test_path],
        transform=A.Compose([
            A.Resize(256, 256),
            A.RandomCrop(224, 224),
            A.HorizontalFlip(p=0.5),
            A.RandomBrightnessContrast(p=0.5),
            A.Normalize(mean=(0.485, 0.456, 0.406), std=(0.229, 0.224, 0.225)),
        ]),
    ),
    batch_size=50,
    shuffle=False,
    num_workers=0,
    pin_memory=False,
)

In [None]:
# Sanity check: read the first training image straight from disk and show the
# shape of the resulting array.
a=cv2.imread(train_path[0])
np.shape(a)

In [None]:
class XunFeiNet18(nn.Module):
    """ResNet-18 with default pretrained weights and a 2-class linear head."""

    def __init__(self):
        super().__init__()
        backbone = models.resnet18(weights=models.ResNet18_Weights.DEFAULT)
        backbone.avgpool = nn.AdaptiveAvgPool2d(1)
        # Swap the stock classifier for a two-way output layer.
        backbone.fc = nn.Linear(512, 2)
        self.resnet = backbone

    def forward(self, img):
        """Return the backbone's output for the batch ``img``."""
        return self.resnet(img)
    
class XunFeiNet34(nn.Module):
    """ResNet-34 with default pretrained weights and a 2-class linear head."""

    def __init__(self):
        super().__init__()
        backbone = models.resnet34(weights=models.ResNet34_Weights.DEFAULT)
        backbone.avgpool = nn.AdaptiveAvgPool2d(1)
        # Swap the stock classifier for a two-way output layer.
        backbone.fc = nn.Linear(512, 2)
        self.resnet = backbone

    def forward(self, img):
        """Return the backbone's output for the batch ``img``."""
        return self.resnet(img)
    
class XunFeiNet50(nn.Module):
    """ResNet-50 with default pretrained weights and a 2-class linear head."""

    def __init__(self):
        super().__init__()
        backbone = models.resnet50(weights=models.ResNet50_Weights.DEFAULT)
        backbone.avgpool = nn.AdaptiveAvgPool2d(1)
        # Swap the stock classifier for a two-way output layer.
        backbone.fc = nn.Linear(2048, 2)
        self.resnet = backbone

    def forward(self, img):
        """Return the backbone's output for the batch ``img``."""
        return self.resnet(img)

class XunFeiNet101(nn.Module):
    """ResNet-101 with default pretrained weights and a 2-class linear head."""

    def __init__(self):
        super().__init__()
        backbone = models.resnet101(weights=models.ResNet101_Weights.DEFAULT)
        backbone.avgpool = nn.AdaptiveAvgPool2d(1)
        # Swap the stock classifier for a two-way output layer.
        backbone.fc = nn.Linear(2048, 2)
        self.resnet = backbone

    def forward(self, img):
        """Return the backbone's output for the batch ``img``."""
        return self.resnet(img)

class XunFeiNet121(nn.Module):
    """DenseNet-121 built directly with a 2-class output.

    No ``weights`` argument is passed to the constructor here, unlike the
    ResNet wrappers in this notebook. The wrapped network is stored under the
    attribute name ``resnet``, the same name the sibling wrappers use, even
    though it is a DenseNet.
    """

    def __init__(self):
        super().__init__()
        backbone = models.densenet121(num_classes=2)
        self.resnet = backbone

    def forward(self, img):
        """Return the backbone's output for the batch ``img``."""
        return self.resnet(img)
    
class XunFeiNet152(nn.Module):
    """ResNet-152 with default pretrained weights and a 2-class linear head."""

    def __init__(self):
        super().__init__()
        backbone = models.resnet152(weights=models.ResNet152_Weights.DEFAULT)
        backbone.avgpool = nn.AdaptiveAvgPool2d(1)
        # Swap the stock classifier for a two-way output layer.
        backbone.fc = nn.Linear(2048, 2)
        self.resnet = backbone

    def forward(self, img):
        """Return the backbone's output for the batch ``img``."""
        return self.resnet(img)


# Instantiate every backbone once and move it to the selected device; later
# cells pick one of these by name.
model18 = XunFeiNet18().to(device)
model34 = XunFeiNet34().to(device)
model50 = XunFeiNet50().to(device)
model101 = XunFeiNet101().to(device)
model121 = XunFeiNet121().to(device)
model152 = XunFeiNet152().to(device)


# Use the shared `device` instead of .cuda() so the notebook still runs when
# the CPU fallback was selected.
criterion = nn.CrossEntropyLoss().to(device)

In [None]:
# Model training
def train(train_loader, model, criterion, optimizer):
    """Run one training epoch and return the mean per-batch loss.

    Also prints the elapsed wall-clock time and the macro-averaged F1 score
    (2 classes) of the predictions made during the epoch.

    Args:
        train_loader: iterable yielding ``(input, target)`` batches.
        model: network to optimise; switched to training mode.
        criterion: loss callable invoked as ``criterion(output, target)``.
        optimizer: optimizer stepped once per batch.

    Returns:
        Sum of the batch losses divided by ``len(train_loader)``.

    Raises:
        ValueError: if ``train_loader`` yields no batches.
    """
    start = time.time()
    model.train()
    train_loss = 0.0
    # Collect per-batch results and concatenate once at the end. This keeps the
    # class indices as integer tensors (seeding the accumulator with an empty
    # float tensor promoted them to float) and avoids re-copying on every batch.
    pred_batches = []
    target_batches = []
    for images, target in train_loader:
        images = images.to(device)
        target = target.to(device)
        output = model(images)
        loss = criterion(output, target)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        pred_batches.append(output.detach().argmax(1).cpu())
        target_batches.append(target.cpu())
        train_loss += loss.item()

    if not pred_batches:
        raise ValueError("train_loader yielded no batches")

    train_f1 = torchmetrics.functional.classification.multiclass_f1_score(
        torch.cat(pred_batches),
        torch.cat(target_batches),
        num_classes=2,
        average="macro",
    )
    print("t={}s".format(time.time() - start))
    print("F1 score", train_f1)
    return train_loss / len(train_loader)


# Model validation
def validate(val_loader, model, criterion):
    """Evaluate ``model`` on ``val_loader`` without gradient tracking.

    Args:
        val_loader: iterable yielding ``(input, target)`` batches.
        model: network to evaluate; switched to eval mode.
        criterion: loss callable invoked as ``criterion(output, target)``.

    Returns:
        Tuple ``(val_acc, val_loss)``: ``val_acc`` is the macro-averaged F1
        score (2 classes) over all batches as returned by torchmetrics, and
        ``val_loss`` is the sum of batch losses divided by ``len(val_loader)``.

    Raises:
        ValueError: if ``val_loader`` yields no batches.
    """
    model.eval()
    val_loss = 0.0
    # Collected per batch and concatenated once, so class indices stay integer.
    pred_batches = []
    target_batches = []
    with torch.no_grad():
        for images, target in val_loader:
            images = images.to(device)
            target = target.to(device)
            output = model(images)
            # Accumulate inside the loop: previously only the last batch's loss
            # was added, yet the total was still divided by the batch count.
            val_loss += criterion(output, target).item()
            pred_batches.append(output.argmax(1).cpu())
            target_batches.append(target.cpu())

    if not pred_batches:
        raise ValueError("val_loader yielded no batches")

    val_acc = torchmetrics.functional.classification.multiclass_f1_score(
        torch.cat(pred_batches),
        torch.cat(target_batches),
        num_classes=2,
        average="macro",
    )
    return val_acc, val_loss / len(val_loader)


# Model inference
def predict(test_loader, model):
    """Return the model's raw outputs for every batch of ``test_loader``.

    The model is put in eval mode and run without gradient tracking. The second
    element of each batch (the label slot) is ignored. Per-batch outputs are
    moved to the CPU, converted to NumPy and stacked row-wise into one array.
    """
    model.eval()

    batch_scores = []
    with torch.no_grad():
        for batch, _ in test_loader:
            scores = model(batch.to(device))
            batch_scores.append(scores.data.cpu().numpy())

    return np.vstack(batch_scores)

In [None]:
# Early-stopping run for ResNet-152: keep training while the validation
# macro-F1 strictly improves, writing a checkpoint after every improving epoch.
# The loop stops after the first epoch that fails to improve; if even the first
# epoch does not beat the 0.001 sentinel, no checkpoint is written.
model = model152
optimizer = torch.optim.AdamW(model.parameters(), 0.000005)
# torch.save does not create missing parent directories; make sure the target
# exists before a full epoch is spent training.
os.makedirs("./model", exist_ok=True)
epochs = 0
last_acc = 0
val_acc = 0.001  # sentinel above last_acc so the loop body runs at least once
while last_acc < val_acc:
    last_acc = val_acc
    train_loss = train(train_loader, model, criterion, optimizer)
    val_acc, val_loss = validate(val_loader, model, criterion)
    print(
        "epoch {} :".format(epochs + 1), "train_loss:", train_loss, 'val_loss:', val_loss,"val_f1:", val_acc
    )
    epochs += 1
    if val_acc > last_acc:
        torch.save(model.state_dict(), "./model/baseline_152.pth")

In [None]:
# Early-stopping run for ResNet-101: keep training while the validation
# macro-F1 strictly improves, writing a checkpoint after every improving epoch.
# The loop stops after the first epoch that fails to improve; if even the first
# epoch does not beat the 0.001 sentinel, no checkpoint is written.
model = model101
optimizer = torch.optim.AdamW(model.parameters(), 0.000005)
# torch.save does not create missing parent directories; make sure the target
# exists before a full epoch is spent training.
os.makedirs("./model", exist_ok=True)
epochs = 0
last_acc = 0
val_acc = 0.001  # sentinel above last_acc so the loop body runs at least once
while last_acc < val_acc:
    last_acc = val_acc
    train_loss = train(train_loader, model, criterion, optimizer)
    val_acc, val_loss = validate(val_loader, model, criterion)
    print(
        "epoch {} :".format(epochs + 1), "train_loss:", train_loss, 'val_loss:', val_loss,"val_f1:", val_acc
    )
    epochs += 1
    if val_acc > last_acc:
        torch.save(model.state_dict(), "./model/baseline_101.pth")

In [None]:
# Early-stopping run for ResNet-50: keep training while the validation
# macro-F1 strictly improves, writing a checkpoint after every improving epoch.
# The loop stops after the first epoch that fails to improve; if even the first
# epoch does not beat the 0.001 sentinel, no checkpoint is written.
model = model50
optimizer = torch.optim.AdamW(model.parameters(), 0.000005)
# torch.save does not create missing parent directories; make sure the target
# exists before a full epoch is spent training.
os.makedirs("./model", exist_ok=True)
epochs = 0
last_acc = 0
val_acc = 0.001  # sentinel above last_acc so the loop body runs at least once
while last_acc < val_acc:
    last_acc = val_acc
    train_loss = train(train_loader, model, criterion, optimizer)
    val_acc, val_loss = validate(val_loader, model, criterion)
    print(
        "epoch {} :".format(epochs + 1), "train_loss:", train_loss, 'val_loss:', val_loss,"val_f1:", val_acc
    )
    epochs += 1
    if val_acc > last_acc:
        torch.save(model.state_dict(), "./model/baseline_50.pth")

In [None]:
# Early-stopping run for ResNet-34: keep training while the validation
# macro-F1 strictly improves, writing a checkpoint after every improving epoch.
# The loop stops after the first epoch that fails to improve; if even the first
# epoch does not beat the 0.001 sentinel, no checkpoint is written.
model = model34
optimizer = torch.optim.AdamW(model.parameters(), 0.000005)
# torch.save does not create missing parent directories; make sure the target
# exists before a full epoch is spent training.
os.makedirs("./model", exist_ok=True)
epochs = 0
last_acc = 0
val_acc = 0.001  # sentinel above last_acc so the loop body runs at least once
while last_acc < val_acc:
    last_acc = val_acc
    train_loss = train(train_loader, model, criterion, optimizer)
    val_acc, val_loss = validate(val_loader, model, criterion)
    print(
        "epoch {} :".format(epochs + 1), "train_loss:", train_loss, 'val_loss:', val_loss,"val_f1:", val_acc
    )
    epochs += 1
    if val_acc > last_acc:
        torch.save(model.state_dict(), "./model/baseline_34.pth")

In [None]:
# Train model50 for a fixed 10 epochs with a fresh AdamW optimizer, printing
# the training loss, validation loss and validation macro-F1 after each epoch.
# The model continues from whatever weights it currently holds, and this cell
# does not write a checkpoint.
model=model50
optimizer = torch.optim.AdamW(model.parameters(), 0.000005)
epochs = 10
for epoch in range(epochs):
    train_loss = train(train_loader, model, criterion, optimizer)
    val_acc,val_loss = validate(val_loader, model, criterion)
    print(
        "epoch {} :".format(epoch + 1), "train_loss:", train_loss, 'val_loss:', val_loss,"val_f1:", val_acc
    )

In [None]:
# Predict the test set several times per model and sum the weighted outputs.
# Each entry is (model, checkpoint path, weight applied to that model's outputs).
ensemble = [
    (model101, "./model/baseline_101.pth", 1.0),
    (model152, "./model/baseline_152.pth", 0.9),
    (model50, "./model/baseline_50.pth", 1.0),
    (model34, "./model/baseline_34.pth", 0.9),
]
pred = None
for model, checkpoint, weight in ensemble:
    # map_location keeps checkpoints written on a GPU loadable when the
    # notebook has fallen back to the CPU.
    model.load_state_dict(torch.load(checkpoint, map_location=device))
    for run_idx in range(3):
        scores = weight * predict(test_loader, model)
        pred = scores if pred is None else pred + scores
        print(run_idx + 1)

submit = pd.DataFrame(
    {
        # os.path.basename handles the platform's path separator, unlike
        # splitting on '/'.
        'name': [os.path.basename(x) for x in test_path],
        'label': pred.argmax(1),
    }
)

# Write the submission file
submit = submit.sort_values(by='name')
submit.to_csv('submit6.csv', index=False)

In [None]:
# Scratch cell: one manual pass over train_loader that repeats the optimisation
# steps of train() without reporting any loss or metric.
# NOTE(review): this relies on whatever `model` and `optimizer` earlier cells
# left in the kernel. When the notebook is run top to bottom, `model` was last
# bound to model34 by the ensemble cell while `optimizer` was last built from
# model50.parameters(), so optimizer.step() would not update `model` — confirm
# whether this cell is still needed.
model.train()
train_loss = 0.0  # never updated in the loop below
model = model.to(device)
for i, (input, target) in enumerate(train_loader):
    input = input.to(device)
    target = target.to(device)
    output = model(input)
    loss = criterion(output, target)
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()

In [None]:
# Train model101 for a fixed 4 epochs at a learning rate of 1e-4, then save the
# final weights (not the best epoch) to baseline_test.pth.
epochs = 4
model = model101
optimizer = torch.optim.AdamW(model.parameters(), 0.0001)
for epoch in range(epochs):
    train_loss = train(train_loader, model, criterion, optimizer)
    val_acc, val_loss = validate(val_loader, model, criterion)
    print(
        "epoch {} :".format(epoch + 1), "train_loss:", train_loss, 'val_loss:', val_loss,"val_f1:", val_acc
    )
    # The stray `epochs += 1` that used to sit here was dropped: range(epochs)
    # is evaluated once, so it never affected the loop and only left a
    # misleading value behind.
# torch.save does not create missing parent directories.
os.makedirs("./model", exist_ok=True)
torch.save(model.state_dict(), "./model/baseline_test.pth")



In [None]:
# Re-create model101 from the pretrained ResNet-101 wrapper, train it for a
# fixed 3 epochs at a learning rate of 1e-4 and save the final weights.
# NOTE(review): this writes to the same baseline_101.pth path as the
# early-stopping ResNet-101 cell, replacing that checkpoint — confirm intended.
epochs = 3
# `XunFeiNet` is not defined anywhere in this notebook; the ResNet-101 wrapper
# is XunFeiNet101.
model101 = XunFeiNet101().to(device)
model = model101
optimizer = torch.optim.AdamW(model.parameters(), 0.0001)
for epoch in range(epochs):
    train_loss = train(train_loader, model, criterion, optimizer)
    val_acc, val_loss = validate(val_loader, model, criterion)
    print(
        "epoch {} :".format(epoch + 1), "train_loss:", train_loss, 'val_loss:', val_loss,"val_f1:", val_acc
    )
    # No `epochs += 1` here: range(epochs) is evaluated once, so mutating the
    # counter inside the loop had no effect on iteration.
# torch.save does not create missing parent directories.
os.makedirs("./model", exist_ok=True)
torch.save(model.state_dict(), "./model/baseline_101.pth")

In [None]:
# Ensemble for submit5: sum the outputs of three prediction passes for each
# (model, checkpoint) pair, all with equal weight. model101 is used twice, once
# with each of its two checkpoints.
ensemble = [
    (model101, "./model/baseline_test.pth"),
    (model101, "./model/baseline_101.pth"),
    (model50, "./model/baseline_50.pth"),
]
pred = None
for model, checkpoint in ensemble:
    # map_location keeps checkpoints written on a GPU loadable when the
    # notebook has fallen back to the CPU.
    model.load_state_dict(torch.load(checkpoint, map_location=device))
    for run_idx in range(3):
        scores = predict(test_loader, model)
        pred = scores if pred is None else pred + scores
        print(run_idx + 1)

submit = pd.DataFrame(
    {
        # os.path.basename handles the platform's path separator, unlike
        # splitting on '/'.
        'name': [os.path.basename(x) for x in test_path],
        'label': pred.argmax(1),
    }
)

# Write the submission file
submit = submit.sort_values(by='name')
submit.to_csv('submit5.csv', index=False)

In [None]:
# Inspect the classification head of a stock torchvision ResNet-101.
# Bound to its own name so the notebook-wide `model` (a trained wrapper that
# later cells validate and load checkpoints into) is not replaced by an
# unadapted network.
reference_resnet101 = models.resnet101(weights=models.ResNet101_Weights.DEFAULT)
reference_resnet101.fc

In [None]:
# validate() requires the loss criterion and returns (macro-F1, mean loss);
# pass all three arguments and unpack both results before showing the F1.
val_acc, val_loss = validate(val_loader, model, criterion)
val_acc

In [None]:
# Rebuild `dataset` from everything except the last 1000 training paths/labels.
# Only resize, random crop and normalisation are applied here; rotation, flip
# and brightness/contrast jitter are left out of this pipeline.
dataset = XunFeiDataset(
    img_path=train_path[:-1000],
    img_label=train_label[:-1000],
    transform=A.Compose(
        [
            A.Resize(256, 256),
            A.RandomCrop(224, 224),
            A.Normalize(mean=(0.485, 0.456, 0.406), std=(0.229, 0.224, 0.225)),
        ]
    ),
)
# 90 % / 10 % split with a fixed generator seed. This rebinds `train_dataset`;
# loaders created in earlier cells keep pointing at the earlier split.
train_dataset, test_dataset = torch.utils.data.random_split(
    dataset,
    [0.9, 0.1],
    torch.Generator().manual_seed(42),
)
train_dataset

In [None]:
# Load the baseline_test checkpoint into the current `model`, report its
# validation loss, and show how many samples the validation set holds.
# map_location keeps a checkpoint written on a GPU loadable when the notebook
# has fallen back to the CPU.
model.load_state_dict(torch.load("./model/baseline_test.pth", map_location=device))
val_acc, val_loss = validate(val_loader, model, criterion)
print(val_loss)
len(val_loader.dataset)