In [1]:
import torch
from torchvision import datasets, transforms
import torch.nn.functional as F
import matplotlib.pyplot as plt
from sklearn.metrics import accuracy_score

Функция загрузки датасета из директории

In [2]:
# TODO: move this helper into an importable module
def load_dataset(dir_path, img_size):
    """Create an ``ImageFolder`` dataset with resize + normalise preprocessing.

    Args:
        dir_path: Root directory passed to ``datasets.ImageFolder``.
        img_size: Side length in pixels; every image is resized to
            ``img_size x img_size``.

    Returns:
        The ``datasets.ImageFolder`` instance with the transform attached.
    """
    # Mean and std of 0.5 for each of the three channels.
    channel_mean = (0.5, 0.5, 0.5)
    channel_std = (0.5, 0.5, 0.5)

    preprocessing = transforms.Compose([
        transforms.Resize((img_size, img_size)),
        transforms.ToTensor(),
        transforms.Normalize(channel_mean, channel_std),
    ])

    return datasets.ImageFolder(root=dir_path, transform=preprocessing)

In [3]:
# Sanity check: two 2x2 / stride-2 max-poolings divide the 160-pixel input
# side by 4, giving the 40x40 feature map that feeds the first linear layer.
160 / 4

40.0

In [4]:
# Roots of the two dataset splits.
train_dir_path = "bird_dataset/train/"
test_dir_path = "bird_dataset/test/"

# Both splits get identical 160x160 preprocessing; train is loaded first.
train, test = (
    load_dataset(split_dir, 160)
    for split_dir in (train_dir_path, test_dir_path)
)
print(train)

Dataset ImageFolder
    Number of datapoints: 1331
    Root location: bird_dataset/train/
    StandardTransform
Transform: Compose(
               Resize(size=(160, 160), interpolation=bilinear, max_size=None, antialias=True)
               ToTensor()
               Normalize(mean=(0.5, 0.5, 0.5), std=(0.5, 0.5, 0.5))
           )


In [5]:
# ImageFolder enumerates samples class by class, so the training loader has to
# shuffle; otherwise every mini-batch is dominated by a single class, which
# hurts both the gradient estimate and the batch-norm statistics.
train_loader = torch.utils.data.DataLoader(train, batch_size=32, shuffle=True)
# Sample order does not influence the evaluation metric, so the test loader
# keeps the default deterministic order.
test_loader = torch.utils.data.DataLoader(test, batch_size=32)

In [9]:
class CNN(torch.nn.Module):
    """Small two-block convolutional classifier for square RGB images.

    Each block is ``3x3 conv -> ReLU -> 2x2 max-pool -> batch-norm``. The
    flattened feature map then passes through three fully connected layers
    (dropout after the first) and is returned as raw, un-normalised logits.

    Args:
        num_classes: Number of output logits. Defaults to 7.
        img_size: Side length of the square input images. Defaults to 160,
            which yields a 40x40 feature map in front of ``fc1``.

    Raises:
        ValueError: If ``img_size`` is too small to leave at least one pixel
            after the two poolings.
    """

    def __init__(self, num_classes=7, img_size=160):
        super().__init__()

        # The padded 3x3 convolutions keep the spatial size; each 2x2/stride-2
        # pooling floors it in half, so two poolings give img_size // 2 // 2.
        feature_side = img_size // 2 // 2
        if feature_side < 1:
            raise ValueError(
                f"img_size must be at least 4 to survive two 2x2 poolings, got {img_size}"
            )

        # Layer names and creation order are kept as-is so previously saved
        # state dicts still load.
        self.conv1 = torch.nn.Conv2d(in_channels=3, out_channels=64, kernel_size=3, padding=1)
        self.conv2 = torch.nn.Conv2d(in_channels=64, out_channels=128, kernel_size=3, padding=1)
        self.bn1 = torch.nn.BatchNorm2d(64)
        self.bn2 = torch.nn.BatchNorm2d(128)
        self.pool = torch.nn.MaxPool2d(kernel_size=2, stride=2)
        self.dropout = torch.nn.Dropout(0.5)
        self.fc1 = torch.nn.Linear(128 * feature_side * feature_side, 128)
        self.fc2 = torch.nn.Linear(128, 64)
        self.fc3 = torch.nn.Linear(64, num_classes)

    def forward(self, x):
        """Return class logits for a batch of images.

        Args:
            x: Batch of 3-channel images whose height and width match the
                ``img_size`` given to the constructor.

        Returns:
            Tensor with one row per image and ``num_classes`` columns.
        """
        # Block 1: conv -> ReLU -> pool -> batch-norm.
        x = F.relu(self.conv1(x))
        x = self.pool(x)
        x = self.bn1(x)
        # Block 2: same layout with 128 channels.
        x = F.relu(self.conv2(x))
        x = self.pool(x)
        x = self.bn2(x)
        # Flatten everything except the batch dimension.
        x = torch.flatten(x, 1)
        x = F.relu(self.fc1(x))
        x = self.dropout(x)
        x = F.relu(self.fc2(x))
        # No softmax here: the logits are returned as they are.
        x = self.fc3(x)
        return x

In [10]:
# Instantiate the network with its default constructor arguments.
model = CNN()

In [None]:
num_epochs = 7
learning_rate = 0.001

# Plot data: x holds the indices of finished epochs, y the matching mean
# losses. Both grow together, so they always have the same length and stay
# plottable even if training is interrupted part-way.
x = []
y = []

optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)
loss_f = torch.nn.CrossEntropyLoss()

for epoch in range(num_epochs):
    model.train()  # training mode for the dropout and batch-norm layers
    running_loss = 0.0
    num_batches = 0
    for images, labels in train_loader:
        optimizer.zero_grad()
        outputs = model(images)
        # Separate name for the batch tensor so `loss` below is always the
        # per-epoch float.
        batch_loss = loss_f(outputs, labels)
        batch_loss.backward()
        optimizer.step()
        running_loss += batch_loss.item()
        num_batches += 1

    # Mean of the per-batch losses; NaN (instead of ZeroDivisionError) when
    # the loader produced no batches.
    loss = running_loss / num_batches if num_batches else float("nan")
    x.append(epoch)
    y.append(loss)
    print(f'epoch: {epoch+1}/{num_epochs}, loss: {loss}')


In [None]:
# Training curve: x is the epoch index, y the mean loss of that epoch.
fig, ax = plt.subplots()
ax.plot(x, y)
ax.set(xlabel="epoch", ylabel="mean training loss", title="Training loss per epoch")
plt.show()

In [10]:
# Persist only the learned parameters (the state dict), not the model object.
torch.save(model.state_dict(), "model_weights.pth")

In [None]:
# Rebuild the architecture and restore the saved parameters into it.
model = CNN()
# map_location="cpu": the fresh model lives on the CPU, and this also lets the
#   checkpoint load on machines without the device it was saved from.
# weights_only=True: restrict unpickling to tensors and plain containers
#   instead of executing arbitrary pickled objects.
model.load_state_dict(
    torch.load("model_weights.pth", map_location="cpu", weights_only=True)
)

In [None]:
# Use the GPU when one is available, otherwise stay on the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Put the network on that device and switch it to inference mode.
model.to(device)
model.eval()

# Accumulators for predicted and ground-truth labels.
all_preds, all_labels = [], []

In [12]:
# Start from empty accumulators so re-running this cell does not append a
# second copy of the predictions to the previous run's results.
all_preds = []
all_labels = []

# Make sure dropout / batch-norm are in inference mode even if this cell is
# executed on its own.
model.eval()

with torch.no_grad():  # no gradients needed for evaluation
    for images, labels in test_loader:
        images, labels = images.to(device), labels.to(device)
        outputs = model(images)
        # Index of the largest logit is the predicted class.
        _, preds = torch.max(outputs, 1)
        all_preds.extend(preds.cpu().numpy())
        all_labels.extend(labels.cpu().numpy())


In [None]:
# Fraction of collected predictions that equal their ground-truth label.
accuracy = accuracy_score(y_true=all_labels, y_pred=all_preds)
print(f"acc: {accuracy}")

In [None]:
# Stand-alone reload of the trained network for inference.
loaded_model = CNN()
# map_location="cpu" keeps the load independent of the device the checkpoint
# was written from; weights_only=True limits unpickling to tensors and plain
# containers rather than arbitrary pickled objects.
loaded_model.load_state_dict(
    torch.load("model_weights.pth", map_location="cpu", weights_only=True)
)
loaded_model.eval()