In [5]:
import torch
from torch.utils.data import DataLoader, random_split
import torch.optim as optim
from sklearn.metrics import f1_score
import torch.nn.functional as F
from torchvision import transforms
import os
import torch
from torch.utils.data import Dataset
from PIL import Image
import pandas as pd
from torchvision import transforms
from torchsummary import summary

In [6]:
class ImageToNumDataset(Dataset):
    def __init__(self, img_dir, transform=None, answers_file=None):
        self.img_dir = img_dir
        self.transform = transform
        self.answers_file = answers_file

        if self.answers_file is not None:
            self.img_labels = pd.read_csv(answers_file)
        else:
            self.img_labels = None
        
        self.image_filenames = [file for file in os.listdir(img_dir) if file.endswith('.png')]
        self.image_filenames.sort(key=lambda x: int(x.replace("img_", "", 1).replace(".png", "", 1)))

    def __len__(self):
        return len(self.image_filenames)

    def __getitem__(self, idx):
        img_name = self.image_filenames[idx]
        img_path = os.path.join(self.img_dir, img_name)
        image = Image.open(img_path).convert("L")

        if self.transform:
            image = self.transform(image)

        if self.img_labels is not None:
            label = self.img_labels.iloc[idx, 1]
            return image, label
        else:
            return image

In [7]:
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.5,), (0.5,))
])

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Using {device} device")
dataset = ImageToNumDataset(img_dir="C:/Users/rosti/Desktop/data/train_images_after_model", transform=transform, answers_file="C:/Users/rosti/Desktop/data/train_answers.csv")
train_size = int(0.8 * len(dataset))
test_size = len(dataset) - train_size
train_dataset, test_dataset = random_split(dataset, [train_size, test_size])

train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=32, shuffle=False)

Using cpu device


In [8]:
import torch.nn as nn
from torchsummary import summary

class EnhancedNoMaskModel(nn.Module):
    def __init__(self):
        super().__init__()
        self.pool = nn.MaxPool2d(kernel_size=2, stride=2)
        self.dropout = nn.Dropout(0.2)

        # Increase the number of convolutional layers
        self.conv1 = nn.Conv2d(in_channels=1, out_channels=6, kernel_size=3, stride=1, padding=1)
        self.conv2 = nn.Conv2d(in_channels=6, out_channels=12, kernel_size=3, stride=1, padding=1)
        self.conv3 = nn.Conv2d(in_channels=12, out_channels=24, kernel_size=3, stride=1, padding=1)
        self.conv4 = nn.Conv2d(in_channels=24, out_channels=48, kernel_size=3, stride=1, padding=1)

        self.bn1 = nn.BatchNorm2d(6)
        self.bn2 = nn.BatchNorm2d(12)
        self.bn3 = nn.BatchNorm2d(24)
        self.bn4 = nn.BatchNorm2d(48)

        self.relu = nn.ReLU()

        # Adjusting the size for the linear layer input
        self.input_liner = nn.Linear(48 * 16 * 16, 48 * 16)
        self.liner1 = nn.Linear(48 * 16, 48)
        self.output_liner = nn.Linear(48, 3)

        self.softmax = nn.Softmax(dim=1)

    def forward(self, x):
        x = self.conv1(x)
        x = self.bn1(x)
        x = self.pool(self.relu(x))

        x = self.conv2(x)
        x = self.bn2(x)
        x = self.pool(self.relu(x))

        x = self.conv3(x)
        x = self.bn3(x)
        x = self.pool(self.relu(x))

        x = self.conv4(x)
        x = self.bn4(x)
        x = self.pool(self.relu(x))

        # Adjusting the flattening based on the new output size
        x = x.view(-1, 48 * 16 * 16)

        x = self.relu(self.input_liner(x))
        x = self.dropout(x)
        x = self.liner1(x)
        x = self.output_liner(x)

        # Applying Softmax to get probability distribution
        x = self.softmax(x)

        return x

# Create the model instance
model = EnhancedNoMaskModel()

# Summary for input size (1, 256, 256)
summary(model, (1, 256, 256))

----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv2d-1          [-1, 6, 256, 256]              60
       BatchNorm2d-2          [-1, 6, 256, 256]              12
              ReLU-3          [-1, 6, 256, 256]               0
         MaxPool2d-4          [-1, 6, 128, 128]               0
            Conv2d-5         [-1, 12, 128, 128]             660
       BatchNorm2d-6         [-1, 12, 128, 128]              24
              ReLU-7         [-1, 12, 128, 128]               0
         MaxPool2d-8           [-1, 12, 64, 64]               0
            Conv2d-9           [-1, 24, 64, 64]           2,616
      BatchNorm2d-10           [-1, 24, 64, 64]              48
             ReLU-11           [-1, 24, 64, 64]               0
        MaxPool2d-12           [-1, 24, 32, 32]               0
           Conv2d-13           [-1, 48, 32, 32]          10,416
      BatchNorm2d-14           [-1, 48,

In [9]:
model = EnhancedNoMaskModel()
model.to(device)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

In [10]:
def calculate_f1(loader):
    model.eval()
    all_labels = []
    all_preds = []
    with torch.no_grad():
        for images, labels in loader:
            images, labels = images.to(device), labels.to(device)
            outputs = model(images)
            _, predicted = torch.max(outputs, 1)
            all_labels.extend(labels.cpu().numpy())
            all_preds.extend(predicted.cpu().numpy())
    return f1_score(all_labels, all_preds, average='macro')

In [12]:
# Обучение
num_epochs = 1
for epoch in range(num_epochs):
    model.train()
    running_loss = 0.0

    for images, labels in train_loader:
        images, labels = images.to(device), labels.to(device)
        optimizer.zero_grad()
        outputs = model(images)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
        running_loss += loss.item()

    f1 = calculate_f1(test_loader)
    print(f"Epoch [{epoch+1}/{num_epochs}], Loss: {running_loss/len(train_loader)}, F1-Score: {f1}")
    model_path = f"output_models/model_main{epoch+1}.pth" 
    torch.save(model.state_dict(), model_path)
    print(f"Model saved to {model_path}")

print("Training Complete")


Epoch [1/1], Loss: 1.2223237191929537, F1-Score: 0.17148097203117832
Model saved to output_models/model_main1.pth
Training Complete
