In [1]:
from torch.utils.data import DataLoader, Dataset
from PIL import Image
import PIL
import pandas as pd
import os
import torch
import torchvision.models as models
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader
import torchvision.transforms as transforms
from tqdm import tqdm

In [2]:
from sklearn.model_selection import train_test_split

# Step 1: Read the CSV file
data = pd.read_csv('train.csv')  # Adjust the file path as needed

# Step 2: Shuffle the data
data = data.sample(frac=1).reset_index(drop=True)
train_size = 0.8  # 80% of the data for training
test_size = 1 - train_size

# Step 4: Split the data into training and testing sets
train_data, test_data = train_test_split(data, test_size=test_size, random_state=42)

# Step 5: Save the split datasets into separate CSV files if needed
train_data.to_csv('train_data.csv', index=False)
test_data.to_csv('test_data.csv', index=False)

In [3]:
# Кастомный датасет, который загружает csv и на каждое обращение возвращает картинку и 28 точек с ней ассоциированных
import math

from PIL import ImageDraw
import numpy as np
import torchvision.transforms.functional as F
from torchvision.transforms import v2
import matplotlib.pyplot as plt


class CustomDataset(Dataset):
    def __init__(self, csv_file,transform = True,device="cpu"):
        self.data = pd.read_csv(csv_file)
        self.transform = transform
        self.target_size = (224,224)
        self.device = device
        
        
    def __len__(self):
        return len(self.data)
    
    def __getitem__(self, idx):
        img_name = self.data.iloc[idx, 0]  # Assuming the filename column is the first one
        image = Image.open(os.path.join("train", img_name))
        if image.mode == 'L':
                image = image.convert('RGB')
        key_points = self.data.iloc[idx, 1:].values.astype('float32')# Assuming key points start from second column
        if self.transform:
            image_tensor,key_points = self._transform(image, key_points)
        else:
            original_image_size = image.size[::-1]  # (width, height)
            target_image_size = (224, 224)  # (width, height)
            scale_x = target_image_size[0] / original_image_size[0]
            scale_y = target_image_size[1] / original_image_size[1]
            # key_points = self.data.iloc[idx, 1:].values.astype('float32')
            resized_key_points = key_points.reshape(-1, 2) * [scale_x, scale_y]
            key_points = resized_key_points.flatten()  # Flatten back to 1D array
            key_points = torch.tensor(key_points.reshape(-1)).float()
            image = v2.Resize((224,224))(image)
            image_tensor = v2.ToTensor()(image)
            # image_tensor = v2.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])(image_tensor)
            
        # self._visualize_keypoints(image_tensor, key_points)
        return image_tensor, key_points
    
    def _transform(self, image, key_points):
        original_image_size = image.size[::-1]  # (width, height)
        target_image_size = (224, 224)  # (width, height)
        scale_x = target_image_size[0] / original_image_size[0]
        scale_y = target_image_size[1] / original_image_size[1]
        # key_points = self.data.iloc[idx, 1:].values.astype('float32')
        resized_key_points = key_points.reshape(-1, 2) * [scale_x, scale_y]
        key_points = resized_key_points.flatten()
        image = v2.Resize((224,224))(image)
        
        if np.random.rand() < 0.5:
            
            image = v2.functional.hflip(image)
            num_keypoints = key_points.size // 2
            image_width = self.target_size[0]
            center_x = image_width / 2
            
            # Adjust the x-coordinates of keypoints based on their semantic positions
            for i in range(num_keypoints):
                x = key_points[i * 2]
                if x < center_x:  # Only flip keypoints on the left side of the image
                    distance_to_center = center_x - x
                    key_points[i * 2] = center_x + distance_to_center
                else:
                    distance_to_center = x - center_x
                    key_points[i * 2] = center_x - distance_to_center
                    
            pairs_to_swap = [(0, 3), (1, 2), (4, 9), (5, 8), (6, 7), (11, 13)]
            for a, b in pairs_to_swap:
                key_points[a*2], key_points[b*2] = key_points[b*2], key_points[a*2]
                key_points[a*2+1], key_points[b*2+1] = key_points[b*2+1], key_points[a*2+1]
        
        angle = np.random.uniform(-180, 180)  # Rotate between -30 and 30 degrees
        image = v2.functional.rotate(image,-angle)
        theta = np.radians(angle)
        rotation_matrix = np.array([
            [math.cos(theta), -math.sin(theta)],
            [math.sin(theta), math.cos(theta)]
        ])
        key_points_xy = key_points.reshape(-1, 2)
        rotated_key_points_xy = np.dot(rotation_matrix, (key_points_xy - [112, 112]).T).T + [112, 112]
        rotated_key_points = rotated_key_points_xy.reshape(-1)
        key_points = rotated_key_points_xy.reshape(-1)
        
        image = v2.ToTensor()(image)
        # image = v2.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])(image)

        # Reshape key_points back to a one-dimensional array
        key_points = torch.tensor(key_points.reshape(-1)).float()
        
        return image, key_points
    
    
    
    def _visualize_keypoints(self, image, key_points):
        # Convert tensor to PIL image for visualization
        image = v2.ToPILImage()(image)
        draw = ImageDraw.Draw(image)

        # Convert keypoints to numpy array and reshape to (num_points, 2)
        key_points = key_points.numpy().reshape(-1, 2)
        
        # Draw keypoints on the image
        for (x, y) in key_points:
            # Draw a circle for each keypoint
            draw.ellipse((x-2, y-2, x+2, y+2), fill='red')
        
        # Display the image with keypoints
        plt.figure(figsize=(8, 8))
        plt.imshow(image)
        plt.axis('off')
        plt.show()

In [4]:
# Используем GPU, если есть
device = ("cuda" if torch.cuda.is_available() else "cpu") 

In [5]:
# Convolutional neural network которая с нормализацией в виде BatchNorm и регуляризацией Dropout
class FacePointsClassification(nn.Module):
    def __init__(self):
        super(FacePointsClassification, self).__init__()
        self.model = nn.Sequential(
            nn.Conv2d(3, 32, kernel_size=3, stride=2, padding=1),
            
            nn.BatchNorm2d(32),  # Batch normalization
            nn.Dropout(0.2),  # Dropout regularization
            nn.Conv2d(32, 64, kernel_size=3, stride=2, padding=1),
            nn.BatchNorm2d(64),  # Batch normalization
            # nn.MaxPool2d(2, 2),
            nn.Conv2d(64, 128, kernel_size=3, stride=2, padding=1),
            nn.BatchNorm2d(128),  # Batch normalization
            nn.MaxPool2d(2, 2),
            nn.Conv2d(128, 256, kernel_size=3, stride=2, padding=1),
            nn.MaxPool2d(2, 2),
            nn.BatchNorm2d(256),  # Batch normalization
            nn.Conv2d(256, 512, kernel_size=3, stride=2, padding=1),
            nn.MaxPool2d(2, 2),
            nn.BatchNorm2d(512),  # Batch normalization
            nn.Flatten(),
            nn.Linear(512, 512),  # Increase model capacity
            nn.Dropout(0.5),  # Dropout regularization
            nn.Linear(512, 28)
        )
    
    def forward(self, x):
        x = self.model(x)
        return x

In [6]:
import numpy as np
import torch.nn.functional as F
import torch.nn as nn

class HourglassBlock(nn.Module):
    def __init__(self, in_channels, out_channels):
        super(HourglassBlock, self).__init__()
        self.conv1 = nn.Conv2d(in_channels, out_channels, kernel_size=3, padding=1)
        self.bn1 = nn.BatchNorm2d(out_channels)
        self.conv2 = nn.Conv2d(out_channels, out_channels, kernel_size=3, padding=1)
        self.bn2 = nn.BatchNorm2d(out_channels)
        self.conv3 = nn.Conv2d(out_channels, out_channels, kernel_size=3, padding=1)
        self.bn3 = nn.BatchNorm2d(out_channels)
        self.conv4 = nn.Conv2d(out_channels, out_channels, kernel_size=3, padding=1)
        self.bn4 = nn.BatchNorm2d(out_channels)
        self.downsample = nn.MaxPool2d(kernel_size=2, stride=2)

    def forward(self, x):
        x = nn.functional.relu(self.bn1(self.conv1(x)))
        x = nn.functional.relu(self.bn2(self.conv2(x)))
        skip = x
        x = self.downsample(x)
        x = nn.functional.relu(self.bn3(self.conv3(x)))
        x = nn.functional.relu(self.bn4(self.conv4(x)))
        x = nn.functional.interpolate(x, scale_factor=2, mode='nearest')
        x += skip
        return x

class HourglassNetwork(nn.Module):
    def __init__(self, num_stages=2, num_channels=32, num_keypoints=28, dropout=0.5, weight_decay=1e-5):
        super(HourglassNetwork, self).__init__()
        self.num_stages = num_stages
        self.num_channels = num_channels
        self.init_conv = nn.Conv2d(3, num_channels, kernel_size=7, stride=2, padding=3)
        
        # Hourglass blocks
        self.hourglass_stages = nn.ModuleList([
            self._build_hourglass_stage(num_channels) for _ in range(num_stages)
        ])
        
        # Keypoints head
        self.keypoints_head = nn.Conv2d(num_channels, num_keypoints, kernel_size=1)

        # Regularization
        self.dropout = nn.Dropout(dropout)
        self.weight_decay = weight_decay

    def _build_hourglass_stage(self, num_channels):
        layers = []
        for _ in range(3):
            layers.append(HourglassBlock(num_channels, num_channels))
        return nn.Sequential(*layers)

    def forward(self, x):
        # Initial convolution
        x = nn.functional.relu(self.init_conv(x))
        
        intermediate_outputs = []
        # Hourglass blocks
        for stage in self.hourglass_stages:
            x = stage(x)
            intermediate_outputs.append(x)
        
        # Keypoints head
        keypoints = self.keypoints_head(x)
        return keypoints, intermediate_outputs

# Additional layers for post-processing
class PostProcessingModule(nn.Module):
    def __init__(self, input_size, output_size):
        super(PostProcessingModule, self).__init__()
        self.fc1 = nn.Linear(input_size, 128)
        self.fc2 = nn.Linear(128, output_size)

    def forward(self, x):
        x = x.view(x.size(0), -1)  # Flatten the input
        x = nn.functional.relu(self.fc1(x))
        x = self.fc2(x)
        return x

# Combined network
class CombinedNetwork(nn.Module):
    def __init__(self, num_stages=1, num_channels=64, num_keypoints=28, postprocessing_input_size=351232, postprocessing_output_size=28):
        super(CombinedNetwork, self).__init__()
        self.hourglass = HourglassNetwork(num_stages, num_channels, num_keypoints)
        self.postprocessing = PostProcessingModule(postprocessing_input_size, postprocessing_output_size)

    def forward(self, x):
        keypoints, intermediate_outputs = self.hourglass(x)
        processed_output = self.postprocessing(keypoints)
        return processed_output

In [7]:
class DoubleConv(nn.Module):
    """(convolution => [BN] => ReLU) * 2"""
    def __init__(self, in_channels, out_channels):
        super(DoubleConv, self).__init__()
        self.double_conv = nn.Sequential(
            nn.Conv2d(in_channels, out_channels, kernel_size=3, padding=1),
            nn.BatchNorm2d(out_channels),
            nn.ReLU(inplace=True),
            nn.Conv2d(out_channels, out_channels, kernel_size=3, padding=1),
            nn.BatchNorm2d(out_channels),
            nn.ReLU(inplace=True)
        )

    def forward(self, x):
        return self.double_conv(x)

class UNet(nn.Module):
    def __init__(self, in_channels=3, out_channels=28):  # 28 coordinates (14 keypoints * 2)
        super(UNet, self).__init__()
        
        self.in_channels = in_channels
        self.out_channels = out_channels

        self.inc = DoubleConv(in_channels, 64)
        self.down1 = nn.MaxPool2d(2)
        self.conv1 = DoubleConv(64, 128)
        
        self.down2 = nn.MaxPool2d(2)
        self.conv2 = DoubleConv(128, 256)
        
        self.down3 = nn.MaxPool2d(2)
        self.conv3 = DoubleConv(256, 512)
        
        self.up1 = nn.ConvTranspose2d(512, 256, kernel_size=2, stride=2)
        self.conv4 = DoubleConv(512, 256)
        
        self.up2 = nn.ConvTranspose2d(256, 128, kernel_size=2, stride=2)
        self.conv5 = DoubleConv(256, 128)
        
        self.up3 = nn.ConvTranspose2d(128, 64, kernel_size=2, stride=2)
        self.conv6 = DoubleConv(128, 64)
        
        
        self.fc = nn.Sequential(
            nn.Flatten(),
            nn.Linear(64 * 224 * 224, 16),  # Fully connected layer with 1024 units
            nn.ReLU(inplace=True),
            nn.Linear(16, out_channels)  # Output layer with 28 units (14 keypoints * 2)
        )

    def forward(self, x):
        x1 = self.inc(x)
        x2 = self.conv1(self.down1(x1))
        x3 = self.conv2(self.down2(x2))
        x4 = self.conv3(self.down3(x3))
        
        x = self.up1(x4)
        x = self.crop_and_concat(x, x3)
        x = self.conv4(x)
        
        x = self.up2(x)
        x = self.crop_and_concat(x, x2)
        x = self.conv5(x)
        
        x = self.up3(x)
        x = self.crop_and_concat(x, x1)
        x = self.conv6(x)
        
        x = self.fc(x)
        return x
    
    def crop_and_concat(self, upsampled, bypass):
        """
        Crop the `bypass` tensor to match the size of the `upsampled` tensor and concatenate them.
        """
        _, _, h, w = upsampled.size()
        bypass = self.center_crop(bypass, h, w)
        return torch.cat((upsampled, bypass), 1)
    
    def center_crop(self, layer, target_height, target_width):
        _, _, layer_height, layer_width = layer.size()
        diff_y = (layer_height - target_height) // 2
        diff_x = (layer_width - target_width) // 2
        return layer[:, :, diff_y:(diff_y + target_height), diff_x:(diff_x + target_width)]


In [8]:
class SingleConv(nn.Module):
    """(convolution => [BN] => ReLU)"""
    def __init__(self, in_channels, out_channels):
        super(SingleConv, self).__init__()
        self.single_conv = nn.Sequential(
            nn.Conv2d(in_channels, out_channels, kernel_size=3, padding=1),
            nn.BatchNorm2d(out_channels),
            nn.ReLU(inplace=True)
        )

    def forward(self, x):
        return self.single_conv(x)

class MediumUNet(nn.Module):
    def __init__(self, in_channels=3, out_channels=28):  # 28 coordinates (14 keypoints * 2)
        super(MediumUNet, self).__init__()

        self.inc = SingleConv(in_channels, 64) 
        self.down1 = nn.Sequential(nn.MaxPool2d(2), SingleConv(64, 128)) 
        self.down2 = nn.Sequential(nn.MaxPool2d(2), SingleConv(128, 256)) 
        self.down3 = nn.Sequential(nn.MaxPool2d(2), SingleConv(256, 512))
        self.down4 = nn.Sequential(nn.MaxPool2d(2), SingleConv(512, 1024))
        
        self.up1 = nn.ConvTranspose2d(1024, 512, kernel_size=2, stride=2)
        self.conv1 = SingleConv(1024, 512)
        
        self.up2 = nn.ConvTranspose2d(512, 256, kernel_size=2, stride=2)
        self.conv2 = SingleConv(512, 256)
        
        self.up3 = nn.ConvTranspose2d(256, 128, kernel_size=2, stride=2)
        self.conv3 = SingleConv(256, 128)
        
        self.up4 = nn.ConvTranspose2d(128, 64, kernel_size=2, stride=2)
        self.conv4 = SingleConv(128, 64)

        # Global Average Pooling instead of flattening
        self.gap = nn.AdaptiveAvgPool2d(1)
        self.fc = nn.Sequential(
            nn.Flatten(),
            nn.Linear(64, 512),
            nn.ReLU(),
            nn.Linear(512, 256),  
            nn.ReLU(),
            nn.Linear(256, out_channels)
        )

    def forward(self, x):
        x1 = self.inc(x)
        x2 = self.down1(x1)
        x3 = self.down2(x2)
        x4 = self.down3(x3)
        x5 = self.down4(x4)
        x = self.up1(x5)
        x = torch.cat([x, x4], dim=1)
        x = self.conv1(x)
        x = self.up2(x)
        x = torch.cat([x, x3], dim=1)
        x = self.conv2(x)

        x = self.up3(x)
        x = torch.cat([x, x2], dim=1)
        x = self.conv3(x)

        x = self.up4(x)
        x = torch.cat([x, x1], dim=1)
        x = self.conv4(x)
        
        x = self.gap(x)
        x = self.fc(x)
        return x

In [9]:
# Загружаем предобученную модель ResNet и устанавливаем ей Feature Layer так, как нужно нам -- 14 точек на лице
# model = models.resnet18()
# model.train()
# num_features = model.fc.in_features
# num_keypoints = 14  
# model.fc = nn.Sequential(
#     nn.Dropout(0.2),
#     nn.Linear(num_features, 1024),
#     nn.ReLU(inplace=True),
#     nn.Linear(1024, num_keypoints * 2)  # Каждая точка -- это два числа: x и y
# )
# model.to(device)
# if os.path.exists('resnet.pth'):
#     model.load_state_dict(torch.load('resnet.pth', map_location=device))

In [10]:
# Используем свою модель на основе Hourglass Network и загружаем на cuda
# model = CombinedNetwork().to(device)
# model.load_state_dict(torch.load('model.pth', map_location=device))

In [11]:
model = MediumUNet().to(device)
model.load_state_dict(torch.load('model_7.pth', map_location=device))

<All keys matched successfully>

In [12]:
# model = FacePointsClassification().to(device)

In [13]:
train_dataset = CustomDataset(csv_file='train_data.csv', device=device, transform=True)
train_dataloader = DataLoader(train_dataset, batch_size=8, shuffle=True)
val_dataset = CustomDataset(csv_file='test_data.csv', device=device, transform=False)
val_dataloader = DataLoader(val_dataset, batch_size=1, shuffle=False)

In [14]:

from torch.optim.lr_scheduler import ReduceLROnPlateau
# Определяем функцию потерь по заданию и алгоритм оптимизации
criterion = nn.MSELoss()
optimizer = optim.Adam(model.parameters(), lr=0.0074, weight_decay=1e-4)
lr_scheduler = optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=50)

In [15]:
num_epochs = 100
for epoch in range(num_epochs):
    model.train()
    running_loss = 0.0
    for images, key_points in tqdm(train_dataloader):
        images = images.to(device)
        key_points = key_points.to(device)

        optimizer.zero_grad()
        outputs = model(images)
        loss = criterion(outputs, key_points)
        loss.backward()
        optimizer.step()

        running_loss += loss.item() * images.size(0)

    epoch_loss = running_loss / len(train_dataloader.dataset)
    
    # Validate the model on the validation set
    model.eval()
    val_loss = 0.0
    with torch.no_grad():
        for val_images, val_key_points in tqdm(val_dataloader):
            val_images = val_images.to(device)
            val_key_points = val_key_points.to(device)
            val_outputs = model(val_images)
            val_loss += criterion(val_outputs, val_key_points).item() * val_images.size(0)
    
    val_loss /= len(val_dataloader.dataset)

    # Step the scheduler with the validation loss
    lr_scheduler.step()
    print(lr_scheduler.get_last_lr())
    print(f'Epoch {epoch+1}/{num_epochs}, Train Loss: {epoch_loss}. Val loss: {val_loss}')
    torch.save(model.state_dict(), 'model_8.pth')

100%|██████████| 500/500 [04:30<00:00,  1.85it/s]
100%|██████████| 1000/1000 [00:29<00:00, 33.75it/s]


[0.007392698895184605]
Epoch 1/100, Train Loss: 214.03321531677247. Val loss: 321.8982024130821


100%|██████████| 500/500 [04:21<00:00,  1.91it/s]
100%|██████████| 1000/1000 [00:29<00:00, 34.41it/s]


[0.007370824394863569]
Epoch 2/100, Train Loss: 203.2440328979492. Val loss: 164.92266852378845


100%|██████████| 500/500 [04:20<00:00,  1.92it/s]
100%|██████████| 1000/1000 [00:29<00:00, 34.38it/s]


[0.0073344628276961485]
Epoch 3/100, Train Loss: 194.62450736236573. Val loss: 262.2246756086349


100%|██████████| 500/500 [04:17<00:00,  1.94it/s]
100%|██████████| 1000/1000 [00:26<00:00, 38.34it/s]


[0.007283757696175935]
Epoch 4/100, Train Loss: 185.66132890319824. Val loss: 387.2910668554306


100%|██████████| 500/500 [03:54<00:00,  2.13it/s]
100%|██████████| 1000/1000 [00:26<00:00, 38.38it/s]


[0.007218909110292068]
Epoch 5/100, Train Loss: 168.24881130981444. Val loss: 272.7255507297516


100%|██████████| 500/500 [03:55<00:00,  2.13it/s]
100%|██████████| 1000/1000 [00:26<00:00, 38.32it/s]


[0.00714017299778653]
Epoch 6/100, Train Loss: 186.17786496734618. Val loss: 325.5882059555054


100%|██████████| 500/500 [04:19<00:00,  1.93it/s]
100%|██████████| 1000/1000 [00:28<00:00, 34.49it/s]


[0.007047860094124273]
Epoch 7/100, Train Loss: 176.698814994812. Val loss: 119.75628626632691


100%|██████████| 500/500 [04:18<00:00,  1.93it/s]
100%|██████████| 1000/1000 [00:29<00:00, 34.48it/s]


[0.006942334716162295]
Epoch 8/100, Train Loss: 179.59666694641112. Val loss: 167.16221603870392


100%|██████████| 500/500 [04:20<00:00,  1.92it/s]
100%|██████████| 1000/1000 [00:28<00:00, 34.50it/s]


[0.006824013324357456]
Epoch 9/100, Train Loss: 157.91110636138916. Val loss: 259.414092751503


100%|██████████| 500/500 [04:19<00:00,  1.93it/s]
100%|██████████| 1000/1000 [00:29<00:00, 34.45it/s]


[0.006693362879187306]
Epoch 10/100, Train Loss: 151.6955668563843. Val loss: 345.08406630420683


100%|██████████| 500/500 [04:17<00:00,  1.94it/s]
100%|██████████| 1000/1000 [00:29<00:00, 34.38it/s]


[0.006550898998270421]
Epoch 11/100, Train Loss: 168.68543017578125. Val loss: 160.29490923166276


100%|██████████| 500/500 [04:17<00:00,  1.94it/s]
100%|██████████| 1000/1000 [00:29<00:00, 34.47it/s]


[0.006397183921459223]
Epoch 12/100, Train Loss: 147.59846968841552. Val loss: 101.26050327205658


 31%|███       | 156/500 [01:17<02:50,  2.01it/s]


KeyboardInterrupt: 

In [None]:


with torch.no_grad():
    model.eval()
    for images, key_points in tqdm(test_dataloader):
        images, key_points = images.to(device), key_points.to(device)
        outputs = model(images)
        loss = criterion(outputs, key_points)
    print(f'Epoch {epoch+1}/{num_epochs}, Loss: {loss.item()}')

  0%|          | 0/1000 [00:00<?, ?it/s]

100%|██████████| 1000/1000 [00:19<00:00, 50.36it/s]

Epoch 40/100, Loss: 13472.1572265625





In [None]:
import PIL.ImageDraw

# model = CombinedNetwork().to(device)
# model.load_state_dict(torch.load('model.pth', map_location=device))
model.eval()
transform = transforms.Compose(
    [
    transforms.Resize((224, 224)),
    transforms.ToTensor()
    ]
)
folder_path = "test"
results = []
with torch.no_grad():
    model.eval()
    for filename in os.listdir(folder_path):
        if filename.endswith(".jpg") or filename.endswith(".png"):  # Adjust file extensions as needed
            
            image_path = os.path.join(folder_path, filename)
            image = Image.open(image_path)
            original_image_size = image.size[::-1]
            target_image_size = (224,224)
            scale_x = original_image_size[0] / target_image_size[0]
            scale_y = original_image_size[1] / target_image_size[1]
            
            image = v2.Resize((224,224))(image)
            image_tensor = v2.ToTensor()(image)
            back = transforms.Compose([transforms.ToPILImage(), transforms.Resize(original_image_size)])
            image_tensor = v2.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])(image_tensor)
            outputs = model(image_tensor.unsqueeze(0).to(device))
            outputs[::2] *= scale_x
            outputs[1::2] *= scale_y
            image = back(image_tensor.squeeze(0))
            # draw = PIL.ImageDraw.Draw(image)
            outputs = outputs.squeeze().cpu().numpy().tolist()
            outputs = [round(coord) for coord in outputs]
            # for i in range(outputs.shape[0] // 2):
                # draw.ellipse((outputs[2*i] - 3, outputs[2*i+1] - 3,outputs[2*i] + 3, outputs[2*i+1] + 3), 'red',2)
                
            results.append([filename] + outputs)
columns = ['filename']
for i in range(1, 15):
    columns.extend([f'x{i}', f'y{i}'])
df = pd.DataFrame(results, columns=columns)

df.to_csv('test.csv', index=False)

In [None]:
test = pd.read_csv('train.csv')
idx = 0
img_name = test.iloc[idx, 0]  # Assuming the filename column is the first one
image = Image.open(os.path.join("train", img_name))
if image.mode == 'RGB':  # Grayscale image
    # Convert grayscale image to RGB
    image = image.convert('L')
image = image.convert('RGB')
key_points = test.iloc[idx, 1:].values.astype('float32')# Assuming key points start from second column
for key in key_points:
    key = torch.tensor(key).to(device)
colors = [
                'red', 'green', 'blue', 'yellow', 'purple', 'orange', 'cyan', 'magenta', 'lime', 'pink',
                'teal', 'lavender', 'brown', 'beige'
            ]

# image = back(image_tensor)
draw = PIL.ImageDraw.Draw(image)
for i in range(int(key_points.shape[0] / 2)):
    draw.ellipse((key_points[2*i] - 3, key_points[2*i+1] - 3,key_points[2*i] + 3, key_points[2*i+1] + 3), colors[i],2)
image.show()
image.save("train.png")

In [16]:
folder_path = "test"  # Change this to the path of your image folder
transform = transforms.Compose(
    [
    transforms.Resize((224, 224)),
    transforms.ToTensor()
    ]
)
model = MediumUNet().to(device)
model.load_state_dict(torch.load("model_8.pth"))

with torch.no_grad():
    model.eval()
    for filename in os.listdir(folder_path):
        if filename.endswith(".jpg") or filename.endswith(".png"):  # Adjust file extensions as needed
            print(filename)
            image_path = os.path.join(folder_path, filename)
            image = Image.open(image_path)
            if image.mode == 'RGB':
                image = image.convert('L')
            image = image.convert('RGB')
            image_tensor = transform(image)
            original_image_size = image.size[::-1]
            
            # Get the model output
            output = model(image_tensor.to(device).unsqueeze(0))
            back = transforms.Compose([transforms.ToPILImage()])
            output = output.squeeze().cpu().numpy()
            image = back(image_tensor)
            draw = PIL.ImageDraw.Draw(image)
            colors = [
                'red', 'green', 'blue', 'yellow', 'purple', 'orange', 'cyan', 'magenta', 'lime', 'pink',
                'teal', 'lavender', 'brown', 'beige'
            ]
            for i in range(0, len(output), 2):
                draw.ellipse((output[i] - 2, output[i + 1] - 2, output[i] + 2, output[i + 1] + 2), fill=colors[i // 2])
            # Show the image
            image.show()
            print(output)


00008.jpg
[ 51.190086  44.68474   94.28988   47.75281  139.37932   50.42398
 182.89108   55.827522  59.352192  64.41544   75.81185   64.83055
  90.545876  67.84083  140.71674   71.36037  157.02075   70.74526
 173.0281    73.009544 109.552185 116.9162    77.74251  152.8554
 108.36851  156.51566  139.79965  155.88333 ]
00014.jpg
[ 31.290686  76.16387   70.600845  61.86674  111.14813   47.055336
 151.23613   34.79238   46.60512   89.56136   61.495636  83.385864
  75.87622   80.578125 120.45459   65.2323   134.43771   57.639114
 149.34256   53.929615 110.225494 115.002426  94.781265 157.88368
 123.26252  149.50348  150.8914   136.93298 ]
00015.jpg
[ 52.039543  42.14046   97.6141    46.126583 143.35995   50.624264
 189.0402    59.28097   60.321712  61.948578  77.5386    62.60893
  93.15272   66.83176  143.97093   72.493256 160.96423   72.21099
 177.54556   75.63316  110.73839  116.52202   75.574135 151.8795
 107.40419  157.14986  139.5967   157.92072 ]
00017.jpg
[ 32.202423  71.61119   72.9

KeyboardInterrupt: 