In [2]:
from torch.utils.data import Dataset, DataLoader
from torch.nn import Module
import torch
import torch.nn as nn
import torch.optim as optim
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from PIL import Image
from torchvision import transforms
from run import read_csv
from tqdm import tqdm 
from os.path import abspath, dirname, join

# Dataset root directories. The trailing slash matters: the legacy
# ImDataset(path_to_dir) cell builds paths by plain string concatenation
# (path_to_dir + 'gt.csv').
path_to_train = './tests/00_test_img_input/train/'
# NOTE(review): path_to_test is not referenced by any cell visible here.
path_to_test = './tests/00_test_img_input/test/'


def get_device():
    """Return the torch device to compute on: CUDA when available, otherwise CPU."""
    backend = "cuda" if torch.cuda.is_available() else "cpu"
    return torch.device(backend)


def format_number(number):
    """Build an image file name from an integer id: zero-padded to five digits plus ``.jpg``."""
    padded_id = format(number, "05d")
    return padded_id + ".jpg"


class ImDataset(Dataset):
    """Face images paired with their facial-landmark coordinates.

    Every image is converted to RGB, resized to ``IMAGE_SIZE`` and normalised.
    The landmark coordinates are rescaled by the same factors so that they
    stay aligned with the resized image.

    Args:
        train_gt: mapping ``image file name -> flat coordinate array`` laid out
            as ``[x1, y1, x2, y2, ...]`` in pixels of the original image.
        train_img_dir: directory that contains the image files.
        train: when ``False`` the dataset is left empty; an inference mode is
            not implemented.
    """

    # Target (width, height) every image is resized to.
    IMAGE_SIZE = (256, 256)
    # Per-channel normalisation statistics (the usual ImageNet values).
    NORM_MEAN = [0.485, 0.456, 0.406]
    NORM_STD = [0.229, 0.224, 0.225]

    def __init__(self, train_gt, train_img_dir, train=True):
        self.train_img_dir = train_img_dir
        self.train_gt = train_gt
        self.train = train
        # File names are the sample index; train_gt is only touched in train mode.
        self.data = list(train_gt.keys()) if self.train else []

        # Built once here instead of on every __getitem__ call.
        width, height = self.IMAGE_SIZE
        self.transform = transforms.Compose([
            transforms.Resize((height, width)),  # torchvision expects (h, w)
            transforms.ToTensor(),
            transforms.Normalize(mean=self.NORM_MEAN, std=self.NORM_STD),
        ])

    def __len__(self):
        """Number of samples (0 when the dataset was created with ``train=False``)."""
        return len(self.data)

    @staticmethod
    def _rescale_marks(marks, original_size, new_size):
        """Scale flat ``[x1, y1, ...]`` marks from ``original_size`` to ``new_size``.

        Both sizes are ``(width, height)``. The input is never modified and the
        result is always float64, so integer-valued ground truth works too
        (an in-place ``*=`` with a float factor fails on integer arrays).
        """
        scaled = np.array(marks, dtype=np.float64)  # float copy of the input
        scaled[::2] *= new_size[0] / original_size[0]   # x coordinates
        scaled[1::2] *= new_size[1] / original_size[1]  # y coordinates
        return scaled

    def __getitem__(self, index, plot=False):
        """Return ``(image_tensor, marks_tensor)`` for sample ``index``.

        Args:
            index: position in ``self.data``.
            plot: if ``True``, also show the original image with its
                (unscaled) landmarks drawn on top.

        Returns:
            A tuple of the normalised image tensor and a float32 tensor with
            the rescaled coordinates, or ``None`` when ``train`` is ``False``.
        """
        if not self.train:
            # Inference mode was never implemented; keep the original result.
            return None

        image_filename = self.data[index]
        marks = self.train_gt[image_filename]

        # Force three channels (not every file is stored as RGB). convert()
        # returns an in-memory copy, so the file can be closed right away.
        with Image.open(join(self.train_img_dir, image_filename)) as raw_image:
            image = raw_image.convert('RGB')

        tensor_image = self.transform(image)
        resized_marks = self._rescale_marks(marks, image.size, self.IMAGE_SIZE)
        tensor_resized_marks = torch.Tensor(resized_marks)

        if plot:
            plt.figure(figsize=(4.5, 4.5))
            plt.imshow(image)
            # Draw the points on the original (not resized) image.
            plt.scatter(marks[::2], marks[1::2], color='red', s=10)
            plt.title(f'{image_filename}')
            plt.show()
        return (tensor_image, tensor_resized_marks)


class Model(nn.Module):
    """Small CNN that regresses 28 values (14 ``(x, y)`` landmarks) from an image.

    Three ``Conv -> BatchNorm -> ReLU -> MaxPool`` stages (64, 128, 256
    channels) are followed by a two-layer fully connected head. The first
    linear layer expects ``256 * 32 * 32`` features, i.e. a 3x256x256 input
    after the three 2x poolings.
    """

    def __init__(self):
        super().__init__()

        def conv_block(in_channels, out_channels):
            # 3x3 "same" convolution, then a 2x2 max-pool that halves H and W.
            return [
                nn.Conv2d(in_channels=in_channels, out_channels=out_channels,
                          kernel_size=3, stride=1, padding=1),
                nn.BatchNorm2d(out_channels),
                nn.ReLU(),
                nn.MaxPool2d(kernel_size=2, stride=2),
            ]

        # Each block must consume the channel count produced by the previous
        # one (3 -> 64 -> 128 -> 256). The layers stay in one flat Sequential
        # so the state_dict keys keep the ``model.<index>.*`` layout.
        self.model = nn.Sequential(
            *conv_block(3, 64),
            *conv_block(64, 128),
            *conv_block(128, 256),

            nn.Flatten(),
            nn.Dropout(0.5),
            nn.Linear(256 * 32 * 32, 64),
            nn.ReLU(),

            nn.Linear(64, 28),
        )

    def forward(self, x):
        """Map a ``(batch, 3, 256, 256)`` image tensor to ``(batch, 28)`` coordinates."""
        return self.model(x)


def train_detector(train_gt, train_img_dir, fast_train=False):
    """Train the facial-landmark regressor and return the trained model.

    Args:
        train_gt: mapping ``image file name -> flat landmark array``; passed
            to ``ImDataset`` unchanged.
        train_img_dir: directory with the training images.
        fast_train: if ``True`` run a single epoch and write no checkpoint;
            otherwise train for 20 epochs and save the weights to
            ``facepoints_model.pt`` in the current working directory.

    Returns:
        The trained ``Model`` instance, on the device chosen by ``get_device``.
    """
    # The two modes differ only in epoch count and whether a checkpoint is written.
    num_epochs = 1 if fast_train else 20

    train_dataset = ImDataset(train_gt, train_img_dir, train=True)
    train_loader = DataLoader(train_dataset, batch_size=8, shuffle=True, num_workers=4)

    device = get_device()
    model = Model().to(device)

    criterion = nn.MSELoss()
    optimizer = optim.Adam(model.parameters(), lr=0.001)

    for epoch in range(num_epochs):
        model.train()
        running_loss = None
        for images, marks in tqdm(train_loader):
            images, marks = images.to(device), marks.to(device)
            optimizer.zero_grad()
            outputs = model(images)
            loss = criterion(outputs, marks)
            loss.backward()
            optimizer.step()

            # Exponential moving average of the batch loss. item() must be
            # *called*: storing the bound method breaks the arithmetic below.
            batch_loss = loss.item()
            if running_loss is None:
                running_loss = batch_loss
            else:
                running_loss = 0.99 * running_loss + 0.01 * batch_loss

        # The moving average is already a per-batch value, so it is reported
        # as is; an empty loader leaves it undefined.
        loss_text = 'n/a' if running_loss is None else f'{running_loss:.4f}'
        print(f'Epoch [{epoch+1}/{num_epochs}], Loss: {loss_text}')

    if not fast_train:
        torch.save(model.state_dict(), 'facepoints_model.pt')
    return model

def detect():
    """Placeholder for the inference entry point; not implemented, returns ``None``."""
    return None



In [3]:
# Load the ground truth (file name -> coordinate array, as shown by the
# display cell that follows) and build the training dataset from it.
train_gt = read_csv(join(path_to_train, 'gt.csv'))
train_img_dir = join(path_to_train, 'images')
# `A` is reused by the inspection cells further down.
A = ImDataset(train_gt, train_img_dir)

In [4]:
# Preview only the first few entries; dumping the whole ground-truth dict
# floods the notebook output.
display(dict(list(train_gt.items())[:5]))

{'00000.jpg': array([ 51.,  51., 127.,  60., 169.,  65., 203.,  52.,  68.,  77.,  85.,
         75., 110.,  82., 160.,  87., 171.,  79., 193.,  84., 156., 144.,
         92., 174., 140., 180., 168., 179.]),
 '00001.jpg': array([21., 20., 36., 18., 57., 19., 80., 25., 23., 27., 34., 26., 40.,
        28., 59., 30., 68., 30., 78., 33., 43., 46., 31., 61., 45., 66.,
        60., 65.]),
 '00002.jpg': array([ 23., 116.,  39.,  83.,  47.,  57.,  56.,  19.,  36., 115.,  40.,
         96.,  47.,  87.,  59.,  60.,  59.,  41.,  62.,  31.,  91.,  86.,
        106., 122., 115., 100., 121.,  81.]),
 '00003.jpg': array([ 29.,  18.,  63.,  25.,  84.,  32., 105.,  34.,  38.,  31.,  48.,
         34.,  59.,  39.,  79.,  44.,  85.,  43.,  95.,  45.,  72.,  71.,
         46.,  83.,  63.,  87.,  75.,  89.]),
 '00004.jpg': array([ 31.,  53.,  76.,  44., 107.,  38., 136.,  33.,  40.,  67.,  52.,
         62.,  73.,  64., 104.,  58., 115.,  50., 129.,  49., 111., 105.,
         72., 137., 106., 132., 126., 1

In [130]:
# Smoke-test the dataset on (at most) the first 500 samples and print the
# rescaled landmarks. The bound keeps the loop valid for smaller datasets.
# To visualise a sample call A.__getitem__(i, plot=True) instead.
for i in range(min(500, len(A))):
    q = A[i]
    print(q[1])

tensor([ 49.8321,  49.8321, 124.0916,  58.6260, 165.1298,  63.5115, 198.3512,
         50.8092,  66.4427,  75.2366,  83.0534,  73.2824, 107.4809,  80.1221,
        156.3359,  85.0076, 167.0840,  77.1908, 188.5802,  82.0763, 152.4275,
        140.7023,  89.8931, 170.0153, 136.7939, 175.8779, 164.1527, 174.9008])
tensor([ 57.1915,  54.4681,  98.0426,  49.0213, 155.2340,  51.7447, 217.8723,
         68.0851,  62.6383,  73.5319,  92.5957,  70.8085, 108.9362,  76.2553,
        160.6808,  81.7021, 185.1915,  81.7021, 212.4255,  89.8723, 117.1064,
        125.2766,  84.4255, 166.1277, 122.5532, 179.7447, 163.4043, 177.0213])
tensor([ 35.4699, 178.8916,  60.1446, 128.0000,  72.4819,  87.9036,  86.3614,
         29.3012,  55.5181, 177.3494,  61.6867, 148.0482,  72.4819, 134.1687,
         90.9880,  92.5301,  90.9880,  63.2289,  95.6145,  47.8072, 140.3374,
        132.6265, 163.4699, 188.1446, 177.3494, 154.2169, 186.6024, 124.9157])
tensor([ 59.8710,  37.1613, 130.0645,  51.6129, 173.4194,  66

In [91]:
# Print the generated file names for ids 0..999.
# NOTE(review): the saved output of this cell shows six-digit names
# (000000.jpg), while format_number pads to five digits ("05d") -- the
# output is stale and the cell should be re-run.
for i in range(1000):
    print(format_number(i))

000000.jpg
000001.jpg
000002.jpg
000003.jpg
000004.jpg
000005.jpg
000006.jpg
000007.jpg
000008.jpg
000009.jpg
000010.jpg
000011.jpg
000012.jpg
000013.jpg
000014.jpg
000015.jpg
000016.jpg
000017.jpg
000018.jpg
000019.jpg
000020.jpg
000021.jpg
000022.jpg
000023.jpg
000024.jpg
000025.jpg
000026.jpg
000027.jpg
000028.jpg
000029.jpg
000030.jpg
000031.jpg
000032.jpg
000033.jpg
000034.jpg
000035.jpg
000036.jpg
000037.jpg
000038.jpg
000039.jpg
000040.jpg
000041.jpg
000042.jpg
000043.jpg
000044.jpg
000045.jpg
000046.jpg
000047.jpg
000048.jpg
000049.jpg
000050.jpg
000051.jpg
000052.jpg
000053.jpg
000054.jpg
000055.jpg
000056.jpg
000057.jpg
000058.jpg
000059.jpg
000060.jpg
000061.jpg
000062.jpg
000063.jpg
000064.jpg
000065.jpg
000066.jpg
000067.jpg
000068.jpg
000069.jpg
000070.jpg
000071.jpg
000072.jpg
000073.jpg
000074.jpg
000075.jpg
000076.jpg
000077.jpg
000078.jpg
000079.jpg
000080.jpg
000081.jpg
000082.jpg
000083.jpg
000084.jpg
000085.jpg
000086.jpg
000087.jpg
000088.jpg
000089.jpg
000090.jpg

In [30]:
# NOTE(review): this one-argument call only matches the legacy
# ImDataset(path_to_dir, train=True) class defined in the last cell; with the
# ImDataset(train_gt, train_img_dir) class from the first cell it raises
# TypeError because train_img_dir is missing.
A = ImDataset(path_to_train)

In [31]:
# Show the first sample; indexing dispatches to ImDataset.__getitem__.
A[0]

(262, 262, 3)


(array([[[  0,   7,  11],
         [  0,  12,  13],
         [ 10,  17,  10],
         ...,
         [147, 163, 178],
         [147, 163, 178],
         [147, 163, 178]],
 
        [[ 25,  35,  34],
         [  0,   3,   0],
         [  0,   2,   0],
         ...,
         [147, 163, 178],
         [147, 163, 178],
         [147, 163, 178]],
 
        [[  4,   2,   0],
         [  8,   5,   0],
         [ 21,  13,   0],
         ...,
         [147, 163, 178],
         [147, 163, 178],
         [147, 163, 178]],
 
        ...,
 
        [[ 84, 157, 166],
         [ 70, 137, 154],
         [ 60, 119, 151],
         ...,
         [119,  84,  46],
         [112,  77,  39],
         [110,  75,  37]],
 
        [[ 64, 137, 144],
         [ 68, 137, 152],
         [ 98, 162, 190],
         ...,
         [120,  85,  47],
         [113,  78,  40],
         [110,  75,  37]],
 
        [[ 51, 124, 130],
         [ 78, 150, 162],
         [119, 184, 212],
         ...,
         [121,  86,  48],
  

In [None]:
class ImDataset(Dataset):
    """Legacy dataset variant that reads ``gt.csv`` itself.

    NOTE(review): this class reuses the name of the
    ``ImDataset(train_gt, train_img_dir)`` class from the first cell;
    executing this cell replaces that definition.

    Args:
        path_to_dir: directory that contains ``gt.csv`` and an ``images``
            sub-directory (a trailing slash is optional).
        train: when ``False`` nothing is loaded and the dataset is empty; an
            inference mode is not implemented.
    """

    # Target (width, height) every image is resized to.
    IMAGE_SIZE = (256, 256)
    # Landmark columns of gt.csv in (x, y) pair order: x1, y1, ..., x14, y14.
    MARK_COLUMNS = [f'{axis}{i}' for i in range(1, 15) for axis in ('x', 'y')]

    def __init__(self, path_to_dir, train=True):
        self.data = []
        self.path_to_dir = path_to_dir
        self.train = train

        if self.train:
            self.data = self._read_ground_truth(path_to_dir)
        # Kept for callers that read the attribute; it is now set in every
        # mode, so it also exists when train is False.
        self.len = len(self.data)

        # Built once here instead of on every __getitem__ call.
        width, height = self.IMAGE_SIZE
        self.transform = transforms.Compose([
            transforms.Resize((height, width)),  # torchvision expects (h, w)
            transforms.ToTensor(),
            transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
        ])

    @classmethod
    def _read_ground_truth(cls, path_to_dir):
        """Read ``gt.csv`` and return a list of ``(file name, (14, 2) marks array)``."""
        df_gt_train = pd.read_csv(join(path_to_dir, 'gt.csv'))
        filenames = df_gt_train['filename'].tolist()
        # One row per image -> (n_images, 14, 2), each inner row being (x, y).
        marks = df_gt_train[cls.MARK_COLUMNS].to_numpy().reshape(-1, 14, 2)
        return list(zip(filenames, marks))

    def __len__(self):
        """Number of loaded samples (0 when created with ``train=False``)."""
        return len(self.data)

    def __getitem__(self, index, plot=False):
        """Return ``(image_tensor, marks_tensor)`` for sample ``index``.

        Args:
            index: position in ``self.data``.
            plot: if ``True``, also show the original image with its
                (unscaled) landmarks drawn on top.

        Returns:
            The normalised image tensor and a flat float32 tensor
            ``[x1, y1, ..., x14, y14]`` of rescaled coordinates, or ``None``
            when ``train`` is ``False``.
        """
        if not self.train:
            # Inference mode was never implemented; keep the original result.
            return None

        image_filename, marks = self.data[index]

        # Force three channels (not every file is stored as RGB). convert()
        # returns an in-memory copy, so the file can be closed right away.
        with Image.open(join(self.path_to_dir, 'images', image_filename)) as raw_image:
            image = raw_image.convert('RGB')

        original_width, original_height = image.size
        new_width, new_height = self.IMAGE_SIZE

        tensor_image = self.transform(image)
        # Broadcast the (x, y) scale factors over the (14, 2) array.
        scale = np.array([new_width / original_width, new_height / original_height])
        resized_marks = torch.Tensor((marks * scale).flatten())

        if plot:
            plt.figure(figsize=(4.5, 4.5))
            plt.imshow(image)
            # Draw the points on the original (not resized) image.
            plt.scatter(marks[:, 0], marks[:, 1], color='red', s=10)
            plt.title(f'{image_filename}')
            plt.show()
        return (tensor_image, resized_marks)