In [15]:
import os
import csv

image_dir = "./faces"

csv_file = "./podatci.csv"

def extract_info(file_name):
    parts = file_name.split("_")
    
    if len(parts) < 2:
        print(f"Greška: Naziv datoteke '{file_name}' nema dovoljno dijelova.")
        return None, None
    
    age = int(parts[0])
    
    if len(parts[1]) < 1:
        print(f"Greška: Drugi dio naziva datoteke '{file_name}' nema dovoljno znakova za ekstrakciju spola.")
        return None, None
    
    gender = int(parts[1][0])
    
    return age, gender

with open(csv_file, mode='w', newline='') as file:
    writer = csv.writer(file)
    writer.writerow(['File', 'Age', 'Gender'])
    
    for file_name in os.listdir(image_dir):
        if file_name.endswith(".jpg"):
            age, gender = extract_info(file_name)
            
            if age and gender:
                writer.writerow([file_name, age, gender])

print("Podatci su uspješno spremljeni u CSV datoteku.")


Podatci su uspješno spremljeni u CSV datoteku.


In [16]:
from PIL import Image

def load_images(image_dir, csv_file):
    if not os.path.exists(image_dir):
        print(f"Greška: Direktorij '{image_dir}' ne postoji.")
        return
    
    with open(csv_file, mode='r') as file:
        reader = csv.reader(file)
        next(reader)
        
        for row in reader:
            file_name = row[0]  
            age = int(row[1]) 
            gender = int(row[2])#
            
            image_path = os.path.join(image_dir, file_name)
            
            if not os.path.exists(image_path):
                print(f"Upozorenje: Slika '{file_name}' ne postoji na navedenoj putanji.")
                continue
            
            image = Image.open(image_path)
            
        
            print(f"Učitana slika '{file_name}': Dob={age}, Spol={'Muško' if gender == 0 else 'Žensko'}")

load_images(image_dir, csv_file)

Učitana slika '100_1_0_20170112213001988.jpg': Dob=100, Spol=Žensko
Učitana slika '100_1_0_20170112213303693.jpg': Dob=100, Spol=Žensko
Učitana slika '100_1_0_20170119212053665.jpg': Dob=100, Spol=Žensko
Učitana slika '100_1_2_20170110182836729.jpg': Dob=100, Spol=Žensko
Učitana slika '100_1_2_20170112213615815.jpg': Dob=100, Spol=Žensko
Učitana slika '101_1_2_20170105174739309.jpg': Dob=101, Spol=Žensko
Učitana slika '105_1_0_20170112213001988.jpg': Dob=105, Spol=Žensko
Učitana slika '105_1_1_20170112213303693.jpg': Dob=105, Spol=Žensko
Učitana slika '10_1_0_20161220222001459.jpg': Dob=10, Spol=Žensko
Učitana slika '10_1_0_20170109201728056.jpg': Dob=10, Spol=Žensko
Učitana slika '10_1_0_20170109202346880.jpg': Dob=10, Spol=Žensko
Učitana slika '10_1_0_20170109203218966.jpg': Dob=10, Spol=Žensko
Učitana slika '10_1_0_20170109203427416.jpg': Dob=10, Spol=Žensko
Učitana slika '10_1_0_20170109203501969.jpg': Dob=10, Spol=Žensko
Učitana slika '10_1_0_20170109203512075.jpg': Dob=10, Spol=Ž

In [17]:
import random
from shutil import copyfile

train_ratio = 0.7
val_ratio = 0.15
test_ratio = 0.15

with open(csv_file, mode='r') as file:
    reader = csv.reader(file)
    next(reader)
    
    file_list = []
    
    for row in reader:
        file_name = row[0]
        file_list.append(file_name)

num_files = len(file_list)
num_train = int(train_ratio * num_files)
num_val = int(val_ratio * num_files)
num_test = num_files - num_train - num_val

random.shuffle(file_list)

train_files = file_list[:num_train]
val_files = file_list[num_train:num_train+num_val]
test_files = file_list[num_train+num_val:]

def copy_files(file_list, destination_dir):
    if not os.path.exists(destination_dir):
        os.makedirs(destination_dir)
    
    for file_name in file_list:
        source_path = os.path.join(image_dir, file_name)
        destination_path = os.path.join(destination_dir, file_name)
        copyfile(source_path, destination_path)

copy_files(train_files, "train")
copy_files(val_files, "validation")
copy_files(test_files, "test")

print("Podjela skupa podataka je uspješno završena.")

Podjela skupa podataka je uspješno završena.


In [18]:
import torch
import torch.nn as nn
import torch.nn.functional as F

class GenderAgeCNN(nn.Module):
    def __init__(self):
        super(GenderAgeCNN, self).__init__()
        self.conv1 = nn.Conv2d(3, 16, kernel_size=3, stride=1, padding=1)
        self.conv2 = nn.Conv2d(16, 32, kernel_size=3, stride=1, padding=1)
        self.pool = nn.MaxPool2d(kernel_size=2, stride=2, padding=0)
        self.fc1 = nn.Linear(32 * 16 * 16, 256)
        self.fc2_gender = nn.Linear(256, 2)
        self.fc2_age = nn.Linear(256, 1)

    def forward(self, x):
        x = self.pool(F.relu(self.conv1(x)))
        x = self.pool(F.relu(self.conv2(x)))
        x = x.view(-1, 32 * 16 * 16)
        x = F.relu(self.fc1(x))
        gender_output = self.fc2_gender(x)
        age_output = self.fc2_age(x)
        return gender_output, age_output

# Inicijalizacija modela
model = GenderAgeCNN()
print(model)


GenderAgeCNN(
  (conv1): Conv2d(3, 16, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (conv2): Conv2d(16, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (pool): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (fc1): Linear(in_features=8192, out_features=256, bias=True)
  (fc2_gender): Linear(in_features=256, out_features=2, bias=True)
  (fc2_age): Linear(in_features=256, out_features=1, bias=True)
)


In [19]:
import os
import shutil
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, Dataset
from torchvision.transforms import transforms
from torchvision.datasets import ImageFolder
from PIL import Image

# Transformacije slika
transform = transforms.Compose([
    transforms.Resize((64, 64)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5])
])

train_dir = "./train"
val_dir = "./validation"
test_dir = "./test"

os.makedirs(os.path.join(train_dir, "male"), exist_ok=True)
os.makedirs(os.path.join(train_dir, "female"), exist_ok=True)

os.makedirs(os.path.join(val_dir, "male"), exist_ok=True)
os.makedirs(os.path.join(val_dir, "female"), exist_ok=True)

# Funkcija za ekstrakciju informacija iz naziva datoteke
def extract_info(file_name):
    parts = file_name.split('_')
    age = int(parts[0])
    gender = int(parts[1]) 
    return age, gender

def move_images(source_dir, dest_train_dir, dest_val_dir):
    for file_name in os.listdir(source_dir):
        age, gender = extract_info(file_name)
        target_dir = dest_train_dir if torch.rand(1).item() < 0.8 else dest_val_dir
        target_dir = os.path.join(target_dir, "male" if gender == 0 else "female")
        shutil.copy(os.path.join(source_dir, file_name), os.path.join(target_dir, file_name))

move_images("./faces", train_dir, val_dir)

# Prilagodba klase Dataset za učitavanje podataka o spolu i dobi
class GenderAgeDataset(Dataset):
    def __init__(self, root_dir, transform=None):
        self.root_dir = root_dir
        self.transform = transform
        self.samples = []
        self.labels = []

        for gender_label, gender in enumerate(['male', 'female']):
            gender_dir = os.path.join(root_dir, gender)
            for img_name in os.listdir(gender_dir):
                img_path = os.path.join(gender_dir, img_name)
                self.samples.append(img_path)
                self.labels.append((gender_label, int(img_name.split('_')[0])))

    def __len__(self):
        return len(self.samples)

    def __getitem__(self, idx):
        img_path = self.samples[idx]
        img = Image.open(img_path).convert('RGB')
        label = torch.tensor(self.labels[idx])
        
        if self.transform:
            img = self.transform(img)

        return img, label

# Kreiranje instanci skupova podataka
train_data = GenderAgeDataset(train_dir, transform=transform)
val_data = GenderAgeDataset(val_dir, transform=transform)

# Data loaders
batch_size = 64
train_loader = DataLoader(train_data, batch_size=batch_size, shuffle=True)
val_loader = DataLoader(val_data, batch_size=batch_size)

criterion = nn.CrossEntropyLoss() 
mse_loss = nn.MSELoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

# Funkcija za treniranje modela
def train_model(model, train_loader, val_loader, criterion, optimizer, num_epochs):
    for epoch in range(num_epochs):
        model.train()
        train_loss = 0.0
        train_corrects = 0
        
        for inputs, targets in train_loader:
            optimizer.zero_grad()
            gender_targets, age_targets = targets[:, 0], targets[:, 1].float()
            gender_outputs, age_outputs = model(inputs)
            gender_loss = criterion(gender_outputs, gender_targets)
            age_loss = mse_loss(age_outputs.squeeze(), age_targets)
            loss = gender_loss + age_loss
            loss.backward()
            optimizer.step()
            train_loss += loss.item() * inputs.size(0)
            _, gender_preds = torch.max(gender_outputs, 1)
            train_corrects += torch.sum(gender_preds == gender_targets.data)

        model.eval()
        val_loss = 0.0
        val_corrects = 0
        with torch.no_grad():
            for inputs, targets in val_loader:
                gender_targets, age_targets = targets[:, 0], targets[:, 1].float()
                gender_outputs, age_outputs = model(inputs)
                gender_loss = criterion(gender_outputs, gender_targets)
                age_loss = mse_loss(age_outputs.squeeze(), age_targets)
                loss = gender_loss + age_loss
                val_loss += loss.item() * inputs.size(0)
                _, gender_preds = torch.max(gender_outputs, 1)
                val_corrects += torch.sum(gender_preds == gender_targets.data)

        train_loss = train_loss / len(train_loader.dataset)
        train_acc = train_corrects.double() / len(train_loader.dataset)
        val_loss = val_loss / len(val_loader.dataset)
        val_acc = val_corrects.double() / len(val_loader.dataset)
        
        print(f'Epoch {epoch+1}/{num_epochs}: Train Loss: {train_loss:.4f}, Train Acc: {train_acc:.4f}, Val Loss: {val_loss:.4f}, Val Acc: {val_acc:.4f}')

# Treniranje modela
num_epochs = 10
train_model(model, train_loader, val_loader, criterion, optimizer, num_epochs)



Epoch 1/10: Train Loss: 417.6423, Train Acc: 0.5441, Val Loss: 347.8518, Val Acc: 0.6003
Epoch 2/10: Train Loss: 348.1706, Train Acc: 0.6281, Val Loss: 329.1642, Val Acc: 0.5595
Epoch 3/10: Train Loss: 334.9299, Train Acc: 0.6400, Val Loss: 318.4440, Val Acc: 0.6425
Epoch 4/10: Train Loss: 320.1390, Train Acc: 0.6405, Val Loss: 294.4964, Val Acc: 0.6463
Epoch 5/10: Train Loss: 308.0279, Train Acc: 0.6679, Val Loss: 282.8007, Val Acc: 0.6879
Epoch 6/10: Train Loss: 296.6810, Train Acc: 0.6794, Val Loss: 288.8911, Val Acc: 0.6472
Epoch 7/10: Train Loss: 283.5710, Train Acc: 0.6810, Val Loss: 261.5877, Val Acc: 0.6581
Epoch 8/10: Train Loss: 277.0854, Train Acc: 0.6786, Val Loss: 251.1639, Val Acc: 0.6966
Epoch 9/10: Train Loss: 265.1217, Train Acc: 0.6990, Val Loss: 240.0578, Val Acc: 0.7045
Epoch 10/10: Train Loss: 251.9742, Train Acc: 0.6984, Val Loss: 230.6586, Val Acc: 0.6979


In [20]:
def evaluate_model(model, val_loader, criterion):
    model.eval()
    val_loss = 0.0
    val_corrects = 0
    
    with torch.no_grad():
        for inputs, targets in val_loader:
            gender_targets = targets[:, 0]
            age_targets = targets[:, 1].float()
            gender_outputs, age_outputs = model(inputs)
            gender_loss = criterion(gender_outputs, gender_targets)
            age_loss = mse_loss(age_outputs.squeeze(), age_targets)
            loss = gender_loss + age_loss
            val_loss += loss.item() * inputs.size(0)
            _, gender_preds = torch.max(gender_outputs, 1)
            val_corrects += torch.sum(gender_preds == gender_targets.data)
    
    val_loss = val_loss / len(val_loader.dataset)
    val_acc = val_corrects.double() / len(val_loader.dataset)
    
    print(f'Val Loss: {val_loss:.4f}, Val Acc: {val_acc:.4f}')

evaluate_model(model, val_loader, criterion)

Val Loss: 230.6586, Val Acc: 0.6979


In [21]:
from sklearn.metrics import confusion_matrix, accuracy_score, precision_score, recall_score, f1_score, roc_curve, roc_auc_score

# Funkcija za racunanje metrika
def evaluate_model(model, val_loader):
    model.eval()
    y_true = []
    y_pred = []

    with torch.no_grad():
        for inputs, targets in val_loader:
            gender_targets, _ = targets[:, 0], targets[:, 1].float()
            gender_outputs, _ = model(inputs)
            _, gender_preds = torch.max(gender_outputs, 1)

            y_true.extend(gender_targets.tolist())
            y_pred.extend(gender_preds.tolist())

    # Konfuzijska matrica
    cm = confusion_matrix(y_true, y_pred)
    print("Konfuzijska matrica:")
    print(cm)

    # Točnost
    acc = accuracy_score(y_true, y_pred)
    print("Točnost:", acc)

    # Preciznost
    precision = precision_score(y_true, y_pred)
    print("Preciznost:", precision)

    # Recall
    recall = recall_score(y_true, y_pred)
    print("Recall:", recall)

    # F1 Score
    f1 = f1_score(y_true, y_pred)
    print("F1 Score:", f1)


    fpr, tpr, thresholds = roc_curve(y_true, y_pred)
    auc = roc_auc_score(y_true, y_pred)
    print("AUC:", auc)

evaluate_model(model, val_loader)


Konfuzijska matrica:
[[2824  872]
 [1278 2143]]
Točnost: 0.6979064212449065
Preciznost: 0.7107794361525704
Recall: 0.6264250219234142
F1 Score: 0.665941578620261
AUC: 0.6952471429963392


In [22]:
torch.save(model.state_dict(), 'gender_age_model.pth')