# Import libraries

In [3]:
import torch
from torch import nn
import torchvision
from torchvision import datasets
from torch.utils.data import DataLoader
from torchvision import datasets, transforms
from tqdm import tqdm
from torchinfo import summary
import os

# Pretrain Fuzzy Clustering

In [4]:
%pwd

'/home/dungnd/Documents/AI/fake-faces-detection'

In [5]:
def load_data(train_dir: str, valid_dir: str, batch_size: int = 64):
    # Define transforms
    weights = torchvision.models.EfficientNet_V2_S_Weights.DEFAULT
    auto_transforms = weights.transforms()

    # Load data
    train_data = datasets.ImageFolder(train_dir, transform=auto_transforms, target_transform = None)
    valid_data = datasets.ImageFolder(valid_dir, transform=auto_transforms)

    # Create data loaders
    train_dataloader = DataLoader(train_data, batch_size=batch_size, num_workers=1, shuffle=True)
    valid_dataloader = DataLoader(valid_data, batch_size=batch_size, num_workers=1, shuffle=False)

    return train_dataloader, valid_dataloader

In [None]:
train_dir="archive/dataset/train"
valid_dir="archive/dataset/valid"
test_dir="archive/dataset/test"

train_loader, valid_loader = load_data(train_dir, valid_dir, batch_size=32)


In [18]:
for X, y in train_loader:
    print(X.shape)
    print(y.shape)
    break

torch.Size([32, 3, 384, 384])
torch.Size([32])


In [None]:
import torch
import numpy as np
from torchvision.models import resnet50
from torch.utils.data import DataLoader, TensorDataset

def fuzzy_c_means(train_loader, num_clusters, fuzziness=2, max_iter=100, tol=1e-4):
    """
    Thực hiện thuật toán Fuzzy C-Means.

    Args:
        data (torch.Tensor): Tensor dữ liệu đầu vào (mẫu x đặc trưng).
        num_clusters (int): Số lượng cluster.
        fuzziness (float): Tham số fuzziness (p trong bài báo).
        max_iter (int): Số lượng vòng lặp tối đa.
        tol (float): Ngưỡng dung sai để dừng thuật toán.

    Returns:
        torch.Tensor: Ma trận độ thuộc (mẫu x cluster).
        torch.Tensor: Ma trận centroids (cluster x đặc trưng).
    """
    num_samples, num_features = train_loader.shape

    # Khởi tạo centroids ngẫu nhiên
    centroids = torch.randn(num_clusters, num_features)
    for X, y in train_loader:
        for iteration in range(max_iter):
            old_centroids = centroids.clone()

            # Tính toán ma trận khoảng cách (Euclidean)
            distances = torch.cdist(data, centroids)

            # Tính toán ma trận độ thuộc (Equation 3 trong bài báo)
            membership = 1 / torch.pow(distances / (distances.sum(dim=1, keepdim=True) + 1e-9), 2 / (fuzziness - 1))
            membership = membership / (membership.sum(dim=1, keepdim=True) + 1e-9)

            # Cập nhật centroids (Equation 2 trong bài báo)
            numerator = torch.matmul(membership.pow(fuzziness).T, data)
            denominator = membership.pow(fuzziness).sum(dim=0, keepdim=True).T
            centroids = numerator / (denominator + 1e-9)

            # Kiểm tra điều kiện dừng
            if torch.all(torch.abs(centroids - old_centroids) < tol):
                break

    return membership, centroids

membership1_target, centroids1 = fuzzy_c_means(train_data, num_clusters = 2, fuzziness=2)


NameError: name 'train_data' is not defined

# Init Model

## model

In [4]:
import torch
import torch.nn as nn
from torchvision.models import resnet50

class FuzzyClusteringLayer(nn.Module):
    def __init__(self, input_dim, num_clusters):
        super(FuzzyClusteringLayer, self).__init__()
        self.num_clusters = num_clusters
        self.centroids = nn.Parameter(torch.randn(num_clusters, input_dim))

    def forward(self, x):
        # Tính khoảng cách Euclidean giữa các điểm dữ liệu và các centroid
        distances = torch.cdist(x, self.centroids)
        # Tính toán độ thuộc (membership degrees) bằng cách sử dụng hàm mũ hóa (có thể điều chỉnh tham số fuzziness)
        fuzziness = 2
        membership = torch.pow(1 / distances, 2 / (fuzziness - 1))
        # Chuẩn hóa độ thuộc để tổng của mỗi điểm dữ liệu bằng 1
        membership = membership / torch.sum(membership, dim=1, keepdim=True)
        return membership, distances

class Resnet50_FuzzyCluster(nn.Module):
    def __init__(self, num_clusters_fc1=2, num_clusters_fc2=5):
        super(Resnet50_FuzzyCluster, self).__init__()
        # Tải pre-trained ResNet50
        self.resnet = resnet50(pretrained=True)
        self.resnet = nn.Sequential(*list(self.resnet.children())[:-1])
        # Khối Sequential các lớp fully connected
        self.fc = nn.Sequential(
            nn.Linear(2048, 1024),
            nn.ReLU(),
            nn.Linear(1024, 512),
            nn.ReLU(),
            nn.Linear(512, 2)
        )

        # Các lớp fuzzy clustering
        self.fuzzy_cluster1 = FuzzyClusteringLayer(1024, num_clusters_fc1)
        self.fuzzy_cluster2 = FuzzyClusteringLayer(512, num_clusters_fc2)

    def forward(self, x):
        # Forward pass qua ResNet50
        x = self.resnet(x)
        x = x.view(x.size(0), -1)

        # Forward pass qua lớp FC đầu tiên
        fc1_out = self.fc[0](x)
        fc1_relu_out = self.fc[1](fc1_out)

        # Fuzzy clustering sau lớp FC đầu tiên
        membership1, distances1 = self.fuzzy_cluster1(fc1_relu_out)

        # Forward pass qua lớp FC thứ hai
        fc2_out = self.fc[2](fc1_relu_out)
        fc2_relu_out = self.fc[3](fc2_out)

        # Fuzzy clustering sau lớp FC thứ hai
        membership2, distances2 = self.fuzzy_cluster2(fc2_relu_out)

        # Forward pass qua lớp FC cuối cùng
        output = self.fc[4](fc2_relu_out)

        return output, membership1, distances1, membership2, distances2

# Khởi tạo mô hình
model = Resnet50_FuzzyCluster(num_clusters_fc1=10, num_clusters_fc2=5)


# Ví dụ về forward pass với một batch dữ liệu ngẫu nhiên
# input_tensor 
# output, membership1, distances1, membership2, distances2 = model(input_tensor)

# print("Kích thước output:", output.shape)
# print("Kích thước membership lớp 1:", membership1.shape)
# print("Kích thước khoảng cá= torch.randn(64, 3, 224, 224)ch lớp 1:", distances1.shape)
# print("Kích thước membership lớp 2:", membership2.shape)
# print("Kích thước khoảng cách lớp 2:", distances2.shape)



In [5]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
device

device(type='cuda')

In [6]:
# model = init_model_ResNet50(trainable_extractor = False)
summary(model, (32, 3, 224, 224), device=device, col_names=["input_size", "output_size", "num_params", "kernel_size"], row_settings=["var_names"])

Layer (type (var_name))                            Input Shape               Output Shape              Param #                   Kernel Shape
Resnet50_FuzzyCluster (Resnet50_FuzzyCluster)      [32, 3, 224, 224]         [32, 2]                   --                        --
├─Sequential (resnet)                              [32, 3, 224, 224]         [32, 2048, 1, 1]          --                        --
│    └─Conv2d (0)                                  [32, 3, 224, 224]         [32, 64, 112, 112]        9,408                     [7, 7]
│    └─BatchNorm2d (1)                             [32, 64, 112, 112]        [32, 64, 112, 112]        128                       --
│    └─ReLU (2)                                    [32, 64, 112, 112]        [32, 64, 112, 112]        --                        --
│    └─MaxPool2d (3)                               [32, 64, 112, 112]        [32, 64, 56, 56]          --                        3
│    └─Sequential (4)                              [32, 64, 56,

## Loss

In [None]:
import torch
import torch.nn as nn
import torch.nn.functional as F

class CustomLoss(nn.Module):
    def __init__(self, classification_weight=0.9, clustering_weight=0.1, alpha1=0.5, alpha2=0.5):
        super(CustomLoss, self).__init__()
        self.classification_loss_fn = nn.CrossEntropyLoss()
        self.clustering_weight = clustering_weight
        self.classification_weight = classification_weight
        self.alpha1 = alpha1
        self.alpha2 = alpha2

    def forward(self, output, target, membership1, distances1, membership2, distances2, fuzzy_cluster_results1=None, fuzzy_cluster_results2=None):
        # Tính toán Classification Loss
        classification_loss = self.classification_loss_fn(output, target)

        # Tính toán Clustering Cost cho lớp thứ nhất
        clustering_loss1 = torch.tensor(0.0, requires_grad=True).to(output.device)
        if fuzzy_cluster_results1 is not None:
            # Tính toán MSE giữa membership và fuzzy_cluster_results1
            mse_loss1 = F.mse_loss(membership1, fuzzy_cluster_results1)
            # Tính toán Binary Cross Entropy Loss với zero vector
            batch_size = membership1.size(0)
            num_clusters1 = membership1.size(1)
            zero_target1 = torch.zeros(batch_size, num_clusters1).to(membership1.device)
            bce_loss1 = F.binary_cross_entropy(membership1, zero_target1)
            clustering_loss1 = mse_loss1 + bce_loss1

        # Tính toán Clustering Cost cho lớp thứ hai
        clustering_loss2 = torch.tensor(0.0, requires_grad=True).to(output.device)
        if fuzzy_cluster_results2 is not None:
            # Tính toán MSE giữa membership và fuzzy_cluster_results2
            mse_loss2 = F.mse_loss(membership2, fuzzy_cluster_results2)
            # Tính toán Binary Cross Entropy Loss với zero vector
            batch_size = membership2.size(0)
            num_clusters2 = membership2.size(1)
            zero_target2 = torch.zeros(batch_size, num_clusters2).to(membership2.device)
            bce_loss2 = F.binary_cross_entropy(membership2, zero_target2)
            clustering_loss2 = mse_loss2 + bce_loss2

        # Tổng hợp Clustering Cost
        total_clustering_loss = self.alpha1 * clustering_loss1 + self.alpha2 * clustering_loss2

        # Tổng hợp Total Cost
        total_loss = self.classification_weight * classification_loss + self.clustering_weight * total_clustering_loss

        return total_loss

In [20]:
CustomLoss = CustomLoss()
summary(CustomLoss)

TypeError: CustomLoss.forward() missing 6 required positional arguments: 'output', 'target', 'membership1', 'distances1', 'membership2', and 'distances2'