In [100]:
import numpy as np
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import jaccard_score
from tensorflow.keras.datasets import cifar10

def som_clustering(data, som_dim, num_iterations, learning_rate, sigma=None, learning_rate_decay=False, decay_start_iteration=0):
    input_dim = data.shape[1]
    output_dim = (som_dim, som_dim)
    
    weights = np.random.random((output_dim[0], output_dim[1], input_dim))
    
    for iteration in range(num_iterations):
        # Select a random input vector
        input_vector = data[np.random.choice(len(data))]
        
        # Find the best matching unit (BMU)
        distances = np.sum((input_vector - weights) ** 2, axis=(1, 2))
        bmu_index = np.unravel_index(np.argmin(distances), output_dim)
        
        # Update the weights of the BMU and its neighbors
        for i in range(output_dim[0]):
            for j in range(output_dim[1]):
                distance_to_bmu = np.sqrt((i - bmu_index[0]) ** 2 + (j - bmu_index[1]) ** 2)
                if sigma is None or distance_to_bmu <= sigma:
                    if learning_rate_decay and iteration >= decay_start_iteration:
                        current_learning_rate = learning_rate / (iteration - decay_start_iteration + 1)
                    else:
                        current_learning_rate = learning_rate
                    weights[i, j] += current_learning_rate * (input_vector - weights[i, j])
    
    # Assign each data point to its closest cluster
    cluster_labels = np.zeros(len(data))
    for i, input_vector in enumerate(data):
        distances = np.sum((input_vector - weights) ** 2, axis=(1, 2))
        bmu_index = np.unravel_index(np.argmin(distances), output_dim)
        cluster_labels[i] = bmu_index[0] * som_dim + bmu_index[1]
    
    return cluster_labels

# بارگیری داده‌های CIFAR-10
(x_train, y_train), (x_test, y_test) = cifar10.load_data()

# تصاویر مربوط به خوشه‌های airplane، cat و bird
selected_classes = [0, 3, 2]
selected_train_idx = np.isin(y_train, selected_classes).flatten()
selected_test_idx = np.isin(y_test, selected_classes).flatten()

x_train_selected = x_train[selected_train_idx]
y_train_selected = y_train[selected_train_idx]

x_test_selected = x_test[selected_test_idx]
y_test_selected = y_test[selected_test_idx]

# تبدیل تصاویر به بردارهای یک بعدی
x_train_selected = x_train_selected.reshape((len(x_train_selected), -1))
x_test_selected = x_test_selected.reshape((len(x_test_selected), -1))

# مقیاس‌بندی داده‌ها
scaler = MinMaxScaler()
x_train_selected = scaler.fit_transform(x_train_selected)
x_test_selected = scaler.transform(x_test_selected)

# پارامترهای آزمایش
som_dim = 10
num_iterations = 10000

# ---------------
# آزمایش 1: بدون تعریف همسایگی برای نورون‌های لایه خروجی SOM
learning_rate_1 = 0.5

train_clusters_1 = som_clustering(x_train_selected, som_dim, num_iterations, learning_rate_1)
test_clusters_1 = som_clustering(x_test_selected, som_dim, num_iterations, learning_rate_1)

jaccard_train_1 = jaccard_score(y_train_selected, train_clusters_1, average='micro')
jaccard_test_1 = jaccard_score(y_test_selected, test_clusters_1, average='micro')

print("آزمایش 1")
print("امتیاز Jaccard بر روی داده‌های آموزش:", jaccard_train_1)
print("امتیاز Jaccard بر روی داده‌های تست:", jaccard_test_1)
print()

# ---------------
# آزمایش 2: بدون تعریف همسایگی برای نورون‌های لایه خروجی SOM با کاهش نرخ یادگیری بعد از تعدادی تکرار SOM
learning_rate_2 = 0.5
learning_rate_decay_2 = True
decay_start_iteration_2 = 5000

train_clusters_2 = som_clustering(x_train_selected, som_dim, num_iterations, learning_rate_2,
                                  learning_rate_decay=learning_rate_decay_2, decay_start_iteration=decay_start_iteration_2)
test_clusters_2 = som_clustering(x_test_selected, som_dim, num_iterations, learning_rate_2,
                                 learning_rate_decay=learning_rate_decay_2, decay_start_iteration=decay_start_iteration_2)

jaccard_train_2 = jaccard_score(y_train_selected, train_clusters_2, average='micro')
jaccard_test_2 = jaccard_score(y_test_selected, test_clusters_2, average='micro')

print("آزمایش 2")
print("امتیاز Jaccard بر روی داده‌های آموزش:", jaccard_train_2)
print("امتیاز Jaccard بر روی داده‌های تست:", jaccard_test_2)
print()

# ---------------
# آزمایش 3: با تعریف همسایگی دلخواه برای نورون‌های لایه خروجی SOM با کاهش نرخ یادگیری بعد از تعدادی تکرار SOM
learning_rate_3 = 0.5
learning_rate_decay_3 = True
decay_start_iteration_3 = 5000
sigma_3 = 2

train_clusters_3 = som_clustering(x_train_selected, som_dim, num_iterations, learning_rate_3,
                                  sigma=sigma_3, learning_rate_decay=learning_rate_decay_3, 
                                  decay_start_iteration=decay_start_iteration_3)
test_clusters_3 = som_clustering(x_test_selected, som_dim, num_iterations, learning_rate_3,
                                 sigma=sigma_3, learning_rate_decay=learning_rate_decay_3, 
                                 decay_start_iteration=decay_start_iteration_3)

jaccard_train_3 = jaccard_score(y_train_selected, train_clusters_3, average='micro')
jaccard_test_3 = jaccard_score(y_test_selected, test_clusters_3, average='micro')

print("آزمایش 3")
print("امتیاز Jaccard بر روی داده‌های آموزش:", jaccard_train_3)
print("امتیاز Jaccard بر روی داده‌های تست:", jaccard_test_3)
print()

آزمایش 1
امتیاز Jaccard بر روی داده‌های آموزش: 0.2
امتیاز Jaccard بر روی داده‌های تست: 0.2

آزمایش 2
امتیاز Jaccard بر روی داده‌های آموزش: 0.2
امتیاز Jaccard بر روی داده‌های تست: 0.2

آزمایش 3
امتیاز Jaccard بر روی داده‌های آموزش: 0.16836078981189392
امتیاز Jaccard بر روی داده‌های تست: 0.2



In [102]:
import numpy as np
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import jaccard_score
from tensorflow.keras.datasets import cifar10

def som_clustering(data, som_dim, num_iterations, learning_rate, sigma=None, learning_rate_decay=False, decay_start_iteration=0, use_labels=False, attraction_factor=0.5):
    input_dim = data.shape[1]
    output_dim = (som_dim, som_dim)
    
    weights = np.random.random((output_dim[0], output_dim[1], input_dim))
    
    for iteration in range(num_iterations):
        
        input_vector = data[np.random.choice(len(data))]
        
        
        distances = np.sum((input_vector - weights) ** 2, axis=(1, 2))
        bmu_index = np.unravel_index(np.argmin(distances), output_dim)
        
        
        for i in range(output_dim[0]):
            for j in range(output_dim[1]):
                distance_to_bmu = np.sqrt((i - bmu_index[0]) ** 2 + (j - bmu_index[1]) ** 2)
                if sigma is None or distance_to_bmu <= sigma:
                    if learning_rate_decay and iteration >= decay_start_iteration:
                        current_learning_rate = learning_rate / (iteration - decay_start_iteration + 1)
                    else:
                        current_learning_rate = learning_rate
                    if use_labels:
                        label_distance = np.abs(input_vector - weights[i, j])
                        weight_update = current_learning_rate * (input_vector - weights[i, j])
                        weights[i, j] += weight_update * (1 - attraction_factor * np.mean(label_distance))
                    else:
                        weights[i, j] += current_learning_rate * (input_vector - weights[i, j])
    
    
    cluster_labels = np.zeros(len(data))
    for i, input_vector in enumerate(data):
        distances = np.sum((input_vector - weights) ** 2, axis=(1, 2))
        bmu_index = np.unravel_index(np.argmin(distances), output_dim)
        cluster_labels[i] = bmu_index[0] * som_dim + bmu_index[1]
    
    return cluster_labels


(x_train, y_train), (x_test, y_test) = cifar10.load_data()


selected_classes = [0, 3, 2]
selected_train_idx = np.isin(y_train, selected_classes).flatten()
selected_test_idx = np.isin(y_test, selected_classes).flatten()

x_train_selected = x_train[selected_train_idx]
y_train_selected = y_train[selected_train_idx]

x_test_selected = x_test[selected_test_idx]
y_test_selected = y_test[selected_test_idx]


x_train_selected = x_train_selected.reshape((len(x_train_selected), -1))
x_test_selected = x_test_selected.reshape((len(x_test_selected), -1))


scaler = MinMaxScaler()
x_train_selected = scaler.fit_transform(x_train_selected)
x_test_selected = scaler.transform(x_test_selected)

# پارامترهای آزمایش
som_dim = 10
num_iterations = 1000


learning_rate_4 = 0.3
learning_rate_decay_4 = True
decay_start_iteration_4 = 5000
sigma_4 = 2
use_labels_4 = True
attraction_factor_4 = 0.5

train_clusters_4 = som_clustering(x_train_selected, som_dim, num_iterations, learning_rate_4,
                                  sigma=sigma_4, learning_rate_decay=learning_rate_decay_4, 
                                  decay_start_iteration=decay_start_iteration_4, use_labels=use_labels_4,
                                  attraction_factor=attraction_factor_4)
test_clusters_4 = som_clustering(x_test_selected, som_dim, num_iterations, learning_rate_4,
                                 sigma=sigma_4, learning_rate_decay=learning_rate_decay_4, 
                                 decay_start_iteration=decay_start_iteration_4, use_labels=use_labels_4,
                                 attraction_factor=attraction_factor_4)

jaccard_train_4 = jaccard_score(y_train_selected, train_clusters_4, average='micro')
jaccard_test_4 = jaccard_score(y_test_selected, test_clusters_4, average='micro')

print("آزمایش 4")
print("امتیاز Jaccard بر روی داده‌های آموزش:", jaccard_train_4)
print("امتیاز Jaccard بر روی داده‌های تست:", jaccard_test_4)
print()

آزمایش 4
امتیاز Jaccard بر روی داده‌های آموزش: 0.2
امتیاز Jaccard بر روی داده‌های تست: 0.21040952188823886

