In [1]:
import os

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

import torch
from torch import nn
from torch.utils.data import DataLoader, TensorDataset

from kaggle.api.kaggle_api_extended import KaggleApi
import kagglehub

from sklearn.preprocessing import KBinsDiscretizer,  LabelEncoder
from sklearn.decomposition import PCA
from sklearn.cluster import KMeans
from sklearn.metrics import silhouette_score

from hopfieldnetwork import HopfieldNetwork 

In [2]:
# Create the Kaggle API client and authenticate it. No credentials are passed
# here, so they must already be configured for the current user.
# NOTE(review): presumably read from ~/.kaggle/kaggle.json or KAGGLE_* environment
# variables — confirm for the target machine.
api = KaggleApi()
api.authenticate()

In [3]:
# Windows-only shell command: switch the console code page to 65001 (UTF-8).
# NOTE(review): presumably needed so non-ASCII file names print correctly — confirm.
!chcp 65001
# Current working directory (IPython %pwd magic); used as the download target below.
path = %pwd

Active code page: 65001


In [4]:
# Fetch the "uciml/iris" dataset archive into `path` and unpack it there.
api.dataset_download_files("uciml/iris", path=path, unzip=True)

Dataset URL: https://www.kaggle.com/datasets/uciml/iris


In [5]:
# List the working directory to see which files the download produced.
os.listdir()

['.ipynb_checkpoints', '3 лаба.ipynb', 'database.sqlite', 'Iris.csv']

In [6]:
# Load the Iris table extracted by the Kaggle download. The directory listing
# shows the file is named "Iris.csv" (capital I), so the exact name is used:
# a lower-case "iris.csv" only resolves on case-insensitive file systems.
df_train = pd.read_csv("Iris.csv")
df_train

Unnamed: 0,Id,SepalLengthCm,SepalWidthCm,PetalLengthCm,PetalWidthCm,Species
0,1,5.1,3.5,1.4,0.2,Iris-setosa
1,2,4.9,3.0,1.4,0.2,Iris-setosa
2,3,4.7,3.2,1.3,0.2,Iris-setosa
3,4,4.6,3.1,1.5,0.2,Iris-setosa
4,5,5.0,3.6,1.4,0.2,Iris-setosa
...,...,...,...,...,...,...
145,146,6.7,3.0,5.2,2.3,Iris-virginica
146,147,6.3,2.5,5.0,1.9,Iris-virginica
147,148,6.5,3.0,5.2,2.0,Iris-virginica
148,149,6.2,3.4,5.4,2.3,Iris-virginica


In [7]:
# Drop the row identifier column; it is not one of the flower measurements.
# Reassigning (instead of inplace=True) together with errors="ignore" keeps this
# cell idempotent, so running it a second time does not raise KeyError.
df_train = df_train.drop(columns=["Id"], errors="ignore")

In [8]:
# Measurements to discretise, and the requested number of bins per measurement.
num_features = [
    "SepalLengthCm",
    "SepalWidthCm",
    "PetalLengthCm",
    "PetalWidthCm",
]
n_bins = 4

# Bin every measurement with the "uniform" strategy and one-hot encode the bin
# index, producing a dense matrix with one column per (feature, bin) pair.
discretizer = KBinsDiscretizer(
    n_bins=n_bins,
    encode="onehot-dense",
    strategy="uniform",
)
X_discrete = discretizer.fit_transform(df_train[num_features])

In [9]:
# Name each one-hot column "<feature>_bin<k>". The per-feature bin count is read
# from the fitted discretizer (n_bins_) rather than assumed to equal n_bins,
# because KBinsDiscretizer can remove bins that are too narrow and would then
# return fewer columns than len(num_features) * n_bins.
binary_features = [
    f"{col}_bin{i}"
    for col, fitted_bins in zip(num_features, discretizer.n_bins_)
    for i in range(fitted_bins)
]
df_binary = pd.DataFrame(X_discrete, columns=binary_features)
# Rows of X_discrete follow df_train positionally, so copy the labels by
# position instead of relying on index alignment.
df_binary["Species"] = df_train["Species"].to_numpy()

In [10]:
# Integer-encode the species names; the fitted encoder is kept so that the
# codes can be mapped back to names.
label_encoder = LabelEncoder()
label_encoder.fit(df_binary["Species"])
y_true = label_encoder.transform(df_binary["Species"])

In [11]:
# Number of distinct species; the same count is used as the number of clusters
# further down.
len(pd.unique(df_binary["Species"]))

3

In [12]:
class HopfieldNetwork:
    """Minimal Hopfield associative memory for binary {0, 1} patterns.

    Patterns are mapped to bipolar {-1, +1} states internally. Weights are
    learned with the Hebbian outer-product rule and recall applies synchronous
    sign updates to each pattern.

    NOTE: this definition shadows the ``HopfieldNetwork`` name imported from
    the ``hopfieldnetwork`` package at the top of the notebook.
    """

    def __init__(self, n_neurons):
        """Create a network of ``n_neurons`` units with all weights set to zero."""
        self.n_neurons = n_neurons
        self.weights = np.zeros((n_neurons, n_neurons))

    def _as_bipolar(self, patterns):
        """Return ``patterns`` as a 2-D float array of {-1, +1} states.

        Accepts a single pattern or a sequence of patterns (arrays or lists).
        Raises ValueError when the pattern length does not match ``n_neurons``.
        """
        states = np.asarray(patterns, dtype=float)
        if states.size == 0:
            return np.empty((0, self.n_neurons))
        states = np.atleast_2d(states)
        if states.ndim != 2 or states.shape[1] != self.n_neurons:
            raise ValueError(
                f"expected patterns of length {self.n_neurons}, "
                f"got array of shape {states.shape}"
            )
        return states * 2 - 1  # convert {0, 1} to {-1, +1}

    def train(self, patterns):
        """Learn Hebbian weights for ``patterns`` (rows of 0/1 values).

        The weights are the mean of the outer products of the bipolar patterns
        with a zero diagonal. Each call replaces the previously stored weights,
        so repeated training never rescales older weights.

        Raises:
            ValueError: if ``patterns`` is empty or has the wrong width.
        """
        states = self._as_bipolar(patterns)
        n_patterns = states.shape[0]
        if n_patterns == 0:
            raise ValueError("train() needs at least one pattern")
        # Sum of the per-pattern outer products, computed in one product.
        hebbian = states.T @ states
        np.fill_diagonal(hebbian, 0)  # no self-connections
        self.weights = hebbian / n_patterns

    def recall(self, patterns, steps=5):
        """Relax each pattern for at most ``steps`` synchronous updates.

        Args:
            patterns: rows of 0/1 values, one pattern per row.
            steps: maximum number of update sweeps per pattern.

        Returns:
            Float array of shape (n_patterns, n_neurons) with 0/1 entries.
        """
        results = []
        for state in self._as_bipolar(patterns):
            for _ in range(steps):
                field = self.weights @ state
                # A neuron whose local field is exactly zero keeps its current
                # state; np.sign would return 0 there, which is not a valid
                # bipolar state and would be decoded as 0 regardless of input.
                updated = np.where(
                    field > 0, 1.0, np.where(field < 0, -1.0, state)
                )
                if np.array_equal(updated, state):
                    break  # fixed point: further sweeps cannot change it
                state = updated
            results.append((state + 1) // 2)  # convert back to {0, 1}
        if not results:
            return np.empty((0, self.n_neurons))
        return np.array(results)

In [13]:
# One neuron per one-hot column of the discretised feature matrix.
hopfield_net = HopfieldNetwork(n_neurons=X_discrete.shape[1])
# Store every discretised sample as a pattern.
hopfield_net.train(X_discrete)
# Run the same samples through the network with the default number of update
# steps; the recalled states are what gets clustered afterwards.
y_hopfield_full = hopfield_net.recall(X_discrete)

In [14]:
# Cluster the Hopfield-recalled states into as many groups as there are species.
kmeans_hopfield = KMeans(
    n_clusters=len(pd.unique(df_binary["Species"])),
    random_state=42,
)
y_hopfield = kmeans_hopfield.fit_predict(y_hopfield_full)

[WinError 2] Не удается найти указанный файл
  File "D:\anaconda\envs\torch\Lib\site-packages\joblib\externals\loky\backend\context.py", line 257, in _count_physical_cores
    cpu_info = subprocess.run(
               ^^^^^^^^^^^^^^^
  File "D:\anaconda\envs\torch\Lib\subprocess.py", line 548, in run
    with Popen(*popenargs, **kwargs) as process:
         ^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "D:\anaconda\envs\torch\Lib\subprocess.py", line 1026, in __init__
    self._execute_child(args, executable, preexec_fn, close_fds,
  File "D:\anaconda\envs\torch\Lib\subprocess.py", line 1538, in _execute_child
    hp, ht, pid, tid = _winapi.CreateProcess(executable, args,
                       ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  return fit_method(estimator, *args, **kwargs)


In [15]:
# Baseline: K-Means applied directly to the discretised features, with the same
# cluster count and seed as the Hopfield-based run.
kmeans = KMeans(
    n_clusters=len(pd.unique(df_binary["Species"])),
    random_state=42,
)
kmeans_labels = kmeans.fit_predict(X_discrete)



In [16]:
# Score both label assignments against the same discretised feature matrix so
# that the two numbers are directly comparable.
silhouette_hopfield = silhouette_score(X_discrete, y_hopfield)
silhouette_kmeans = silhouette_score(X_discrete, kmeans_labels)

print(
    f"Silhouette Score - Hopfield: {silhouette_hopfield:.4f}",
    f"Silhouette Score - K-Means: {silhouette_kmeans:.4f}",
    sep="\n",
)

Silhouette Score - Hopfield: 0.3445
Silhouette Score - K-Means: 0.3592
