In [27]:
import numpy as np
from sklearn.manifold import TSNE

def soft_max(x):
    """Return the softmax of ``x``, normalised along axis 0.

    The overall maximum of ``x`` is subtracted before exponentiating to keep
    ``np.exp`` from overflowing on large inputs.

    Args:
        x: Array-like of scores. The class in this notebook applies it to
            one 1-D row at a time.

    Returns:
        Array of the same shape as ``x`` whose entries sum to 1 along axis 0.
    """
    shifted = x - np.max(x)
    weights = np.exp(shifted)
    normaliser = weights.sum(axis=0, keepdims=True)
    return weights / normaliser

def threshold_below(arr, threshold):
    """Zero out every entry of ``arr`` that is less than or equal to ``threshold``.

    Args:
        arr: NumPy array of values.
        threshold: Cut-off; entries ``<= threshold`` become 0.

    Returns:
        A new array with the same shape as ``arr``; entries above the
        threshold are kept unchanged.
    """
    at_or_below = arr <= threshold
    return np.where(at_or_below, 0, arr)

class TSNE_attention():
    """Attention-style scores built from a t-SNE embedding of the input rows.

    The input array is embedded with t-SNE into ``dim_info`` dimensions. The
    first ``dim_info`` embedded rows are used as the query matrix and the next
    ``dim_info`` rows as the key matrix; ``attention`` returns the row-wise
    softmax of ``query . key^T``.

    Attributes:
        array: The data passed to ``TSNE.fit_transform``.
        n_components: Stored as given. No computation in this class reads it;
            the embedding dimension is ``dim_info``.
        dim_info: Embedding dimension and number of rows taken for each of
            the query and key matrices.
        random_state: Seed forwarded to ``TSNE`` (``None`` keeps the
            non-deterministic default).
        n_col: ``len(array[0])``.
        embeddings: Output of ``apply_tsne``.
        attention_query: ``embeddings[:dim_info]``.
        attention_key: ``embeddings[dim_info:2*dim_info]``.
    """

    def __init__(self, array: np.ndarray, n_components: int, dim_info: int,
                 random_state=None):
        """Embed ``array`` with t-SNE and slice out the query/key matrices.

        Args:
            array: 2-D data, one sample per row.
            n_components: Kept for interface compatibility; only stored.
            dim_info: Embedding dimension and query/key row count.
            random_state: Optional seed for t-SNE so results can be
                reproduced across runs.
        """
        self.array = array
        self.n_components = n_components
        self.dim_info = dim_info
        self.random_state = random_state
        self.n_col = len(array[0])

        # Obtain the embedding of the data with t-SNE.
        self.embeddings = self.apply_tsne()

        # Use slices of the embedding as the query and the key.
        self.attention_query, self.attention_key = self._query_key()

    @staticmethod
    def _tsne_method(n_dims: int) -> str:
        """Pick the t-SNE solver that supports ``n_dims`` output dimensions.

        Barnes-Hut only works for fewer than 4 output dimensions (scikit-learn
        raises ValueError otherwise), so 4 and above must use 'exact'.
        """
        return 'barnes_hut' if n_dims < 4 else 'exact'

    def _query_key(self):
        """Return ``(query, key)``: the first and second ``dim_info`` embedded rows."""
        rows = self.dim_info
        return self.embeddings[:rows], self.embeddings[rows:2 * rows]

    def apply_tsne(self):
        """Fit t-SNE on ``self.array`` and return the ``dim_info``-dimensional embedding."""
        tsne = TSNE(
            n_components=self.dim_info,
            method=self._tsne_method(self.dim_info),
            random_state=self.random_state,
        )
        return tsne.fit_transform(self.array)

    def attention(self, threshold: bool = False, threshold_value: float = 0.05):
        """Compute softmax-normalised query/key dot-product scores.

        Args:
            threshold: When True, scores ``<= threshold_value`` are set to 0.
                Thresholded rows are not re-normalised, so they may no longer
                sum to 1.
            threshold_value: Cut-off used when ``threshold`` is True.

        Returns:
            Array of attention scores with one row per query row and one
            column per key row.
        """
        query_matrix, key_matrix = self._query_key()

        attention_score = query_matrix.dot(key_matrix.T)
        # Normalise each query row independently.
        attention_score = np.apply_along_axis(soft_max, axis=1, arr=attention_score)

        if threshold:
            return threshold_below(attention_score, threshold_value)
        return attention_score




In [None]:
#  def attention(self, threshold: bool = False, threshold_value: float = 0.1):
#         query_matrix = self.attention_query
#         key_matrix = self.attention_key
#         attention_score = np.apply_along_axis(soft_max, axis=1, arr=(query_matrix.dot(key_matrix.T)))
        
#         if threshold:
#             return threshold_below(attention_score, threshold_value)
#         else:
#             return attention_score  


def attention(self, threshold: bool = False, threshold_value: float = 0.1):
    """Stand-alone variant of the attention computation (default cut-off 0.1).

    Reads ``self.embeddings`` and ``self.dim_info``: the first ``dim_info``
    rows are the queries, the next ``dim_info`` rows the keys. Returns the
    row-wise softmax of their dot products, with scores ``<= threshold_value``
    zeroed when ``threshold`` is True.
    """
    width = self.dim_info
    queries = self.embeddings[:width]
    keys = self.embeddings[width:2 * width]

    raw_scores = queries.dot(keys.T)
    scores = np.apply_along_axis(soft_max, axis=1, arr=raw_scores)

    return threshold_below(scores, threshold_value) if threshold else scores

In [9]:
import pandas as pd
import numpy as np
from sklearn.manifold import TSNE
from sklearn.datasets import load_iris

# NaN handling: NaN values are converted to 0
def handle_nan_values(array):
    """Return ``array`` passed through ``np.nan_to_num`` so NaN entries become 0.

    Note that ``np.nan_to_num`` with default arguments also replaces
    infinities with very large finite numbers.
    """
    sanitized = np.nan_to_num(array)
    return sanitized

# Load data (the iris dataset is used as example data)
data = load_iris()
X = handle_nan_values(data.data)

# Create the t-SNE object (3 components, default solver) and embed the iris data
tsne = TSNE(n_components=3)
tsne_result = tsne.fit_transform(X)

print("t-SNE Result:\n", tsne_result)

# Load and preprocess the House Prices data:
# keep numeric columns only, take the first 36 of them, and replace NaN with 0
df = pd.read_csv('./Data/house-prices-advanced-regression-techniques/train.csv')
numeric_df = df.select_dtypes(include=[np.number])
array_df = handle_nan_values(np.array(numeric_df.iloc[:, :36]))


# Create the t-SNE object; method='exact' is required here because the
# default barnes_hut solver rejects n_components of 4 or more
tsne = TSNE(n_components=4, method='exact')
tsne_result_4 = tsne.fit_transform(array_df)

# Only the embedded row at index 1 is printed
print("t-SNE Result (4 components):\n", tsne_result_4[1])




t-SNE Result:
 [[ -9.350981     4.518563     0.87246007]
 [ -9.642999     3.4841146    2.7946773 ]
 [-10.252443     3.1136777    2.069078  ]
 [ -9.774651     2.3975914    2.3486736 ]
 [ -9.440605     4.1338167    0.4413081 ]
 [ -8.062143     5.155597    -0.5055862 ]
 [-10.117939     2.7224593    1.4000645 ]
 [ -9.131928     3.878818     1.2626442 ]
 [-10.3558445    2.198988     2.8334596 ]
 [ -9.272717     3.3894575    2.489817  ]
 [ -8.577934     5.3946557    0.16126953]
 [ -9.148348     3.020916     1.3003525 ]
 [ -9.815205     3.1410995    2.8210135 ]
 [-10.9906645    2.4234931    2.5991864 ]
 [ -8.693689     6.2415657   -0.5896976 ]
 [ -8.399846     5.9496574   -1.1169597 ]
 [ -8.982227     5.6069508   -0.42459607]
 [ -9.125455     4.397391     0.8165352 ]
 [ -7.983601     5.7974877   -0.23800497]
 [ -8.890529     4.501139    -0.2528285 ]
 [ -8.073598     5.186797     1.0251501 ]
 [ -8.603398     4.292098     0.08324241]
 [-10.838895     3.1880758    1.0097841 ]
 [ -7.9691305    3.

ValueError: 'n_components' should be inferior to 4 for the barnes_hut algorithm as it relies on quad-tree or oct-tree.


ValueError: 'n_components' should be inferior to 4 for the barnes_hut algorithm as it relies on quad-tree or oct-tree.  
t-SNE에서 기본값인 barnes_hut 알고리즘을 사용할 때 n_components가 4 이상이면 발생하는 오류임.  
method를 'exact'로 지정하면 더 큰 차원에서도 사용할 수 있음.

In [10]:
import numpy as np

def softmax(x):
    """Return the softmax of ``x``, normalised along axis 0.

    Args:
        x: NumPy array of scores.

    Returns:
        Array of non-negative weights that sum to 1 along axis 0.
    """
    # Shift by the maximum before exponentiating to avoid overflow.
    stabilised = x - np.max(x)
    exps = np.exp(stabilised)
    return exps / exps.sum(axis=0)

# Input vector
input_vector = np.array([2.0, 1.0, -2.1])

# Apply the softmax function
output_probs = softmax(input_vector)

print("Input Vector:", input_vector)

# The probabilities should sum to 1 (up to floating-point rounding)
print("Output Probabilities:", output_probs)
print("Sum of Probabilities:", np.sum(output_probs))

Input Vector: [ 2.   1.  -2.1]
Output Probabilities: [0.72230739 0.26572204 0.01197057]
Sum of Probabilities: 0.9999999999999999


In [12]:
# First row of the thresholded attention scores for the House Prices array.
# Note: the class only stores n_components; the t-SNE dimension is dim_info.
a = TSNE_attention(array_df, n_components=30, dim_info=10).attention(threshold=True)[0]
a

array([0.        , 0.        , 0.        , 0.        , 0.        ,
       0.28829557, 0.        , 0.        , 0.        , 0.71170413],
      dtype=float32)

In [20]:
# Sizes reused by the following cells
n_components = 30
dim_info = 10
# Random query/key/value matrices, each of shape (n_components, dim_info)
attention_query = np.random.rand(n_components, dim_info)
attention_key = np.random.rand(n_components, dim_info)
attention_value = np.random.rand(n_components, dim_info)

In [21]:
# Number of rows in the random query matrix (equals n_components)
attention_query.shape[0]

30

In [28]:
# Build the t-SNE based attention object from the House Prices array
ta = TSNE_attention(array_df, n_components, dim_info)


# Thresholded attention scores; print the first query row
attention_result = ta.attention(threshold=True)
print(attention_result[0])

[0.        0.        0.        0.        0.        0.1895373 0.
 0.        0.        0.8104624]


In [29]:
# Second query row of the thresholded attention scores
print(attention_result[1])

[0.19999926 0.         0.         0.         0.26954177 0.
 0.15057392 0.         0.         0.26704443]


t-SNE의 결과인 임베딩 값을 사용하여 어텐션을 계산함

In [1]:
def quick_sort(arr):
    """Sort ``arr`` in place in ascending order with quicksort.

    The pivot is the middle element of the current range. Nothing is
    returned; the list passed in is reordered.
    """
    def split(left, right):
        # Move the two cursors toward each other, swapping misplaced pairs,
        # and report where the right-hand part starts.
        pivot_value = arr[(left + right) // 2]
        while left <= right:
            while arr[left] < pivot_value:
                left += 1
            while arr[right] > pivot_value:
                right -= 1
            if left > right:
                break
            arr[left], arr[right] = arr[right], arr[left]
            left += 1
            right -= 1
        return left

    def recurse(first, last):
        # Ranges with fewer than two elements are already sorted.
        if first < last:
            boundary = split(first, last)
            recurse(first, boundary - 1)
            recurse(boundary, last)

    recurse(0, len(arr) - 1)

def filter_sublists(input_list):
    """Return sorted copies of the sublists that are not contained in a larger one.

    A sublist is dropped only when every one of its items appears in some
    other sublist that also has at least one item it lacks (a strict subset).
    Sublists with the same set of items, in any order, never eliminate each
    other, so each of them is kept.

    Args:
        input_list: List of sublists of mutually comparable items. Neither
            the outer list nor the sublists are modified.

    Returns:
        A new list holding a sorted copy of every retained sublist, in the
        original order.
    """
    def contained_in(inner, outer):
        # True when every item of `inner` also occurs in `outer`.
        return all(item in outer for item in inner)

    result = []

    for sublist in input_list:
        # A sublist is never a strict subset of itself, so no explicit
        # self-exclusion is needed here.
        is_strict_subset = any(
            contained_in(sublist, other) and not contained_in(other, sublist)
            for other in input_list
        )

        # Keep the sublist when no other sublist strictly contains it.
        if not is_strict_subset:
            result.append(sorted(sublist))

    return result

# Run the function on the given input
input_data = [[0], [1, 2, 12], [1, 2, 12, 23], 
              [3, 5, 6, 7, 11, 12, 15, 18, 22, 23, 24, 25, 26, 27, 28], 
              [4], [3, 5, 6, 7, 18, 24, 25], [3, 5, 6, 18, 24, 25]]
output_data = filter_sublists(input_data)
# Display the sublists that are not contained in any other sublist
output_data


[[0],
 [1, 2, 12, 23],
 [3, 5, 6, 7, 11, 12, 15, 18, 22, 23, 24, 25, 26, 27, 28],
 [4]]