In [2]:
import numpy as np
import matplotlib.pyplot as plt
from skimage import io, color, segmentation
import cv2
from sklearn.cluster import KMeans
from skimage.segmentation import mark_boundaries
from sklearn.preprocessing import scale
import time

In [None]:
def get_feat_bin(img):
    height = img.shape[0]
    width = img.shape[1]
    n = 1024
    # hsv_image = cv2.cvtColor(img, cv2.COLOR_BGR2HSV)
    # lower_white = np.array([0, 0, 255 - 15])  # 假设threshold是一个你选择的值
    # upper_white = np.array([180, 255, 255])
    # mask = 255 - cv2.inRange(hsv_image, lower_white, upper_white)
    # kernel = np.ones((10, 10), np.uint8)
    # mask = cv2.morphologyEx(mask, cv2.MORPH_CLOSE, kernel)
    # mask = cv2.erode(mask, kernel, iterations=2)
    # mask = mask / 255
    segments = segmentation.slic(img, n_segments=n, slic_zero=True, compactness=10, start_label=0,
                                 enforce_connectivity=True, convert2lab=True, sigma=0.7)
    # plt.imshow(mark_boundaries(img, segments))
    # plt.show()
    num_of_nodes = np.max(segments) + 1
    print(num_of_nodes)
    nodes = {node: {"rgb_list": [], "r": [], "g": [], "b": [], } for node in range(num_of_nodes)}
    # get rgb values and positions
    for y in range(height):
        for x in range(width):
            node = segments[y, x]
            if node < 0:
                continue
            rgb = img[y, x, :]
            nodes[node]["r"].append(rgb[2])
            nodes[node]["g"].append(rgb[1])
            nodes[node]["b"].append(rgb[0])
    for node in nodes:
        r_bin = np.bincount(nodes[node]["r"])
        r_bin = np.pad(r_bin, (0, 256 - len(r_bin)), 'constant', constant_values=(0, 0))
        g_bin = np.bincount(nodes[node]["g"])
        g_bin = np.pad(g_bin, (0, 256 - len(g_bin)), 'constant', constant_values=(0, 0))
        b_bin = np.bincount(nodes[node]["b"])
        b_bin = np.pad(b_bin, (0, 256 - len(b_bin)), 'constant', constant_values=(0, 0))
        nodes[node]["rgb_list"] = np.stack([r_bin, g_bin, b_bin]).ravel()
    segments_ids = np.unique(segments)
    pos = np.array([np.mean(np.nonzero(segments == i), axis=1) for i in segments_ids])
    pos = pos.astype(int)
    feat = [[] for node in range(num_of_nodes)]
    rgb = [nodes[node]['g'] + nodes[node]['r'] + nodes[node]['b'] for node in nodes]
    #pos[0]为height_y pos[1]为width_x
    print(time.perf_counter())
    for node in nodes:
        feature = nodes[node]['rgb_list']
        nodes[node]['feat'] = feature
        feat[node] = feature
        nodes[node]['slic_idx'] = node
    return feat, nodes, segments, pos, rgb

In [None]:
def get_plot(feat, img, pos):
    kmeans = KMeans(n_clusters=5, random_state=0).fit(feat)
    y, x = zip(*pos)
    labels = kmeans.labels_
    plt.subplot(1, 2, 1)
    plt.scatter(x, y, 20, c=labels, cmap='Set1', alpha=0.4)
    plt.imshow(img)
    plt.subplot(1, 2, 2)
    plt.imshow(img)
    plt.show()

In [None]:

import glob
import os
patch_root = os.listdir('/mnt/s3/lhm/hcc_patch/')
d = patch_root[2]

imgs = glob.glob(f'{patch_root}/{d}/*.png')
print(imgs)
img1 = cv2.imread(imgs[0])
img1 = img1[:,:,::-1]
feat1, nodes1, segments1, pos1, rgb1 = get_feat_bin(img1)

img2 = cv2.imread(imgs[1])
# 将图像转换为灰度图

# 裁剪图像
img2 = img2[:,:,::-1]
feat2, nodes2, segments2, pos2, rgb2 = get_feat_bin(img2)
# plt.axis('off')
plt.imshow(img1)
plt.show()
plt.axis('off')
plt.imshow(img2)
plt.show()
feat = feat1+feat2
print(len(feat))
norms = np.linalg.norm(feat, axis=1)
# 避免除以零，这里假设 norms 不会为零
normalized_feat = feat / norms[:, np.newaxis]
kmeans = KMeans(n_clusters=4, random_state=0)
kmeans.fit(normalized_feat)
predicted_labels = kmeans.predict(normalized_feat)  
feat = np.array(feat)
label1 = predicted_labels[:len(feat1)]
label2 = predicted_labels[len(feat1):]
print(len(label1),len(label2))
clustered_image = np.zeros_like(segments1)
    
for i in range(len(predicted_labels)):
    if predicted_labels[i] == 1:
        predicted_labels[i] =2 
    elif predicted_labels[i] == 2:
        predicted_labels[i] =1
    
for index,i in enumerate(label1):
    clustered_image[segments1 == index] = i 
plt.axis('off')
plt.imshow(clustered_image)
plt.colorbar()
plt.show()
clustered_image = np.zeros_like(segments2)
for index,i in enumerate(label2):
    clustered_image[segments2 == index] = i
plt.axis('off')
plt.imshow(clustered_image)
plt.colorbar()
plt.show()

In [None]:
import concurrent.futures
from tqdm import tqdm
from multiprocessing import Pool, Pipe, freeze_support

#=============================================================#
# 接口                                                        #
#-------------------------------------------------------------#
#   multi_process_exec 多进程执行                             #
#   multi_thread_exec  多线程执行                             #
#-------------------------------------------------------------#
# 参数：                                                      #
#   f         (function): 批量执行的函数                      #
#   args_mat  (list)    : 批量执行的参数                      #
#   pool_size (int)     : 进程/线程池的大小                   #
#   desc      (str)     : 进度条的描述文字                    #
#-------------------------------------------------------------#
# 例子：                                                      #
# >>> def Pow(a,n):        ← 定义一个函数（可以有多个参数）   #
# ...     return a**n                                         #
# >>>                                                         #
# >>> args_mat=[[2,1],     ← 批量计算 Pow(2,1)                #
# ...           [2,2],                Pow(2,2)                #
# ...           [2,3],                Pow(2,3)                #
# ...           [2,4],                Pow(2,4)                #
# ...           [2,5],                Pow(2,5)                #
# ...           [2,6]]                Pow(2,6)                #
# >>>                                                         #
# >>> results=multi_thread_exec(Pow,args_mat,desc='计算中')   #
# 计算中: 100%|█████████████| 6/6 [00:00<00:00, 20610.83it/s] #
# >>>                                                         #
# >>> print(results)                                          #
# [2, 4, 8, 16, 32, 64]                                       #
#-------------------------------------------------------------#

ToBatch = lambda arr,size:[arr[i*size:(i+1)*size] for i in range((size-1+len(arr))//size)]

def batch_exec(f,args_batch,w):
    results=[]
    for i,args in enumerate(args_batch):
        try:
            if isinstance(args, (list, tuple, dict)):
                ans = f(*args)
            else:
                ans = f(args)
            results.append(ans)
        except Exception as e:
            print(e)
            results.append(None)
        w.send(1)
    return results

def multi_process_exec(f,args_mat,pool_size=5,desc=None):
    if len(args_mat)==0:return []
    batch_size=max(1,int(len(args_mat)/4/pool_size))
    results=[]
    args_batches = ToBatch(args_mat,batch_size)
    with tqdm(total=len(args_mat), desc=desc) as pbar:
        with Pool(processes=pool_size) as pool:
            r,w=Pipe(duplex=False)
            pool_rets=[]
            for i,args_batch in enumerate(args_batches):
                pool_rets.append(pool.apply_async(batch_exec,(f,args_batch,w)))
            cnt=0
            while cnt<len(args_mat):
                try:
                    msg=r.recv()
                    pbar.update(1)
                    cnt+=1
                except EOFError:
                    print('EOFError')
                    break
            for ret in pool_rets:
                for r in ret.get():
                    results.append(r)
    return results

def multi_thread_exec(f,args_mat,pool_size=5,desc=None):
    if len(args_mat)==0:return []
    results=[None for _ in range(len(args_mat))]
    with tqdm(total=len(args_mat), desc=desc) as pbar:
        with concurrent.futures.ThreadPoolExecutor(max_workers=pool_size) as executor:
            futures = {executor.submit(f,*args): i for i,args in enumerate(args_mat)}
            for future in concurrent.futures.as_completed(futures):
                i=futures[future]
                ret = future.result()
                results[i]=ret
                pbar.update(1)
    return results

In [None]:
img = cv2.imread('/Users/bytedance/PycharmProjects/kmeans_graph/224_224.png')
feat, nodes, segments, pos, rgb = get_feat_bin(img)
kmeans = KMeans(n_clusters=10, random_state=0).fit(feat)
y, x = zip(*pos)
labels = kmeans.labels_
plt.scatter(x, y, 20, c=labels, cmap='Set1', alpha=0.4)
plt.imshow(img)
plt.axis('off')

In [1]:
from sklearn.preprocessing import StandardScaler
from sklearn.manifold import TSNE
import glob
import os
patch_root = '/mnt/s3/lhm/hcc_patch/cell'
slides = os.listdir(patch_root)
for d in slides:
    imgs = glob.glob(f'{patch_root}/{d}/*.png')
    if not os.path.exists(f'/mnt/s3/lhm/hcc_patch/cell_cluster/{d}'):
        os.mkdir(f'/mnt/s3/lhm/hcc_patch/cell_cluster/{d}')
    for i in imgs:
        name = i.split('/')[-1]
        img_o = cv2.imread(i)
        # img = cv2.resize(img_o,(400,400))
        # img = img[:,:,::-1]
        # print('save',f'/mnt/s3/lhm/hcc_patch/cell_cluster/{d}/{name}-ori.png')
        # cv2.imwrite(f'/mnt/s3/lhm/hcc_patch/cell_cluster/{d}/{name}-ori.png',img)
        # # # plt.show()
        # feat, nodes, segments, pos, rgb = get_feat_bin(img)
        # norms = np.linalg.norm(feat, axis=1)
        # # 避免除以零，这里假设 norms 不会为零
        # normalized_feat = feat / norms[:, np.newaxis]
        # kmeans = KMeans(n_clusters=4, random_state=0)
        # kmeans.fit(normalized_feat)
        # predicted_labels = kmeans.predict(normalized_feat)
        # feat = np.array(feat)
        # 
        # clustered_image = np.zeros_like(segments)
        # 
        # for index,i in enumerate(predicted_labels):
        #     clustered_image[segments == index] = i
        # plt.axis('off')
        # plt.imshow(clustered_image)
        # # plt.colorbar()
        # # plt.show()
        # plt.savefig(f'/mnt/s3/lhm/hcc_patch/cell_cluster/{d}/{name}.png', format='png')
        # # # 关闭图像
        plt.close()
        
        img = cv2.resize(img_o,(256,256))
        img = img[:,:,::-1]
        # plt.imshow(img)
        # plt.show()
        print('save',f'/mnt/s3/lhm/hcc_patch/cell_cluster/{d}/{name}-ori.png')
        cv2.imwrite(f'/mnt/s3/lhm/hcc_patch/cell_cluster/{d}/{name}-ori.png',img)
        

NameError: name 'cv2' is not defined

In [None]:
from sklearn.preprocessing import StandardScaler
from sklearn.manifold import TSNE
import glob
import os
patch_root = '/mnt/s3/lhm/hcc_patch/tissue'
slides = os.listdir(patch_root)
for d in slides:
    imgs = glob.glob(f'{patch_root}/{d}/*.png')
    if not os.path.exists(f'/mnt/s3/lhm/hcc_patch/tissue_cluster/{d}'):
        os.mkdir(f'/mnt/s3/lhm/hcc_patch/tissue_cluster/{d}')
    for i in imgs:
        name = i.split('/')[-1]
        img_o = cv2.imread(i)
        img = cv2.resize(img_o,(400,400))
        img = img[:,:,::-1]
        print('save',f'/mnt/s3/lhm/hcc_patch/tissue_cluster/{d}/{name}-ori.png')
        cv2.imwrite(f'/mnt/s3/lhm/hcc_patch/tissue_cluster/{d}/{name}-ori.png',img)
        # # plt.show()
        feat, nodes, segments, pos, rgb = get_feat_bin(img)
        norms = np.linalg.norm(feat, axis=1)
        # 避免除以零，这里假设 norms 不会为零
        normalized_feat = feat / norms[:, np.newaxis]
        kmeans = KMeans(n_clusters=4, random_state=0)
        kmeans.fit(normalized_feat)
        predicted_labels = kmeans.predict(normalized_feat)
        feat = np.array(feat)

        clustered_image = np.zeros_like(segments)

        for index,i in enumerate(predicted_labels):
            clustered_image[segments == index] = i
        plt.axis('off')
        plt.imshow(clustered_image)
        # plt.colorbar()
        # plt.show()
        plt.savefig(f'/mnt/s3/lhm/hcc_patch/tissue_cluster/{d}/{name}.png', format='png')
        # # 关闭图像
        plt.close()
        
        img = cv2.resize(img_o,(256,256))
        img = img[:,:,::-1]
        # plt.imshow(img)
        # plt.show()
        print('save',f'/mnt/s3/lhm/hcc_patch/tissue_cluster/{d}/{name}-ori.png')
        cv2.imwrite(f'/mnt/s3/lhm/hcc_patch/tissue_cluster/{d}/{name}-ori.png',img)
        

In [None]:
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
from sklearn.manifold import TSNE, Isomap
from sklearn.decomposition import PCA

feat = np.array(feat)
# 应用PCA降维
tsne = TSNE(n_components=2, perplexity=30.0, n_iter=1000, random_state=42)
reduced_data  = tsne.fit_transform(data_scaled)

# umap_model = umap.UMAP(n_neighbors=15, n_components=2, min_dist=0.1, random_state=42)
# reduced_data = umap_model.fit_transform(data_scaled)
# isomap = Isomap(n_neighbors=10, n_components=2)
# reduced_data = isomap.fit_transform(data_scaled)

# lda = LinearDiscriminantAnalysis(n_components=2)
# reduced_data = lda.fit_transform(feat, predicted_labels)
# 绘制散点图
plt.figure(figsize=(6, 6))
colors = ['#1f77b4', '#ff7f0e', '#2ca02c', '#d62728', '#9467bd',
          '#8c564b', '#e377c2', '#7f7f7f', '#bcbd22', '#17becf']
for i, color in zip(range(10), colors):
    plt.scatter(reduced_data[predicted_labels == i, 0],
                reduced_data[predicted_labels == i, 1],
                c=color,
                s=60,
                label=f'type {i+1}',alpha=0.5,edgecolor='none')
plt.axis('off')
# plt.legend()
plt.show()

In [None]:
import glob
import os
from scipy.spatial import KDTree

In [None]:
def get_group_feat(dir):
    imgs = glob.glob(dir + '/*.png')
    nodes_all = []
    feat_all = []
    segments_all = []
    for idx, i in enumerate(imgs):
        img = cv2.imread(i)[:, :, ::-1]
        feat, nodes, segments = get_feat_bin(img)
        for node in nodes:
            nodes[node]['img_id'] = idx
        nodes_all.extend(nodes.values())
        feat_all.extend(feat)
        segments_all.append(segments)
    return nodes_all, feat_all, segments_all

In [None]:
root = "/Users/bytedance/Pictures/medical/liverWSI/"
# img_folders = glob.glob(root + '*')
# for f in img_folders:
# node_all, feat_all, segments_all = get_group_feat(img_folders[0])
# img_folders
# kmeans = KMeans(n_clusters=6, random_state=0).fit(feat_all)
# labels = kmeans.labels_

In [None]:
def draw_attn(name):
    imgs = cv2.imread(root + f'base/{name}.png')
    feat, nodes, segments, pos, rgb = get_feat_bin(imgs)
    # 将图像转换为灰度图
    imgr = imgs[:, :, 2]

    # 将图像重塑为 (N, 1) 形状，其中 N 是像素的数量
    pixel_values = imgr.reshape((-1, 1))

    # 应用K-Means聚类
    kmeans = KMeans(n_clusters=10, random_state=0).fit(pixel_values)
    print(time.perf_counter())
    # 将聚类标签转换为图像尺寸的数组
    clustered = kmeans.labels_.reshape(imgr.shape)
    color_centers = kmeans.cluster_centers_
    print(color_centers.shape)
    sorted_indices = np.argsort(color_centers[:, 0])
    tree = KDTree(pos)
    points = read_points_from_xml(liver_name=f'{name}.xml', xml_path=root + 'xml/', scale=64, dataset='HCC')

    high_attn_idx = []
    high_attn_distance = []
    mask = np.zeros(imgs.shape[:2], dtype=np.int8)
    for p in points:
        _p = np.array(p, dtype=np.int32)
        cx = np.mean(_p[:, 0])  # 所有点的x坐标的平均值
        cy = np.mean(_p[:, 1])  # 所有点的y坐标的平均值
        center_point = (cx, cy)  #轮廓contour的质心的纵坐标
        distances, indices = tree.query(center_point, 15)
        high_attn_idx.extend(indices)
        high_attn_distance.extend(distances)
        print(mask.shape, _p.shape)
        # 使用 fillPoly() 填充多边形
        cv2.fillPoly(mask, [_p], 1)  # 掩码内部设置为1

    high_attn_distance = np.array(high_attn_distance)

    # 计算最小值和最大值
    X_min, X_max = high_attn_distance.min(), high_attn_distance.max()

    # 执行归一化
    high_attn_distance = (X_max - high_attn_distance) / (X_max - X_min)

    random_idx = []
    random_temp = []
    random_distance = []
    while len(random_temp) < np.random.randint(1, 4):
        r_idx = np.random.randint(len(pos))
        if (rgb[r_idx][255] > 220):
            continue
        center_point = pos[r_idx]
        random_temp.append(r_idx)
        distances, indices = tree.query(center_point, np.random.randint(10, 15))
        random_idx.extend(indices)
        random_distance.extend(distances)
    # distances, indices = tree.query((600,500), 35)
    # random_idx.extend(indices)
    # random_distance.extend(distances)
    random_distance = np.array(random_distance)

    # 计算最小值和最大值
    X_min, X_max = random_distance.min(), random_distance.max()

    # 执行归一化
    random_distance = (X_max - random_distance) / (X_max - X_min)

    alpha_init = [0.2] * len(pos)

    x, y = zip(*pos)
    for i in range(len(alpha_init)):
        alpha_init[i] = mask[pos[i][0], pos[i][1]] / 3 + sorted_indices[clustered[pos[i][0], pos[i][1]]] / 10

    for i, d in zip(high_attn_idx, high_attn_distance):
        alpha_init[i] = np.random.randint(15) / 100 + d
    for i, d in zip(random_idx, random_distance):
        alpha_init[i] = d
    plt.imshow(imgs)
    for p in points:
        _p = np.array(p)
        plt.plot(_p[:, 0], _p[:, 1], marker='*', color='b')
    plt.show()
    # plt.scatter(x, y, s=50, c='blue', marker='o', alpha=alpha_init, label='Data points')
    # plt.show()
    attn_img = np.zeros_like(segments)
    for i in range(len(alpha_init)):
        attn_img[segments == i] = alpha_init[i] * 100
    plt.imshow(attn_img, cmap='gray')
    plt.colorbar()
    plt.show()

In [None]:
def draw_attn(name):
    img = cv2.imread(root + f'group3/201122061_HE_1/{name}.png')
    labels, res = star_model.predict_instances(normalize(img))

    prob = np.array(res['prob'])
    mask = np.zeros_like(labels)
    for i in range(len(prob)):
        mask[labels == i] = prob[i]*100
    mask = 100 - mask
    plt.imshow(img)
    plt.show()
    plt.imshow(mask, cmap='gray')
    plt.colorbar()
    plt.show()
    feat, nodes, segments, pos, rgb = get_feat_bin(img)
    # 将图像转换为灰度图
    imgr = img[:, :, 2]

    # 将图像重塑为 (N, 1) 形状，其中 N 是像素的数量
    pixel_values = imgr.reshape((-1, 1))

    # 应用K-Means聚类
    kmeans = KMeans(n_clusters=10, random_state=0).fit(pixel_values)
    # 将聚类标签转换为图像尺寸的数组
    clustered = kmeans.labels_.reshape(imgr.shape)
    color_centers = kmeans.cluster_centers_
    print(color_centers.shape)
    sorted_indices = np.argsort(color_centers[:, 0])
    tree = KDTree(pos)

    random_idx = []
    random_temp = []
    random_distance = []
    while len(random_temp) < np.random.randint(1, 4):
        r_idx = np.random.randint(len(pos))
        if rgb[r_idx][255] > 220:
            continue
        center_point = pos[r_idx]
        random_temp.append(r_idx)
        distances, indices = tree.query(center_point, np.random.randint(10, 15))
        random_idx.extend(indices)
        random_distance.extend(distances)
    random_distance = np.array(random_distance)
    # 计算最小值和最大值
    X_min, X_max = random_distance.min(), random_distance.max()
    # 执行归一化
    random_distance = (X_max - random_distance) / (X_max - X_min)
    alpha_init = [20] * len(pos)

    x, y = zip(*pos)
    for i in range(len(alpha_init)):
        alpha_init[i] = sorted_indices[clustered[pos[i][0], pos[i][1]]] + mask[pos[i][0], pos[i][1]]
    for i, d in zip(random_idx, random_distance):
        alpha_init[i] = d
    alpha_init = np.array(alpha_init,dtype=np.int32)
    # plt.scatter(x, y, s=50, c='blue', marker='o', alpha=alpha_init, label='Data points')
    # plt.show()
    attn_img = np.zeros_like(segments)
    for i in range(len(alpha_init)):
        attn_img[segments == i] += alpha_init[i]
    plt.imshow(attn_img, cmap='gray')
    plt.colorbar()
    plt.show()

In [None]:

image_dirs = glob.glob(f'{root}/group3/201122061_HE_1/*.png')
for img in image_dirs:
    code = img.split('.')[-2].split('/')[-1]
    print(code)
    draw_attn(code)
    break

In [None]:
img_len = len(glob.glob(img_folders[0] + '/*.png'))
heat_maps = [[]] * img_len
for node in node_all:
    img_id = node["img_id"]
    k_id = node["k_id"]
    slic_id = node["slic_idx"]
    if len(heat_maps[img_id]) == 0:
        heat_maps[img_id] = segments_all[img_id].copy()
    heat_maps[img_id][heat_maps[img_id] == slic_id] = k_id

In [None]:
for idx, h in enumerate(heat_maps):
    plt.subplot(1, 2, 1)
    plt.imshow(h)
    plt.subplot(1, 2, 2)
    img = cv2.imread(glob.glob(img_folders[0] + '/*.png')[idx])[:, :, ::-1]
    plt.imshow(img)
    plt.show()

In [None]:
root = "/Users/bytedance/Pictures/medical/liverWSI/group3/"
img_folders = glob.glob(root + '*')
for f in img_folders:
    code = f.split('/')[-1].split('.')[0]
    print(code)
    if not os.path.exists(f'./imgs/{code}'):
        os.makedirs(f'./imgs/{code}')
    node_all, feat_all, segments_all = get_group_feat(f)
    # kmeans = KMeans(n_clusters=6, random_state=0).fit(feat_all)
    # labels = kmeans.labels_
    # for node, label in zip(node_all, labels):
    #     node['k_id'] = label
    # img_len = len(glob.glob(f + '/*.png'))
    # heat_maps = [[]] * img_len
    # for node in node_all:
    #     img_id = node["img_id"]
    #     k_id = node["k_id"]
    #     slic_id = node["slic_idx"]
    #     if len(heat_maps[img_id]) == 0:
    #         heat_maps[img_id] = segments_all[img_id].copy()
    #     heat_maps[img_id][heat_maps[img_id] == slic_id] = k_id
    # for idx, h in enumerate(heat_maps):
    #     try:
    #         plt.subplot(1, 2, 1)
    #         plt.imshow(h)
    #         plt.subplot(1, 2, 2)
    #         img = cv2.imread(glob.glob(f + '/*.png')[idx])[:, :, ::-1]
    #         plt.imshow(img)
    #         plt.savefig(f'./imgs/{code}/{idx}---level3--compa30.jpg')
    #     except Exception as e:
    #         print(e)
    #         continue

In [None]:
import numpy as np

# 假设 X 是一个 n*d 的 NumPy 数组，其中 n 是样本数量，d 是维度
X = np.array([[1, 2, 3], [4, 5, 6]])
scaler = StandardScaler
# 计算每个样本的 L2 范数
norms = np.linalg.norm(X, axis=1)

# 避免除以零，这里假设 norms 不会为零
normalized_X = X / norms[:, np.newaxis]

print(normalized_X)
print(data_scaled)
