In [1]:
import pandas as pd
from rdkit import Chem
from rdkit.Chem import MACCSkeys
from sklearn.manifold import TSNE
import matplotlib.pyplot as plt
import numpy as np


In [2]:
import pandas as pd
from rdkit import Chem
from rdkit.Chem import Descriptors

import matplotlib as mpl
import matplotlib.pyplot as plt

# Global figure settings for every plot in this notebook:
#   * fonttype 42 makes the PDF and PS backends embed TrueType (Type 42)
#     fonts rather than matplotlib's default Type 3 fonts;
#   * all figure text is rendered in Times New Roman.
mpl.rcParams.update({
    'pdf.fonttype': 42,
    'ps.fonttype': 42,
})
mpl.rc('font', family='Times New Roman')


In [None]:
# Load the processed train/validation table and keep only the rows whose
# `split` column equals "test".
# NOTE(review): the file is named Train_Val.csv and the figure drawn from this
# frame is titled "Validation Set", yet the filter value is "test" -- confirm
# that "test" is the intended split label.
test_1 = pd.read_csv("/home/ubuntu/FEAOF/data/processed/Train_Val.csv")

# Take an explicit copy of the selection: the frame is extended with a MACCS
# column further down, and adding a column to a boolean-mask selection of
# another frame is what triggers pandas' SettingWithCopyWarning.
test_1 = test_1[test_1["split"] == "test"].copy()
# Compute MACCS fingerprints
def calculate_maccs(smiles):
    """Compute the MACCS keys fingerprint for one SMILES string.

    Args:
        smiles: SMILES string describing a molecule. Missing values
            (``None`` / ``NaN``), other non-string entries and blank strings
            are treated as invalid input.

    Returns:
        The value returned by ``MACCSkeys.GenMACCSKeys`` for the parsed
        molecule, or ``None`` when the input is not a usable string or
        ``Chem.MolFromSmiles`` cannot parse it.
    """
    # pandas represents an empty CSV cell as float NaN. Handing a non-string to
    # MolFromSmiles raises instead of returning None, which would abort the
    # whole `.apply()` over the column, so such entries are rejected up front.
    # Blank strings carry no structure to fingerprint and are rejected too.
    if not isinstance(smiles, str) or not smiles.strip():
        return None

    mol = Chem.MolFromSmiles(smiles)
    if mol is None:
        return None
    return MACCSkeys.GenMACCSKeys(mol)

# Fingerprint every molecule. `assign` returns a new frame, so the MACCS column
# is added without writing into a selection of another frame.
maccs_column = test_1['smiles'].apply(calculate_maccs)
test_1 = test_1.assign(MACCS=maccs_column)

# Positions (not index labels) of the rows whose SMILES produced a fingerprint.
# The same positions select both the fingerprints and the labels, so the two
# stay aligned.
valid_indices = [pos for pos, fp in enumerate(maccs_column) if fp is not None]
if len(valid_indices) < 2:
    raise ValueError(
        "t-SNE needs at least 2 molecules with a valid MACCS fingerprint, "
        "got {}".format(len(valid_indices))
    )

# Convert each fingerprint to a row of 0/1 integers.
fingerprints = [maccs_column.iloc[pos].ToBitString() for pos in valid_indices]
fingerprints_array = np.array([[int(bit) for bit in bits] for bits in fingerprints])
labels = test_1['y'].iloc[valid_indices].values

# t-SNE projection to 2-D. scikit-learn requires perplexity < n_samples, so the
# value is capped for small splits; with more than 30 valid molecules it stays
# at 30.
perplexity = min(30, len(valid_indices) - 1)
tsne = TSNE(n_components=2, random_state=42, perplexity=perplexity)
tsne_result = tsne.fit_transform(fingerprints_array)

# Scatter plot, one colour per class label.
fig, ax = plt.subplots(figsize=(12, 10), dpi=600)
colors = ['lightcoral', 'cornflowerblue']
label_names = ['Negative', 'Positive']

for label, color, name in zip([0, 1], colors, label_names):
    idx = labels == label
    ax.scatter(tsne_result[idx, 0], tsne_result[idx, 1],
               color=color, label=name, s=15, alpha=0.8)

# Tick label font size
ax.tick_params(axis='both', labelsize=16)

ax.set_xlabel("t-SNE 1", fontsize=20)
ax.set_ylabel("t-SNE 2", fontsize=20)
ax.set_title("t-SNE of MACCS Keys for Validation Set", fontsize=20)
ax.legend(fontsize=20)
fig.tight_layout()
# Save as a vector graphic (PDF)
fig.savefig("/home/ubuntu/FEAOF/figs/fig3_A.pdf", format='pdf', bbox_inches='tight')
plt.show()


In [None]:
# Load the processed Test_1 table from CSV.
test_1 = pd.read_csv(
    filepath_or_buffer="/home/ubuntu/FEAOF/data/processed/Test_1.csv",
)

# Compute MACCS fingerprints
def calculate_maccs(smiles):
    """Compute the MACCS keys fingerprint for one SMILES string.

    Args:
        smiles: SMILES string describing a molecule. Missing values
            (``None`` / ``NaN``), other non-string entries and blank strings
            are treated as invalid input.

    Returns:
        The value returned by ``MACCSkeys.GenMACCSKeys`` for the parsed
        molecule, or ``None`` when the input is not a usable string or
        ``Chem.MolFromSmiles`` cannot parse it.
    """
    # pandas represents an empty CSV cell as float NaN. Handing a non-string to
    # MolFromSmiles raises instead of returning None, which would abort the
    # whole `.apply()` over the column, so such entries are rejected up front.
    # Blank strings carry no structure to fingerprint and are rejected too.
    if not isinstance(smiles, str) or not smiles.strip():
        return None

    mol = Chem.MolFromSmiles(smiles)
    if mol is None:
        return None
    return MACCSkeys.GenMACCSKeys(mol)

# Fingerprint every molecule. `assign` returns a new frame, so the MACCS column
# is added without writing into a selection of another frame.
maccs_column = test_1['smiles'].apply(calculate_maccs)
test_1 = test_1.assign(MACCS=maccs_column)

# Positions (not index labels) of the rows whose SMILES produced a fingerprint.
# The same positions select both the fingerprints and the labels, so the two
# stay aligned.
valid_indices = [pos for pos, fp in enumerate(maccs_column) if fp is not None]
if len(valid_indices) < 2:
    raise ValueError(
        "t-SNE needs at least 2 molecules with a valid MACCS fingerprint, "
        "got {}".format(len(valid_indices))
    )

# Convert each fingerprint to a row of 0/1 integers.
fingerprints = [maccs_column.iloc[pos].ToBitString() for pos in valid_indices]
fingerprints_array = np.array([[int(bit) for bit in bits] for bits in fingerprints])
labels = test_1['y'].iloc[valid_indices].values

# t-SNE projection to 2-D. scikit-learn requires perplexity < n_samples, so the
# value is capped for small splits; with more than 30 valid molecules it stays
# at 30.
perplexity = min(30, len(valid_indices) - 1)
tsne = TSNE(n_components=2, random_state=42, perplexity=perplexity)
tsne_result = tsne.fit_transform(fingerprints_array)

# Scatter plot, one colour per class label.
fig, ax = plt.subplots(figsize=(12, 10), dpi=600)
colors = ['lightcoral', 'cornflowerblue']
label_names = ['Negative', 'Positive']

for label, color, name in zip([0, 1], colors, label_names):
    idx = labels == label
    ax.scatter(tsne_result[idx, 0], tsne_result[idx, 1],
               color=color, label=name, s=15, alpha=0.8)

# Tick label font size
ax.tick_params(axis='both', labelsize=16)

ax.set_xlabel("t-SNE 1", fontsize=20)
ax.set_ylabel("t-SNE 2", fontsize=20)
ax.set_title("t-SNE of MACCS Keys for Test 1 Set", fontsize=20)
ax.legend(fontsize=20)
fig.tight_layout()
# Save as a vector graphic (PDF)
fig.savefig("/home/ubuntu/FEAOF/figs/fig3_B.pdf", format='pdf', bbox_inches='tight')
plt.show()


In [None]:
# Load the processed Test_2 table from CSV.
# NOTE: the frame is still bound to the name `test_1` because the rest of this
# cell refers to it by that name.
test_1 = pd.read_csv(
    filepath_or_buffer="/home/ubuntu/FEAOF/data/processed/Test_2.csv",
)

# Compute MACCS fingerprints
def calculate_maccs(smiles):
    """Compute the MACCS keys fingerprint for one SMILES string.

    Args:
        smiles: SMILES string describing a molecule. Missing values
            (``None`` / ``NaN``), other non-string entries and blank strings
            are treated as invalid input.

    Returns:
        The value returned by ``MACCSkeys.GenMACCSKeys`` for the parsed
        molecule, or ``None`` when the input is not a usable string or
        ``Chem.MolFromSmiles`` cannot parse it.
    """
    # pandas represents an empty CSV cell as float NaN. Handing a non-string to
    # MolFromSmiles raises instead of returning None, which would abort the
    # whole `.apply()` over the column, so such entries are rejected up front.
    # Blank strings carry no structure to fingerprint and are rejected too.
    if not isinstance(smiles, str) or not smiles.strip():
        return None

    mol = Chem.MolFromSmiles(smiles)
    if mol is None:
        return None
    return MACCSkeys.GenMACCSKeys(mol)

# Fingerprint every molecule. `assign` returns a new frame, so the MACCS column
# is added without writing into a selection of another frame.
maccs_column = test_1['smiles'].apply(calculate_maccs)
test_1 = test_1.assign(MACCS=maccs_column)

# Positions (not index labels) of the rows whose SMILES produced a fingerprint.
# The same positions select both the fingerprints and the labels, so the two
# stay aligned.
valid_indices = [pos for pos, fp in enumerate(maccs_column) if fp is not None]
if len(valid_indices) < 2:
    raise ValueError(
        "t-SNE needs at least 2 molecules with a valid MACCS fingerprint, "
        "got {}".format(len(valid_indices))
    )

# Convert each fingerprint to a row of 0/1 integers.
fingerprints = [maccs_column.iloc[pos].ToBitString() for pos in valid_indices]
fingerprints_array = np.array([[int(bit) for bit in bits] for bits in fingerprints])
labels = test_1['y'].iloc[valid_indices].values

# t-SNE projection to 2-D. scikit-learn requires perplexity < n_samples, so the
# value is capped for small splits; with more than 30 valid molecules it stays
# at 30.
perplexity = min(30, len(valid_indices) - 1)
tsne = TSNE(n_components=2, random_state=42, perplexity=perplexity)
tsne_result = tsne.fit_transform(fingerprints_array)

# Scatter plot, one colour per class label.
fig, ax = plt.subplots(figsize=(12, 10), dpi=600)
colors = ['lightcoral', 'cornflowerblue']
label_names = ['Negative', 'Positive']

for label, color, name in zip([0, 1], colors, label_names):
    idx = labels == label
    ax.scatter(tsne_result[idx, 0], tsne_result[idx, 1],
               color=color, label=name, s=15, alpha=0.8)

# Tick label font size
ax.tick_params(axis='both', labelsize=16)

ax.set_xlabel("t-SNE 1", fontsize=20)
ax.set_ylabel("t-SNE 2", fontsize=20)
ax.set_title("t-SNE of MACCS Keys for Test 2 Set", fontsize=20)
ax.legend(fontsize=20)
fig.tight_layout()
# Save as a vector graphic (PDF)
fig.savefig("/home/ubuntu/FEAOF/figs/fig3_C.pdf", format='pdf', bbox_inches='tight')
plt.show()
