### Library imports

In [None]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split
import torch
from torch import nn
import matplotlib.pyplot as plt
from sklearn.metrics import r2_score
import random
from captum.attr import IntegratedGradients
from captum.attr import IntegratedGradients, LayerConductance, NeuronConductance
import shap
import warnings
from sklearn.feature_selection import VarianceThreshold
import matplotlib.pyplot as plt
from matplotlib.patches import FancyArrowPatch
warnings.filterwarnings("ignore")

### Data preprocessing

In [None]:
# Load the dataset; the first spreadsheet column is used as the row index.
data = pd.read_excel(r"Endocrine_disrupting_toxicity.xlsx", index_col=0)
print(data.shape)

# Report how many columns contain missing values before they are removed.
missing_per_column = data.isnull().sum()
print(f"Columns with missing values: {(missing_per_column > 0).sum()}")

# Drop every column that has at least one missing value.
data = data.dropna(axis=1)
print(data.shape)

# First remaining column is the target (Y); all other columns are features (X).
X = data.iloc[:, 1:]
Y = data.iloc[:, 0]
name = data.columns
name_X = X.columns

# Scale every feature to [0, 1] and keep the original column names.
# NOTE(review): the scaler is fitted on the full dataset before the
# train/test split performed in later cells, so test-set statistics leak
# into the scaling. Consider fitting on the training rows only.
scaler = MinMaxScaler()
X = pd.DataFrame(scaler.fit_transform(X), columns=name_X)

### Model training and evaluation

In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
import numpy as np
import random
from sklearn.metrics import r2_score, mean_squared_error, mean_absolute_percentage_error, mean_absolute_error

# Hold out 20 % of the rows for testing; the fixed random_state makes the
# split repeatable across runs.
Xtrain, Xtest, Ytrain, Ytest = train_test_split(
    X, Y, random_state=2346, test_size=0.2
)

# Feature matrices become float32 leaf tensors that track gradients.
Xtrain = torch.tensor(Xtrain.to_numpy(), dtype=torch.float32).requires_grad_(True)
Xtest = torch.tensor(Xtest.to_numpy(), dtype=torch.float32).requires_grad_(True)

# Targets become (n_samples, 1) columns so they line up with the model output.
Ytrain = torch.tensor(Ytrain.to_numpy(), dtype=torch.float32).unsqueeze(1)
Ytest = torch.tensor(Ytest.to_numpy(), dtype=torch.float32).unsqueeze(1)

def setup_seed(seed):
    """Seed every random number generator used by this notebook.

    Seeds Python's ``random``, NumPy and PyTorch (CPU and all CUDA devices)
    and switches cuDNN to deterministic behaviour.

    Args:
        seed: Integer seed applied to all generators.
    """
    random.seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)
    torch.backends.cudnn.deterministic = True
    # The cuDNN autotuner may select different kernels from run to run;
    # force it off so the deterministic flag above is not undermined.
    torch.backends.cudnn.benchmark = False


setup_seed(11)

class Net(nn.Module):
    """Fully connected regressor with a single hidden layer.

    Layout: Linear(input_size -> hidden_size) -> Tanh -> Linear(hidden_size
    -> output_size).

    Note: the activation is stored under the attribute name ``relu`` although
    it is an ``nn.Tanh`` module.
    """

    def __init__(self, input_size, hidden_size, output_size):
        super().__init__()
        self.fc1 = nn.Linear(input_size, hidden_size)
        self.relu = nn.Tanh()  # tanh activation despite the attribute name
        self.fc2 = nn.Linear(hidden_size, output_size)

    def forward(self, x):
        """Return the network output for the input batch ``x``."""
        hidden = self.relu(self.fc1(x))
        return self.fc2(hidden)

# Network dimensions. The input width is taken from the training tensor so the
# model still matches the data if the number of feature columns changes.
input_size = Xtrain.shape[1]
hidden_size = 100
output_size = 1
model = Net(input_size, hidden_size, output_size)

criterion = nn.MSELoss()
optimizer = optim.SGD(model.parameters(), lr=0.002)

# Full-batch gradient descent; R2 on both splits and the loss are logged
# once per epoch.
num_epochs = 77
train_r2_list = []
test_r2_list = []
loss_list = []
for epoch in range(num_epochs):
    model.train()
    outputs = model(Xtrain)
    loss = criterion(outputs, Ytrain)

    optimizer.zero_grad()
    loss.backward()
    optimizer.step()

    # Calculate R2. The training R2 uses the predictions made before this
    # epoch's weight update; the test R2 uses the updated weights.
    model.eval()
    train_r2 = r2_score(Ytrain.detach().numpy(), outputs.detach().numpy())
    with torch.no_grad():  # evaluation needs no autograd graph
        test_outputs = model(Xtest).numpy()
    test_r2 = r2_score(Ytest.detach().numpy(), test_outputs)

    train_r2_list.append(train_r2)
    test_r2_list.append(test_r2)
    loss_list.append(loss.item())

# Final metrics on the held-out test set.
model.eval()
with torch.no_grad():
    test_outputs = model(Xtest).numpy()
test_r2 = r2_score(Ytest.detach().numpy(), test_outputs)
# RMSE is computed as sqrt(MSE): the `squared=False` keyword of
# mean_squared_error is deprecated and absent from recent scikit-learn.
test_rmse = np.sqrt(mean_squared_error(Ytest.detach().numpy(), test_outputs))
test_mape = mean_absolute_percentage_error(Ytest.detach().numpy(), test_outputs)

print(f'Test R2: {test_r2:.4f}')
print(f'Test RMSE: {test_rmse:.4f}')
print(f'Test MAPE: {test_mape:.4f}')

### Plotting code for the figures

In [None]:
# Hold out 20 % of the rows for testing; the fixed random_state makes the
# split repeatable across runs.
Xtrain, Xtest, Ytrain, Ytest = train_test_split(X, Y, test_size=0.2, random_state=2346)
Xtrain = torch.tensor(Xtrain.values, dtype=torch.float32, requires_grad=True)
Xtest = torch.tensor(Xtest.values, dtype=torch.float32, requires_grad=True)
# Both targets are shaped (n_samples, 1) so they match the model output and
# the other training cells; a 1-D target would broadcast against (n, 1)
# predictions if it were ever passed to a torch loss.
Ytrain = torch.tensor(Ytrain.values, dtype=torch.float32).reshape(-1, 1)
Ytest = torch.tensor(Ytest.values, dtype=torch.float32).reshape(-1, 1)

def setup_seed(seed):
    """Seed every random number generator used by this notebook.

    Seeds Python's ``random``, NumPy and PyTorch (CPU and all CUDA devices)
    and switches cuDNN to deterministic behaviour.

    Args:
        seed: Integer seed applied to all generators.
    """
    random.seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)
    torch.backends.cudnn.deterministic = True
    # The cuDNN autotuner may select different kernels from run to run;
    # force it off so the deterministic flag above is not undermined.
    torch.backends.cudnn.benchmark = False


setup_seed(11)


class Net(nn.Module):
    """Fully connected regressor with a single hidden layer.

    Layout: Linear(input_size -> hidden_size) -> Tanh -> Linear(hidden_size
    -> output_size).

    Note: the activation is stored under the attribute name ``relu`` although
    it is an ``nn.Tanh`` module.
    """

    def __init__(self, input_size, hidden_size, output_size):
        super().__init__()
        self.fc1 = nn.Linear(input_size, hidden_size)
        self.relu = nn.Tanh()  # tanh activation despite the attribute name
        self.fc2 = nn.Linear(hidden_size, output_size)

    def forward(self, x):
        """Return the network output for the input batch ``x``."""
        hidden = self.relu(self.fc1(x))
        return self.fc2(hidden)


# Network dimensions. The input width is taken from the training tensor so the
# model still matches the data if the number of feature columns changes.
input_size = Xtrain.shape[1]
hidden_size = 100
output_size = 1
model = Net(input_size, hidden_size, output_size)


criterion = nn.MSELoss()
optimizer = torch.optim.SGD(model.parameters(), lr=0.002)


# Full-batch training; the per-epoch test R2 is what gets plotted below.
num_epochs = 93
train_r2_list = []
test_r2_list = []
loss_list = []
for epoch in range(num_epochs):
    outputs = model(Xtrain)
    loss = criterion(outputs, Ytrain)

    optimizer.zero_grad()
    loss.backward()
    optimizer.step()

    # The training R2 uses the predictions made before this epoch's weight
    # update; the test R2 uses the updated weights.
    train_r2 = r2_score(Ytrain.detach().numpy(), outputs.detach().numpy())
    with torch.no_grad():  # evaluation needs no autograd graph
        test_predictions = model(Xtest).numpy()
    test_r2 = r2_score(Ytest.detach().numpy(), test_predictions)
    train_r2_list.append(train_r2)
    test_r2_list.append(test_r2)

    loss_list.append(loss.item())


# Figure A: test-set R2 as a function of the training epoch.
plt.plot(test_r2_list, label='Test R²')
plt.legend()
plt.savefig('FigureA.jpeg', dpi=800)
plt.show()

In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
import numpy as np
import random
from sklearn.metrics import r2_score, mean_squared_error, mean_absolute_percentage_error, mean_absolute_error

# Same 80/20 split as in the training cell (identical random_state).
Xtrain, Xtest, Ytrain, Ytest = train_test_split(
    X, Y, random_state=2346, test_size=0.2
)

# Feature matrices become float32 leaf tensors that track gradients.
Xtrain = torch.tensor(Xtrain.to_numpy(), dtype=torch.float32).requires_grad_(True)
Xtest = torch.tensor(Xtest.to_numpy(), dtype=torch.float32).requires_grad_(True)

# Targets become (n_samples, 1) columns so they line up with the model output.
Ytrain = torch.tensor(Ytrain.to_numpy(), dtype=torch.float32).unsqueeze(1)
Ytest = torch.tensor(Ytest.to_numpy(), dtype=torch.float32).unsqueeze(1)

def setup_seed(seed):
    """Seed every random number generator used by this notebook.

    Seeds Python's ``random``, NumPy and PyTorch (CPU and all CUDA devices)
    and switches cuDNN to deterministic behaviour.

    Args:
        seed: Integer seed applied to all generators.
    """
    random.seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)
    torch.backends.cudnn.deterministic = True
    # The cuDNN autotuner may select different kernels from run to run;
    # force it off so the deterministic flag above is not undermined.
    torch.backends.cudnn.benchmark = False


setup_seed(11)

class Net(nn.Module):
    """Fully connected regressor with a single hidden layer.

    Layout: Linear(input_size -> hidden_size) -> Tanh -> Linear(hidden_size
    -> output_size).

    Note: the activation is stored under the attribute name ``relu`` although
    it is an ``nn.Tanh`` module.
    """

    def __init__(self, input_size, hidden_size, output_size):
        super().__init__()
        self.fc1 = nn.Linear(input_size, hidden_size)
        self.relu = nn.Tanh()  # tanh activation despite the attribute name
        self.fc2 = nn.Linear(hidden_size, output_size)

    def forward(self, x):
        """Return the network output for the input batch ``x``."""
        hidden = self.relu(self.fc1(x))
        return self.fc2(hidden)

# Network dimensions. The input width is taken from the training tensor so the
# model still matches the data if the number of feature columns changes.
input_size = Xtrain.shape[1]
hidden_size = 100
output_size = 1
model = Net(input_size, hidden_size, output_size)

criterion = nn.MSELoss()
optimizer = optim.SGD(model.parameters(), lr=0.002)

# Full-batch gradient descent; R2 on both splits and the loss are logged
# once per epoch. The resulting `model` is the one explained by the
# attribution cells that follow.
num_epochs = 77
train_r2_list = []
test_r2_list = []
loss_list = []
for epoch in range(num_epochs):
    model.train()
    outputs = model(Xtrain)
    loss = criterion(outputs, Ytrain)

    optimizer.zero_grad()
    loss.backward()
    optimizer.step()

    # Calculate R2. The training R2 uses the predictions made before this
    # epoch's weight update; the test R2 uses the updated weights.
    model.eval()
    train_r2 = r2_score(Ytrain.detach().numpy(), outputs.detach().numpy())
    with torch.no_grad():  # evaluation needs no autograd graph
        test_outputs = model(Xtest).numpy()
    test_r2 = r2_score(Ytest.detach().numpy(), test_outputs)

    train_r2_list.append(train_r2)
    test_r2_list.append(test_r2)
    loss_list.append(loss.item())

# Final metrics on the held-out test set.
model.eval()
with torch.no_grad():
    test_outputs = model(Xtest).numpy()
test_r2 = r2_score(Ytest.detach().numpy(), test_outputs)
# RMSE is computed as sqrt(MSE): the `squared=False` keyword of
# mean_squared_error is deprecated and absent from recent scikit-learn.
test_rmse = np.sqrt(mean_squared_error(Ytest.detach().numpy(), test_outputs))
test_mape = mean_absolute_percentage_error(Ytest.detach().numpy(), test_outputs)

print(f'Test R2: {test_r2:.4f}')
print(f'Test RMSE: {test_rmse:.4f}')
print(f'Test MAPE: {test_mape:.4f}')

In [None]:
from captum.attr import IntegratedGradients
import matplotlib.pyplot as plt
import numpy as np

# Integrated Gradients attribution of every input feature, averaged over the
# test samples and printed as {feature name: mean attribution}.
ig = IntegratedGradients(model)

attr = ig.attribute(Xtest)
Z = attr.mean(dim=0).detach().numpy()
result = dict(zip(name_X, Z))
print(result)

# Hand-entered values for the 20 features shown in Figure B.
# NOTE(review): these numbers are hard-coded rather than taken from `result`;
# confirm they still correspond to the current model before publishing.
# NOTE: this rebinds `data`, which held the loaded DataFrame in the
# preprocessing cell.
data = {
    'fr_bicyclic': 1.83082213668952,
    'Chi1v': -1.91441309632332,
    'EState_VSA1': -1.921388603,
    'NumValenceElectrons': -1.92536121069362,
    'MinPartialCharge': 1.97291203921059,
    'SMR_VSA5': -1.99721359083136,
    'Chi1': -2.00671905050771,
    'PEOE_VSA6': 2.01025381017043,
    'HeavyAtomMolWt': -2.08742382099308,
    'MolLogP': -2.1021457536726,
    'MinEStateIndex': 2.17448003369858,
    'HeavyAtomCount': -2.18370850653338,
    'FpDensityMorgan3': 2.2064949600378,
    'MolMR': -2.22788723122922,
    'MolWt': -2.24982406444705,
    'ExactMolWt': -2.28740919297808,
    'LabuteASA': -2.31398776457505,
    'Chi0v': -2.47414596668115,
    'EState_VSA2': 3.39630736862912,
    'qed': -4.300790938,
}

values = np.array(list(data.values()))
keys = list(data.keys())

cmap = plt.get_cmap('gist_rainbow')
# One normalisation shared by the bars and the colorbar, so a bar's colour
# can be read off the colorbar directly.
norm = plt.Normalize(vmin=-5, vmax=4.1)

plt.figure(figsize=(10, 5))

plt.barh(keys, values, color=cmap(norm(values)))

plt.ylabel('')

plt.box(False)

sm = plt.cm.ScalarMappable(cmap=cmap, norm=norm)
sm.set_array([])
# The mappable is not attached to any Axes, so the Axes to take space from
# must be named explicitly (required by recent Matplotlib releases).
cbar = plt.colorbar(sm, ax=plt.gca(), fraction=0.05, pad=0.05)

cbar.set_ticks(np.arange(-5, 4.1, 1))

plt.xticks([])
plt.savefig('FigureB.jpeg', dpi=800, bbox_inches='tight')
plt.show()

In [None]:
from captum.attr import LayerConductance
import matplotlib.pyplot as plt
from matplotlib.patches import FancyArrowPatch

# Layer Conductance of the first linear layer: one mean value per hidden
# neuron, printed from the largest to the smallest (1-based neuron numbers).
lc = LayerConductance(model, model.fc1)

attr = lc.attribute(Xtest)
neuron_importance = attr.mean(dim=0).detach().numpy()

sorted_neurons = np.argsort(neuron_importance)[::-1]

for neuron_idx in sorted_neurons:
    print(f"Neuron {neuron_idx+1}：{neuron_importance[neuron_idx]}")

# Hand-entered values used to colour the 11 hidden-layer circles in Figure C.
# 'Neuron 6' is a placeholder (drawn white) that sits behind the "..." marker.
# NOTE(review): these numbers are hard-coded, not read from
# `neuron_importance`; confirm they still match the current model.
# NOTE: this rebinds `data`.
data = {
    'Neuron 1': -1.0436538457870483,
    'Neuron 2': -0.9643536806106567,
    'Neuron 3': -0.8725631833076477,
    'Neuron 4': -0.8477023243904114,
    'Neuron 5': -0.8362962007522583,
    'Neuron 6': 0,
    'Neuron 7': -0.8157883286476135,
    'Neuron 8': -0.8106172680854797,
    'Neuron 9': -0.7873838543891907,
    'Neuron 10': -0.7696040272712708,
    'Neuron 11': -0.7640208005905151,
    #'Neuron 11':2.4897756576538086,
    #'Neuron 12':1.9344542026519775,
    #'Neuron 13':1.3854787349700928,
    #'Neuron 14':1.3820840120315552,
    #'Neuron 15':1.1767284870147705,
    #'Neuron 16':1.1616559028625488,
    #'Neuron 17':1.1439871788024902,
    #'Neuron 18':0.7889372706413269,
    #'Neuron 19':0.7652159333229065,
    #'Neuron 20':0.6409977674484253,
}


# Unused string literal kept as a record of previously printed values.
'''
'Neuron 6': 6.597263813018799,
'Neuron 97':6.399002552032471,
'Neuron 18':5.87737512588501,
'Neuron 22':5.4173688888549805,
'Neuron 17':4.501695156097412,
'Neuron 87':4.278412818908691,
'Neuron 88':4.137702465057373,
'Neuron 65':3.1936779022216797,
'Neuron 29':3.1493241786956787,
'Neuron 14':2.881157875061035,
'Neuron 49':2.4897756576538086,
'Neuron 64':1.9344542026519775,
'Neuron 56':1.3854787349700928,
'Neuron 21':1.3820840120315552,
'Neuron 52':1.1767284870147705,
'Neuron 31':1.1616559028625488,
'Neuron 79':1.1439871788024902,
'Neuron 26':0.7889372706413269,
'Neuron 80':0.7652159333229065,
'Neuron 98':0.6409977674484253,
'''



# Labels of the input-layer circles, top to bottom. The blank entry is the
# row on which the "..." marker is drawn.
features = [
    'qed',
    'EState_VSA2',
    'Chi0v',
    'LabuteASA',
    'ExactMolWt',
    'MolWt',
    'MolMR',
    'FpDensityMorgan3',
    'HeavyAtomCount',
    'MinEStateIndex',
    '   ',
    'MolLogP',
    'HeavyAtomMolWt',
    'PEOE_VSA6',
    'Chi1',
    'SMR_VSA5',
    'MinPartialCharge',
    'NumValenceElectrons',
    'EState_VSA1',
    'Chi1v',
    'fr_bicyclic'
]

fig, ax = plt.subplots()

plt.rcParams['font.family'] = 'Times New Roman'
plt.rcParams['font.size'] = 8

# One normalisation shared by the neuron colours and the colorbar. A colormap
# called with a raw float expects a value in [0, 1], so the conductance values
# must be normalised before they are mapped to a colour.
neuron_norm = plt.Normalize(-2, 1)

# Vertical position of both "..." markers (placed on the blank input row and
# on the placeholder hidden neuron).
ellipsis_y = len(features[:2]) * 30 / 2 + 279

# Input layer: one labelled circle per feature, 30 units apart.
for i, feature in enumerate(features):
    y = (len(features) - i - 1) * 30
    ax.add_patch(plt.Circle((0, y), radius=15, fill=True, color='#A5BBF1'))
    ax.text(-0.5, y-4, feature, ha='center')

ax.text(0, ellipsis_y, '.\n.\n.', fontsize=15, ha='center', va='center', linespacing=0.1)

# Hidden layer: 11 circles, 60 units apart, each connected to every input.
for i in range(0, 11, 1):
    y = (10 - i) * 60
    value = data[f'Neuron {i+1}']
    if i == 5:  # corresponds to 'Neuron 6', the placeholder
        color = 'white'
    else:
        color = plt.cm.cool(neuron_norm(value))
    ax.add_patch(plt.Circle((150, y), radius=15, fill=True, color=color))

    for j in range(len(features)):
        y2 = (len(features) - j - 1) * 30
        arrow = FancyArrowPatch((15, y2), (135, y), arrowstyle='-', color='#7F7F7F')
        ax.add_patch(arrow)

# Output node.
ax.add_patch(plt.Circle((300, 300), radius=15, fill=True, color='#A5BBF1'))
ax.text(300, 297.5, 'Prediction', ha='center')

# Connect every hidden neuron to the output node.
for i in range(11):
    y = (10 - i) * 60
    arrow = FancyArrowPatch((165, y), (285, 300), arrowstyle='-', color='#7F7F7F')
    ax.add_patch(arrow)

ax.text(150, ellipsis_y, '.\n.\n.', fontsize=15, ha='center', va='center', linespacing=0.1)

sm = plt.cm.ScalarMappable(cmap='cool', norm=neuron_norm)
sm.set_array([])
# The mappable is not attached to any Axes, so name the Axes explicitly
# (required by recent Matplotlib releases).
cbar = fig.colorbar(sm, ax=ax)
cbar.set_label('Value')

ax.set_xlim(-30, 330)
ax.set_ylim(-30, len(features) * 30 + 30)
ax.axis('off')

plt.savefig('FigureC.jpeg', dpi=800, bbox_inches='tight')
plt.show()

In [None]:
from captum.attr import NeuronConductance
import seaborn as sns
import numpy as np
import matplotlib.pyplot as plt

# Neuron Conductance: contribution of every input feature through selected
# hidden neurons of fc1, averaged over the test samples.
nc = NeuronConductance(model, model.fc1)

# Maps feature name -> list with one mean conductance per selected neuron.
result = {}

# 0-based neuron indices; the heatmap labels below use the 1-based numbers.
for i in [8, 11, 20, 26, 32, 43, 59, 84, 85, 86]:
    attr = nc.attribute(Xtest, neuron_selector=i).mean(dim=0).detach().numpy()

    for j, feature_name in enumerate(name_X):
        result.setdefault(feature_name, []).append(attr[j])

print(result)

# Hand-entered matrix for Figure D: one row per neuron in `y_labels`, one
# column per feature in `x_labels`.
# NOTE(review): hard-coded values, not read from `result`; confirm they match
# the current model. This also rebinds `data`.
data = np.array([
    [-0.047974702,0.006826546,-0.007966277,-0.035245348,-0.036223903,-0.008588311,-0.03242728,-0.0074772676,-0.017466202,-0.016794669,-0.02449837,-0.03025021,0.01873159,-0.031281937,-0.008324048,0.025918337,-0.010695268,-0.007341668,-0.016904086,-0.005131461,],
    [-0.017090455,0.0018274228,-0.040876497,-0.035364438,-0.03798308,-0.022837691,-0.037333094,-0.002479387,-0.0071010627,-0.021786207,-0.045515228,-0.02740875,0.0045686048,-0.0015920455,-0.008896397,0.0042075925,-0.006449085,-0.0113487765,-0.019213723,0.010742647,],
    [-0.03894115,0.008579423,-0.043827284,-0.03605061,-0.023285896,-0.04052573,-0.033702243,-0.025568198,-0.022079349,-0.025808912,-0.05145197,-0.038948048,0.02171494,-0.036778845,-0.024327772,0.025389278,-0.032182615,-0.008180781,-0.014665323,-0.0043586856,],
    [-0.033392757,0.027300164,-0.019350832,-0.0104254745,-0.032506093,-0.031038338,-0.030210137,0.01974341,-0.028029228,0.0072249565,-0.041281205,-0.034244742,0.0019742383,-0.034447726,-0.008393174,0.00024330204,-0.02274837,-0.015031001,-0.011362917,0.002926899,],
    [-0.005780885,0.040365014,-0.030166255,-0.014865114,-0.05288321,-0.016610507,-0.058399677,-0.0073889606,-0.038320147,-0.008843667,-0.04129985,-0.03857405,0.029585522,-0.012552547,-0.027832013,0.0459433,-0.021054404,-0.024722766,-0.05830594,0.01780828,],
    [-0.048681203,0.021364301,-0.008319061,-0.011838579,-0.014050511,-0.030098416,-0.044913877,0.018817758,-0.02245745,-0.034885872,-0.018554704,-0.029428909,-0.0022542237,-0.004514317,-0.025198149,-0.0034349074,-0.03399838,-0.01135846,-0.038726833,0.02030137,],
    [-0.053464964,0.019829666,-0.030076608,-0.050837044,-0.034657437,-0.021011518,-0.031936795,0.010174816,-0.031004604,-0.032281023,-0.04076117,-0.01649162,0.035794728,-0.032747578,-0.011429698,0.026779652,-0.025786564,-0.011535103,-0.019424804,0.017478708,],
    [-0.01836985,0.021870457,-0.051652025,-0.013818014,-0.042710774,-0.03116479,-0.04989288,0.017709976,-0.033566803,0.081505515,-0.006492082,-0.016777322,0.04011368,-0.03918339,-0.010787043,0.023099953,-0.02788983,-0.013396405,-0.04255111,0.013075263,],
    [-0.03132871,-0.0021005175,-0.025682887,-0.042944506,-0.03024733,-0.027673414,-0.03012,-0.0054182434,-0.0089554945,-0.031060183,-0.042326745,-0.009168986,0.0011400715,-0.020685332,-0.011493203,0.023276541,-0.018050954,-0.0060635754,-0.021904059,0.0045651915,],
    [-0.024517532,0.010059569,-0.012097158,-0.012454095,-0.011494362,-0.015160727,-0.010017421,0.031475626,-0.019072639,-0.06658334,-0.018129272,-0.026794529,0.013095123,-0.005228624,-0.012937534,0.0048879227,-0.028997729,-0.0051419414,-0.008143133,-0.0019467298]
])

# Axis labels for the heatmap.
x_labels = [
    'qed', 'EState_VSA2', 'Chi0v', 'LabuteASA', 'ExactMolWt',
    'MolWt', 'MolMR', 'FpDensityMorgan3', 'HeavyAtomCount', 'MinEStateIndex',
    'MolLogP', 'HeavyAtomMolWt', 'PEOE_VSA6', 'Chi1', 'SMR_VSA5',
    'MinPartialCharge', 'NumValenceElectrons', 'EState_VSA1', 'Chi1v', 'fr_bicyclic',
]
y_labels = [
    'Neuron 9', 'Neuron 12', 'Neuron 21', 'Neuron 27', 'Neuron 33',
    'Neuron 44', 'Neuron 60', 'Neuron 85', 'Neuron 86', 'Neuron 87',
]

plt.rcParams['font.family'] = 'Times New Roman'
plt.rcParams['font.size'] = 9

ax = sns.heatmap(
    data,
    cmap='RdBu_r',
    xticklabels=x_labels,
    yticklabels=y_labels,
    linewidths=0.5,
)

# Tilt the feature names so they do not overlap.
ax.set_xticklabels(ax.get_xticklabels(), rotation=45, horizontalalignment='right')
plt.savefig('FigureD.jpeg', dpi=800, bbox_inches='tight')
plt.show()