In [None]:
import numpy as np
import pandas as pd
import tensorflow as tf
from sklearn.model_selection import train_test_split
from tensorflow.keras.layers import Input, Embedding, Flatten, Dense, Concatenate, GlobalAveragePooling1D
from tensorflow.keras.models import Model, load_model
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.preprocessing.sequence import pad_sequences

In [None]:
# Load the tab-separated drug table; the 'Drug Name' and 'Targets' columns
# are the ones encoded below.
data = pd.read_csv('df.tsv', sep='\t')

# Integer code per distinct drug name (pandas category codes).
data['DrugName_encoded'] = data['Drug Name'].astype('category').cat.codes

# Id 0 is reserved for "no target": it is the value pad_sequences() fills
# with, so it must never be assigned to a real target name.
PAD_TOKEN = ''


def _split_targets(raw):
    """Split a comma-separated target string into a list of clean names.

    Surrounding whitespace is stripped and empty entries are dropped, so an
    empty input string yields an empty list instead of [''].
    """
    return [name.strip() for name in raw.split(',') if name.strip()]


data['Targets_split'] = data['Targets'].fillna('').apply(_split_targets)

all_targets = {target for sublist in data['Targets_split'] for target in sublist}

# Enumerate the *sorted* names: iterating a set of strings directly gives an
# order that can change between kernel sessions, which would silently
# re-assign ids and invalidate any previously saved model.
target_to_id = {PAD_TOKEN: 0}
target_to_id.update(
    {target: idx for idx, target in enumerate(sorted(all_targets), start=1)}
)

# Rows without targets are encoded as an empty list (all padding later on).
data['Targets_encoded'] = data['Targets_split'].apply(lambda x: [target_to_id[t] for t in x])

data.head()

Unnamed: 0,ID,Drug Name,Targets,Z Score,Count,DrugName_encoded,Targets_split,Targets_encoded
0,1821,765771,,-2.608364,279,13,[],[0]
1,1008,Methotrexate,Antimetabolite,-1.636781,279,159,[Antimetabolite],[23]
2,1168,Erlotinib,EGFR,-1.559059,279,94,[EGFR],[220]
3,1828,720427,,-1.529078,279,9,[],[0]
4,1915,AZD3759,EGFR,-1.384414,279,27,[EGFR],[220]


# Eğitim

In [None]:
# Seed the Python, NumPy and TensorFlow RNGs before any layer is created so
# that weight initialisation and fit() shuffling are repeatable after
# Restart Kernel -> Run All.
SEED = 42
tf.keras.utils.set_random_seed(SEED)

# Vocabulary sizes for the two embedding tables.
num_drugs = len(data['DrugName_encoded'].unique())
num_targets = len(target_to_id)
embedding_dim = 16

# Inputs: one drug id per sample and a variable-length sequence of target ids.
drug_input = Input(shape=(1,), name='Drug_Input')
target_input = Input(shape=(None,), name='Target_Input')

drug_embedding = Embedding(input_dim=num_drugs, output_dim=embedding_dim, name='Drug_Embedding')(drug_input)
target_embedding = Embedding(input_dim=num_targets, output_dim=embedding_dim, name='Target_Embedding')(target_input)

# Collapse the target sequence to one vector by averaging over positions.
# No mask is configured on the embedding, so padded positions are part of
# that average.
target_vec = GlobalAveragePooling1D()(target_embedding)
drug_vec = Flatten()(drug_embedding)

combined = Concatenate()([drug_vec, target_vec])

# Small MLP regression head with a single linear output unit.
dense1 = Dense(64, activation='relu')(combined)
dense2 = Dense(32, activation='relu')(dense1)
output = Dense(1, activation='linear', name='ZScore_Prediction')(dense2)

model = Model(inputs=[drug_input, target_input], outputs=output)

learning_rate = 0.01
optimizer = Adam(learning_rate=learning_rate)
model.compile(optimizer=optimizer, loss='mse', metrics=['mae'])

In [None]:
# Model inputs and regression target.
X_drug = data['DrugName_encoded'].to_numpy()
# Pad the target-id lists at the end so every row has the same length.
X_target = pad_sequences(data['Targets_encoded'], padding='post')
y = data['Z Score']

# Hold out 20% of the rows; the fixed random_state keeps the split repeatable.
(
    X_drug_train, X_drug_test,
    X_target_train, X_target_test,
    y_train, y_test,
) = train_test_split(X_drug, X_target, y, test_size=0.2, random_state=42)

In [None]:
# Group the two model inputs once so fit() and evaluate() share them.
train_inputs = [X_drug_train, X_target_train]
test_inputs = [X_drug_test, X_target_test]

# NOTE(review): the held-out split doubles as validation data here, so the
# per-epoch validation metrics and the final test metrics come from the same rows.
history = model.fit(
    train_inputs,
    y_train,
    validation_data=(test_inputs, y_test),
    epochs=50,
    batch_size=8,
)

# evaluate() returns the loss followed by the compiled metrics, in order.
loss, mae = model.evaluate(test_inputs, y_test)
print(f"Test Loss: {loss}")
print(f"Test MAE: {mae}")

Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50
Test Loss: 0.3683033287525177
Test MAE: 0.505335807800293


In [None]:
# Persist the trained model to "451Lu.h5" in the working directory
# (the .h5 extension selects Keras' legacy HDF5 format).
model.save("451Lu.h5")

  saving_api.save_model(


In [None]:
# Load a previously saved Keras model from disk.
# NOTE(review): the save cell above writes "451Lu.h5" but this reads
# "22RV1.h5", and the prediction cell below calls `model`, not
# `loaded_model` — confirm which file/model is intended here.
loaded_model = load_model("22RV1.h5")

In [None]:
# Example query: a single drug together with its annotated targets.
new_data = {
    'Drug Name': 'MK-8776',  # example drug
    'Targets': ['CHEK1', 'CHEK2', 'CDK2']  # example targets
}

# Resolve the drug through the same category index that produced
# 'DrugName_encoded', failing with an explicit message for unseen drugs.
drug_categories = data['Drug Name'].astype('category').cat.categories
if new_data['Drug Name'] not in drug_categories:
    raise KeyError(
        f"Unknown drug {new_data['Drug Name']!r}: it does not occur in the training data, "
        "so the model has no embedding for it."
    )
new_drug_encoded = drug_categories.get_loc(new_data['Drug Name'])

# Encode only targets the model knows. Unknown names used to be mapped
# silently onto id 0; they are now reported and left out, which leaves the
# same zero-filled slots after padding but makes the omission visible.
requested_targets = [target.strip() for target in new_data['Targets']]
unknown_targets = [target for target in requested_targets if target not in target_to_id]
if unknown_targets:
    print(f"Warning: ignoring targets not seen during training: {unknown_targets}")

new_targets_encoded = [target_to_id[target] for target in requested_targets if target in target_to_id]
# Pad to the training sequence length so the input matches X_target's width.
new_targets_padded = pad_sequences([new_targets_encoded], padding='post', maxlen=X_target.shape[1])

prediction = model.predict([np.array([new_drug_encoded]), new_targets_padded])

print(f"Tahmin edilen Z Skor: {prediction[0][0]}")


Tahmin edilen Z Skor: 0.5538578629493713


In [None]:
import pandas as pd
import json
import os


# Build one drug vocabulary and one target vocabulary across every .tsv file
# in `dataset_dir`, then write both name -> id mappings to JSON.
# NOTE(review): this rebinds `target_to_id`, the name the training and
# prediction cells above use for the per-dataset mapping; re-run the encoding
# cell before going back to those cells.
dataset_dir = "datasets/"
drug_set = set()
target_set = set()

# Process all datasets
for file in os.listdir(dataset_dir):
    if not file.endswith(".tsv"):
        continue

    # Read both columns as text so purely numeric drug names (e.g. 765771)
    # stay strings: mixed int/str names cannot be sorted, and NumPy integer
    # keys cannot be serialised by json.dump.
    df = pd.read_csv(
        os.path.join(dataset_dir, file),
        sep='\t',
        dtype={'Drug Name': str, 'Targets': str},
    )

    # Missing drug names are NaN floats; leave them out of the vocabulary.
    drug_set.update(df['Drug Name'].dropna().unique())

    for targets in df['Targets']:
        if isinstance(targets, str) and targets:
            # Strip *before* adding to the set so "EGFR" and " EGFR" are one
            # entry; stripping after enumeration left gaps in the id range.
            target_set.update(
                name.strip() for name in targets.split(',') if name.strip()
            )

# Sorted enumeration gives the same ids on every run for the same files.
drug_to_id = {drug: idx for idx, drug in enumerate(sorted(drug_set))}
target_to_id = {target: idx for idx, target in enumerate(sorted(target_set))}

with open("cell_drug_encoder.json", "w") as f:
    json.dump(drug_to_id, f)

with open("cell_target_encoder.json", "w") as f:
    json.dump(target_to_id, f)

print("Encoder dosyaları oluşturuldu ve kaydedildi!")

Encoder dosyaları oluşturuldu ve kaydedildi!
