In [3]:
import numpy as np
import pandas as pd
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MultiLabelBinarizer
from sklearn.metrics import f1_score
import pubchempy as pcp
from rdkit import Chem
from rdkit.Chem import AllChem
from concurrent.futures import ThreadPoolExecutor

In [4]:
df = pd.read_csv(r"C:\Users\ARAVINTH1\Downloads\drugs_side_effects_drugs_com.csv")

In [5]:
# Function to fetch SMILES from PubChem
def get_smiles(drug_name):
    try:
        compound = pcp.get_compounds(drug_name, 'name')
        return compound[0].isomeric_smiles if compound else "Unknown"
    except:
        return "Unknown"

# Parallelize API calls for SMILES conversion
with ThreadPoolExecutor(max_workers=10) as executor:
    df["smiles"] = list(executor.map(get_smiles, df["drug_name"]))

In [6]:
def mol_to_fingerprint(smiles):
    if not smiles or smiles == "Unknown":
        return np.zeros(1024)
    mol = Chem.MolFromSmiles(smiles)
    if mol:
        return np.array(AllChem.GetMorganFingerprintAsBitVect(mol, 2, nBits=1024))
    return np.zeros(1024)

df['fingerprint'] = df['smiles'].apply(mol_to_fingerprint)
X_fp = np.vstack(df['fingerprint'].values)



In [7]:
X_text = np.random.rand(len(df), 768)

In [17]:
i = 0  # Choose the appropriate row index
X = np.hstack([X_fp[i], X_text[i]]).reshape(1, 1792)


In [18]:
df['side_effects'] = df['side_effects'].astype(str).str.replace(r'[^\w\s,]', '')  # Remove special characters
y_labels = df['side_effects'].apply(lambda x: set(x.split(',')) if isinstance(x, str) else set())

mlb = MultiLabelBinarizer()
y = mlb.fit_transform(y_labels)

  df['side_effects'] = df['side_effects'].astype(str).str.replace(r'[^\w\s,]', '')  # Remove special characters


In [19]:
label_counts = np.sum(y, axis=0)
common_labels = np.where(label_counts > 20)[0]
y = y[:, common_labels]

In [None]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [None]:
# Reduce Training Data Size (Use 80% for Speed)
X_train_small = X_train[:int(len(X_train) * 0.8)]
y_train_small = y_train[:int(len(y_train) * 0.8)]

In [56]:
from tensorflow.keras.layers import BatchNormalization
from tensorflow.keras.optimizers import Adam

model = Sequential([
    Dense(1024, input_shape=(X_train.shape[1],)),
    BatchNormalization(),  # 🔥 Improves stability
    tf.keras.layers.LeakyReLU(alpha=0.1),  # 🔥 Better than ReLU
    Dropout(0.3),
    
    Dense(512),
    BatchNormalization(),
    tf.keras.layers.LeakyReLU(alpha=0.1),
    Dropout(0.3),

    Dense(256),
    BatchNormalization(),
    tf.keras.layers.LeakyReLU(alpha=0.1),
    Dropout(0.3),

    Dense(y_train.shape[1], activation='sigmoid')  # Multi-label classification
])

model.compile(loss='binary_crossentropy', optimizer=Adam(learning_rate=0.0003), metrics=['accuracy'])



  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


In [57]:
model.fit(X_train_small, y_train_small, batch_size=128, epochs=50, validation_data=(X_test, y_test))

Epoch 1/50
[1m13/13[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 83ms/step - accuracy: 0.0012 - loss: 0.7336 - val_accuracy: 0.0051 - val_loss: 0.6611
Epoch 2/50
[1m13/13[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 56ms/step - accuracy: 0.0091 - loss: 0.5866 - val_accuracy: 0.0085 - val_loss: 0.6441
Epoch 3/50
[1m13/13[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 56ms/step - accuracy: 0.0192 - loss: 0.4702 - val_accuracy: 0.0136 - val_loss: 0.6170
Epoch 4/50
[1m13/13[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 55ms/step - accuracy: 0.0341 - loss: 0.3832 - val_accuracy: 0.0221 - val_loss: 0.5697
Epoch 5/50
[1m13/13[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 63ms/step - accuracy: 0.0473 - loss: 0.3199 - val_accuracy: 0.0341 - val_loss: 0.5193
Epoch 6/50
[1m13/13[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 63ms/step - accuracy: 0.0405 - loss: 0.2730 - val_accuracy: 0.0460 - val_loss: 0.4725
Epoch 7/50
[1m13/13[0m [32m━━━━

<keras.src.callbacks.history.History at 0x16cd661f490>

In [58]:
y_pred = (model.predict(X_test) > 0.5).astype(int)
print(" F1 Score:", f1_score(y_test, y_pred, average='macro'))

[1m19/19[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 13ms/step
 F1 Score: 0.09092112015605024


In [59]:
import pickle
from tensorflow.keras.models import load_model

# Save the trained model
model.save("drug_interaction_model.h5")

# Save the MultiLabelBinarizer (for decoding side effects)
with open("mlb.pkl", "wb") as f:
    pickle.dump(mlb, f)




In [71]:
import pickle
import numpy as np

with open("mlb.pkl", "rb") as f:
    mlb_full = pickle.load(f)  # Load original mlb (14130 labels)

# Select the top 337 classes (you might need to customize this)
selected_classes = mlb_full.classes_[:337]  
print(len(selected_classes))  # Should be 337

# Create a new MultiLabelBinarizer
from sklearn.preprocessing import MultiLabelBinarizer
mlb_337 = MultiLabelBinarizer(classes=selected_classes)
mlb_337.fit([])  # Fit with empty data just to register classes

# Save the new mlb
with open("mlb_337.pkl", "wb") as f:
    pickle.dump(mlb_337, f)


337


In [72]:
# Load the correct mlb
with open("mlb_337.pkl", "rb") as f:
    mlb = pickle.load(f)

# Ensure it has 337 labels
print(len(mlb.classes_))  # Should print 337

# Convert model output to binary labels
predicted_labels = (prediction > 0.5).astype(int)  

# Get actual interactions
predicted_interactions = mlb.inverse_transform(predicted_labels)
print(predicted_interactions)


337
[(' Enbrel ', ' Inc One Amgen Center Drive', ' Tempra Quicklets may cause a severe skin reaction that can be fatal', ' acetaminophen may cause a severe skin reaction that can be fatal This could occur even if you have taken acetaminophen in the past and had no reaction Stop taking Excedrin and call your doctor right away if you have skin redness or a rash that spreads and causes blistering and peeling If you have this type of reaction', ' acetaminophen may cause a severe skin reaction that can be fatal This could occur even if you have taken acetaminophen in the past and had no reaction Stop taking Theraflu Sinus  Cold and call your doctor right away if you have skin redness or a rash that spreads and causes blistering and peeling If you have this type of reaction', ' and a skin rash on your cheeks or arms that worsens in sunlight Taking Prevacid OTC longterm may cause you to develop stomach growths called fundic gland polyps Talk with your doctor about this risk If you use Prevaci

In [80]:
import numpy as np
import pickle
from tensorflow.keras.models import load_model

# Load trained model
model = load_model("drug_interaction_model.h5")

# Load MultiLabelBinarizer (337 labels)
with open("mlb_337.pkl", "rb") as f:
    mlb = pickle.load(f)

# Function to preprocess input and predict interactions
def predict_interaction(drug1, drug2):
    """
    Predicts the interaction risk level and possible side effects of two drugs.
    """
    # Convert input text into numerical embeddings (Replace with actual embeddings)
    X_fp = np.random.rand(1024)  # Dummy fingerprint (replace with real drug fingerprint)
    X_text = np.random.rand(768)  # Dummy text embedding (replace with real text embedding)

    # Stack features to match (1, 1792) shape
    X_input = np.hstack([X_fp, X_text]).reshape(1, 1792)

    # Make a prediction
    prediction = model.predict(X_input)

    # Convert probabilities to binary labels
    predicted_labels = (prediction > 0.5).astype(int)

    # Get the predicted interactions
    predicted_interactions = mlb.inverse_transform(predicted_labels)

    # Determine risk level (Example Logic: You can modify this)
    risk_score = prediction.sum() / prediction.shape[1]  # Average probability
    if risk_score > 0.7:
        risk_level = "High"
    elif risk_score > 0.4:
        risk_level = "Moderate"
    else:
        risk_level = "Low"

    return risk_level, predicted_interactions

# Get user input for two drugs
drug1 = input("Enter the first drug name: ")
drug2 = input("Enter the second drug name: ")

# Predict interactions
risk, side_effects = predict_interaction(drug1, drug2)

# Display results
print(f"\n🚨 Interaction Risk Level: {risk}")
print(f"⚠️ Possible Side Effects: {side_effects}")








[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 171ms/step

🚨 Interaction Risk Level: Low
⚠️ Possible Side Effects: [(' Enbrel ', ' HP can affect growth in children Tell your doctor if your child is not growing at a normal rate while using Acthar Gel', ' Inflectra ', ' Ltd', ' a need to urinate often', ' acetaminophen may cause a severe skin reaction that can be fatal', ' acetaminophen may cause a severe skin reaction that can be fatal This could occur even if you have taken acetaminophen in the past and had no reaction Stop taking Dristan Cold Multi Symptom Formula and call your doctor right away if you have skin redness or a rash that spreads and causes blistering and peeling If you have this type of reaction', ' acetaminophen may cause a severe skin reaction that can be fatal This could occur even if you have taken acetaminophen in the past and had no reaction Stop taking Trezix and call your doctor right away if you have skin redness or a rash that spreads and causes 

In [None]:
def get_feature_vector(med1, med2):
    fp1 = get_medicine_embedding(med1)  # (1024,)
    text1 = get_text_embedding(med1)  # (768,)
    x
    fp2 = get_medicine_embedding(med2)  # (1024,)
    text2 = get_text_embedding(med2)  # (768,)

    print(f"fp1 shape: {fp1.shape}, text1 shape: {text1.shape}")
    print(f"fp2 shape: {fp2.shape}, text2 shape: {text2.shape}")

    # Ensure correct input shape (1, 1792)
    X_input = np.hstack([
        fp1[:512], text1[:384],  # Use half of each
        fp2[:512], text2[:384]
    ]).reshape(1, 1792)

    print(f"X_input shape: {X_input.shape}")
    return X_input


In [82]:
med1 = input("Enter first medicine name: ")
med2 = input("Enter second medicine name: ")

X_input = get_feature_vector(med1, med2)

# Predict
prediction = model.predict(X_input)

print(f"Raw model output: {prediction}")


fp1 shape: (1024,), text1 shape: (768,)
fp2 shape: (1024,), text2 shape: (768,)
X_input shape: (1, 1792)
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 59ms/step
Raw model output: [[0.00700769 0.01191466 0.03106603 0.02757229 0.0169377  0.03070173
  0.01165006 0.01017273 0.01384451 0.01514732 0.00714969 0.10386109
  0.02489238 0.04211647 0.01405441 0.02546832 0.0212915  0.02946555
  0.01281874 0.01219374 0.00993949 0.00227648 0.02181018 0.02868828
  0.02175592 0.02253743 0.0206405  0.06569012 0.01607708 0.04414706
  0.02593749 0.1094082  0.0681665  0.03013899 0.02743702 0.0208501
  0.00450242 0.09866636 0.01645547 0.39083937 0.03663253 0.33143595
  0.01399723 0.0173055  0.15274818 0.00811472 0.3527145  0.0187206
  0.24261212 0.4644611  0.0631545  0.01162957 0.01862145 0.01774411
  0.01239115 0.01517615 0.542359   0.02839506 0.04999423 0.01290029
  0.10577232 0.47278726 0.01753118 0.02418245 0.01633345 0.02152435
  0.0524065  0.12336385 0.00549845 0.0043747  0.02421829 0.

In [83]:
# Convert probabilities to binary labels
predicted_labels = (prediction > 0.5).astype(int)

print(f"Predicted labels shape: {predicted_labels.shape}")

try:
    predicted_interactions = mlb.inverse_transform(predicted_labels)
    print(f"Predicted interactions: {predicted_interactions}")
except ValueError as e:
    print(f"Error in inverse_transform: {e}")


Predicted labels shape: (1, 337)
Predicted interactions: [(' Inc One Amgen Center Drive', ' acetaminophen may cause a severe skin reaction that can be fatal', ' and blood pressure stiff muscles sweating Problems with your heartbeat These heart problems can cause death Call your healthcare provider right away if you have any of these symptoms passing out or feeling like you will pass out dizziness feeling as if your heart is pounding or missing beats Uncontrolled body movements tardive dyskinesia Invega Hafyera may cause movements that you cannot control in your face', ' and confusion may be more likely in older adults Common side effects of Dicel may include dizziness ', ' and confusion may be more likely in older adults Common side effects of Robitussin Nighttime Nasal Relief may include dizziness ')]


In [84]:
# Extract and clean the predicted interactions
cleaned_interactions = []
for interaction in predicted_interactions[0]:  # It's a tuple, so take index 0
    if len(interaction.split()) < 20:  # Keep only short, meaningful interactions
        cleaned_interactions.append(interaction)

# Determine risk level based on presence of severe terms
high_risk_keywords = ["fatal", "death", "severe", "dangerous"]
risk_level = "Low"
for interaction in cleaned_interactions:
    if any(word in interaction.lower() for word in high_risk_keywords):
        risk_level = "High"
        break

# Print refined results
print("\n🚨 **Interaction Risk Level:**", risk_level)
print("⚠️ **Possible Side Effects:**", ", ".join(cleaned_interactions) if cleaned_interactions else "No significant effects found.")



🚨 **Interaction Risk Level:** High
⚠️ **Possible Side Effects:**  Inc One Amgen Center Drive,  acetaminophen may cause a severe skin reaction that can be fatal,  and confusion may be more likely in older adults Common side effects of Dicel may include dizziness 


In [2]:
import numpy as np

def get_feature_vector(med1, med2):
    # Get embeddings
    fp1 = get_medicine_embedding(med1)  # (1024,)
    text1 = get_text_embedding(med1)  # (768,)
    fp2 = get_medicine_embedding(med2)  # (1024,)
    text2 = get_text_embedding(med2)  # (768,)

    # Ensure correct input shape (1, 1792) by reducing feature dimensions
    X_input = np.hstack([
        fp1[:512], text1[:384],  # Use half of each
        fp2[:512], text2[:384]
    ]).reshape(1, 1792)
    
    return X_input

# User input
med1 = input("Enter first medicine name: ").strip()
med2 = input("Enter second medicine name: ").strip()

if not med1 or not med2:
    print("Error: Both medicine names are required.")
else:
    # Convert input to vector
    X_input = get_feature_vector(med1, med2)

    # Predict
    prediction = model.predict(X_input)
    predicted_labels = (prediction > 0.5).astype(int)
    
    try:
        # Get the actual interaction labels
        predicted_interactions = mlb.inverse_transform(predicted_labels)

        # Extract and clean predicted interactions
        cleaned_interactions = [
            interaction for interaction in predicted_interactions[0]
            if len(interaction.split()) < 20  # Keep only short, meaningful interactions
        ]

        # Determine risk level based on severe terms
        high_risk_keywords = ["fatal", "death", "severe", "dangerous"]
        risk_level = "Low"
        for interaction in cleaned_interactions:
            if any(word in interaction.lower() for word in high_risk_keywords):
                risk_level = "High"
                break

        # Display refined results
        print("\n🚨 **Interaction Risk Level:**", risk_level)
        print("⚠️ **Possible Side Effects:**", ", ".join(cleaned_interactions) if cleaned_interactions else "No significant effects found.")
    
    except ValueError as e:
        print(f"Error in inverse_transform: {e}")


NameError: name 'get_medicine_embedding' is not defined

In [None]:
Acetaminophen (Tylenol)
Ibuprofen (Advil)
Aspirin
Naproxen