In [1]:
from neo4j import GraphDatabase

# Neo4j connection settings.
# Values are read from the environment (NEO4J_URI / NEO4J_USER / NEO4J_PASSWORD)
# so credentials do not have to be edited into the notebook. The fallbacks are
# the values this notebook used before, so it still runs with no environment set.
import os

uri = os.environ.get("NEO4J_URI", "bolt://localhost:7687")
neo4j_user = os.environ.get("NEO4J_USER", "neo4j")
# NOTE(review): this fallback is a real credential committed to the notebook;
# set NEO4J_PASSWORD, rotate the password, and then remove the default.
neo4j_password = os.environ.get("NEO4J_PASSWORD", "123456789")
driver = GraphDatabase.driver(uri, auth=(neo4j_user, neo4j_password))

# Cypher query returning one row per directed INTERACTS_WITH relationship
# between two Drug nodes: both drug names plus the relationship's severity.
query = """MATCH (a:Drug)-[r:INTERACTS_WITH]->(b:Drug)
           RETURN a.name AS Drug_A, b.name AS Drug_B, r.severity AS Severity"""

# Fetch data from Neo4j
def fetch_interactions(driver, cypher=None):
    """Run an interaction query and return its rows as plain tuples.

    Args:
        driver: Object whose ``session()`` returns a context manager exposing
            ``run(query)``; the result must be iterable and yield records
            that can be indexed by the keys 'Drug_A', 'Drug_B' and 'Severity'.
        cypher: Query text to execute. When omitted, the module-level
            ``query`` is used, which is what the function always ran before
            this parameter existed.

    Returns:
        list[tuple]: One ``(Drug_A, Drug_B, Severity)`` tuple per record, in
        the order the records were yielded.
    """
    if cypher is None:
        # Looked up at call time so re-running the cell that defines `query`
        # is picked up without redefining this function.
        cypher = query
    with driver.session() as session:
        # Materialise the rows while the session is still open.
        return [
            (record['Drug_A'], record['Drug_B'], record['Severity'])
            for record in session.run(cypher)
        ]

# Execute the query, then close the driver.
# try/finally guarantees the driver is closed even when the fetch raises
# (e.g. the database is unreachable), instead of leaking the connection.
try:
    interactions = fetch_interactions(driver)
finally:
    driver.close()

In [2]:
import numpy as np
import pandas as pd
from scipy.sparse.linalg import svds

# Encode each severity label as an ordinal score. Any label that is not a key
# of the mapping falls back to 0, i.e. the same score as 'Unknown'.
severity_mapping = {'Major': 3, 'Moderate': 2, 'Minor': 1, 'Unknown': 0}
encoded_interactions = [(d1, d2, severity_mapping.get(severity, 0)) for d1, d2, severity in interactions]

# Unique drugs in first-seen order. dict.fromkeys keeps insertion order, so the
# drug -> row index mapping is reproducible for the same query result; a plain
# set() of strings can iterate in a different order on every kernel restart.
drugs = list(dict.fromkeys(name for d1, d2, _ in encoded_interactions for name in (d1, d2)))
drug_index = {drug: idx for idx, drug in enumerate(drugs)}

# Dense drug x drug matrix of severity scores; pairs with no recorded
# interaction stay at 0.
interaction_matrix = np.zeros((len(drugs), len(drugs)))
for d1, d2, severity in encoded_interactions:
    idx1, idx2 = drug_index[d1], drug_index[d2]
    interaction_matrix[idx1, idx2] = severity
    interaction_matrix[idx2, idx1] = severity  # assuming symmetry
    # If the same pair occurs more than once, the last occurrence wins.
interaction_df = pd.DataFrame(interaction_matrix, index=drugs, columns=drugs)

# Truncated SVD of the interaction matrix.
N_LATENT_FACTORS = 50  # requested number of latent factors
if len(drugs) < 2:
    raise ValueError("Need at least two distinct drugs to factorise the interaction matrix")
# svds (default ARPACK solver) requires k < min(matrix.shape); clamp so that
# small graphs do not make the call fail.
n_factors = min(N_LATENT_FACTORS, len(drugs) - 1)
u, s, vt = svds(interaction_matrix, k=n_factors)

# Rank-k reconstruction U * diag(s) * Vt; scaling the columns of u by s is the
# same product without materialising the diagonal matrix.
predicted_interactions = (u * s) @ vt
predicted_interactions_df = pd.DataFrame(predicted_interactions, index=drugs, columns=drugs)
predicted_interactions_df.head()

Unnamed: 0,Perampanel,Chloroprocaine,Fenoldopam,Prazosin,Phenylbutyric acid,Panobinostat,Satralizumab,Lansoprazole,Cefadroxil,Methsuximide,...,Phensuximide,Safinamide,Diazoxide,Cobimetinib,Deutetrabenazine,Human immunoglobulin G (intravenous),Nebivolol,Inulin,Flunisolide (nasal),Nilotinib
Perampanel,-0.103553,-0.00525,-0.08215,-0.075692,0.011889,-0.037945,-0.078716,-0.011338,-0.016774,-0.030441,...,0.030421,-0.02662,0.642953,0.042494,-0.073535,0.056491,0.005086,-0.001215,0.003656,0.309588
Chloroprocaine,-0.00525,0.032147,0.034213,0.014074,-0.004277,0.082432,0.039407,0.1807,0.03291,0.015296,...,0.038037,0.02846,0.016121,0.03088,-0.027763,0.003559,-0.005914,-0.001841,-0.026927,0.045887
Fenoldopam,-0.08215,0.034213,0.151003,0.066915,-0.019042,-0.145508,-0.001319,0.289223,0.01645,-0.048986,...,-0.018914,0.173547,0.399021,-0.014108,-0.010755,-0.033274,-0.011261,0.008262,-0.017797,0.016068
Prazosin,-0.075692,0.014074,0.066915,-0.034648,0.015606,-0.249129,-0.039055,0.037393,0.010884,-0.056981,...,-0.041344,0.125625,0.28286,0.042418,-0.014955,-0.081693,-0.00527,0.005316,-0.065978,-0.036221
Phenylbutyric acid,0.011889,-0.004277,-0.019042,0.015606,-0.017848,-0.027931,0.031556,0.072524,0.007993,0.033945,...,0.015615,-0.041516,-0.100297,-0.002355,0.086924,0.041026,0.009811,0.006018,0.04721,-0.14608


In [4]:
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report

# Creating training data
# One sample per known interaction: the row of `u` for the first drug
# concatenated with the row of `vt.T` for the second drug; the label is the
# encoded severity.
# NOTE(review): `u` / `vt` were factorised from the full interaction matrix,
# which already contains the severities of the pairs that end up in the test
# split, so the reported scores are probably optimistic - confirm.
TEST_SIZE = 0.2
RANDOM_STATE = 42
N_ESTIMATORS = 100

# (n_samples, 2) array of matrix indices; reshape keeps it 2-D when empty.
pair_idx = np.array(
    [(drug_index[d1], drug_index[d2]) for d1, d2, _ in encoded_interactions],
    dtype=np.intp,
).reshape(-1, 2)

# Gather all embedding rows at once instead of concatenating row by row in a
# Python loop; the resulting matrix is identical.
X = np.hstack([u[pair_idx[:, 0]], vt.T[pair_idx[:, 1]]])
y = np.array([severity for _, _, severity in encoded_interactions])

# Kept so that any cell still reading the (features, label) pairs keeps working.
train_data = list(zip(X, y))

# NOTE(review): the split is not stratified although the classes are
# imbalanced; consider stratify=y.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=TEST_SIZE, random_state=RANDOM_STATE)

# Training a Random Forest classifier
clf = RandomForestClassifier(n_estimators=N_ESTIMATORS, random_state=RANDOM_STATE)
clf.fit(X_train, y_train)

# Evaluating the model on the held-out split
y_pred = clf.predict(X_test)
print(classification_report(y_test, y_pred))

              precision    recall  f1-score   support

           0       0.95      0.94      0.95      5994
           1       0.98      0.72      0.83      1360
           2       0.94      0.98      0.96     19346
           3       0.97      0.88      0.92      5347

    accuracy                           0.95     32047
   macro avg       0.96      0.88      0.92     32047
weighted avg       0.95      0.95      0.95     32047



In [5]:
import joblib

# Persist the fitted classifier.
joblib.dump(clf, 'DDI_rf_model.pkl')

# Persist everything needed to rebuild a feature vector at prediction time.
# - 'vt_matrix.npy' holds vt TRANSPOSED (one row per drug), not vt itself.
# - drug_index is a dict, so np.save stores it as a pickled 0-d object array;
#   read it back with np.load('drug_index.npy', allow_pickle=True).item().
_artifacts = (
    ('u_matrix.npy', u),
    ('vt_matrix.npy', vt.T),
    ('drug_index.npy', drug_index),
)
for _path, _payload in _artifacts:
    np.save(_path, _payload)


In [6]:
# prepare requirments file 

!pip freeze > requirements.txt