### Data Preparation

In [3]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from keras.utils import to_categorical

# Load the dataset
data = pd.read_csv('../data/patient_processed.csv')

# Extract features and labels
features = data[['tanimoto', 'feature_jsim', 'feature_dsim', 'feature_osim']].values
labels = data['DDI'].values

# Encode the labels
label_encoder = LabelEncoder()
integer_encoded = label_encoder.fit_transform(labels)
one_hot_encoded = to_categorical(integer_encoded)

# Split the data
X_train, X_test, y_train, y_test = train_test_split(features, one_hot_encoded, test_size=0.2, random_state=42)

### Model Building

In [4]:
from keras.models import Sequential
from keras.layers import Dense, Dropout
from keras.optimizers import SGD

# Build the model
model = Sequential()
model.add(Dense(300, input_dim=X_train.shape[1], activation='relu'))
model.add(Dropout(0.5))
model.add(Dense(400, activation='relu'))
model.add(Dropout(0.5))
model.add(Dense(4, activation='softmax'))  # Output layer for 4 classes

# Compile the model
model.compile(loss='categorical_crossentropy', optimizer=SGD(learning_rate=0.01, momentum=0.9), metrics=['accuracy'])

# Train the model
model.fit(X_train, y_train, epochs=50, batch_size=32, validation_split=0.2)

# Evaluate the model
loss, accuracy = model.evaluate(X_test, y_test)
print(f'Test Accuracy: {accuracy}')

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


Epoch 1/50
[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 117ms/step - accuracy: 0.4828 - loss: 1.3778 - val_accuracy: 0.8788 - val_loss: 1.2966
Epoch 2/50
[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 30ms/step - accuracy: 0.7810 - loss: 1.2816 - val_accuracy: 0.8788 - val_loss: 1.0971
Epoch 3/50
[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 26ms/step - accuracy: 0.7394 - loss: 1.1346 - val_accuracy: 0.8788 - val_loss: 0.8956
Epoch 4/50
[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 26ms/step - accuracy: 0.7541 - loss: 0.9813 - val_accuracy: 0.8788 - val_loss: 0.7303
Epoch 5/50
[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 26ms/step - accuracy: 0.7515 - loss: 0.8857 - val_accuracy: 0.8788 - val_loss: 0.6196
Epoch 6/50
[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 33ms/step - accuracy: 0.7715 - loss: 0.8048 - val_accuracy: 0.8788 - val_loss: 0.5572
Epoch 7/50
[1m5/5[0m [32m━━━━━━━━━━━━━━━━━

### Evaluate the Model

In [5]:
from sklearn.metrics import classification_report, accuracy_score

# Predict on test data
y_pred = model.predict(X_test)
y_pred_classes = y_pred.argmax(axis=1)
y_true_classes = y_test.argmax(axis=1)

# Calculate metrics
print(classification_report(y_true_classes, y_pred_classes, target_names=label_encoder.classes_))

[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 79ms/step
                      precision    recall  f1-score   support

               major       0.00      0.00      0.00         2
               minor       0.00      0.00      0.00         3
            moderate       0.00      0.00      0.00         1
no interaction found       0.85      1.00      0.92        35

            accuracy                           0.85        41
           macro avg       0.21      0.25      0.23        41
        weighted avg       0.73      0.85      0.79        41



  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
