In [1]:
# Import necessary libraries
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, classification_report
from sklearn.preprocessing import StandardScaler

# Load the drug-interaction dataset. The path below is the Kaggle input mount;
# change it when running the notebook anywhere else.
data = pd.read_csv("/kaggle/input/ddi-dataset/new_drug_data.csv")

# Separate features (X) and target variable (y)
X = data.drop(columns=['Level_ID'])  # every column except the target
y = data['Level_ID']  # target variable

# Split FIRST (80% train, 20% test) so that no statistic computed from the
# held-out rows can influence preprocessing. The row assignment depends only on
# the number of samples and random_state, so it matches a split done on the
# scaled matrix.
# NOTE(review): the class supports in the report are imbalanced
# (377 / 4407 / 706); consider stratify=y if per-class proportions matter.
X_train_raw, X_test_raw, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Fit the scaler on the training partition only, then apply the same
# mean/variance to the test partition.
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train_raw)
X_test = scaler.transform(X_test_raw)

# Full feature matrix scaled with the train-fitted scaler; kept so any later
# cell that references X_scaled continues to work.
X_scaled = scaler.transform(X)

# Build a 100-tree random forest with a fixed seed and fit it on the training
# partition in one step (fit() returns the fitted estimator).
model = RandomForestClassifier(n_estimators=100, random_state=42).fit(X_train, y_train)

# Predict labels for the held-out partition
y_pred = model.predict(X_test)

# Overall accuracy, reported as a percentage
accuracy = accuracy_score(y_test, y_pred)
print(f"Accuracy: {accuracy * 100:.2f}%")

# Per-class precision / recall / F1 and support
report_text = classification_report(y_test, y_pred)
print("\nClassification Report:\n", report_text)

# Feature importance
# Pair each importance with the column it was computed for. Names are taken
# from X (the frame the model's inputs were derived from) rather than from
# data.columns[:-1], which is only correct when 'Level_ID' happens to be the
# last column of the CSV.
feature_importance = model.feature_importances_
print("\nFeature Importance:")
for col, importance in zip(X.columns, feature_importance):
    print(f"{col}: {importance:.4f}")


Accuracy: 88.91%

Classification Report:
               precision    recall  f1-score   support

           1       0.86      0.47      0.61       377
           2       0.90      0.97      0.94      4407
           3       0.78      0.60      0.68       706

    accuracy                           0.89      5490
   macro avg       0.85      0.68      0.74      5490
weighted avg       0.88      0.89      0.88      5490


Feature Importance:
Drug_A_ID: 0.1220
Drug_A_MolecularWeight: 0.0884
Drug_A_XLogP: 0.0892
Drug_A_ExactMass: 0.0876
Drug_A_TPSA: 0.1037
Drug_B_ID: 0.1229
Drug_B_MolecularWeight: 0.0950
Drug_B_XLogP: 0.0899
Drug_B_ExactMass: 0.0964
Drug_B_TPSA: 0.1049
