# In [None]:
import joblib
import pandas as pd
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.model_selection import train_test_split


# Demonstration pipeline: load molecular descriptors, attach a synthetic
# IC50 target, fit a random forest regressor, report test-set metrics and
# persist the fitted model to disk.

# Paths and column names are defined once so the code and the messages
# printed below cannot drift apart.
DATA_PATH = "drug_descriptors.csv"
MODEL_PATH = "random_forest_model.pkl"
FEATURE_COLUMNS = ['MolecularWeight', 'LogP', 'NumHDonors', 'NumHAcceptors']
TARGET_COLUMN = 'IC50'

# Load the descriptor data
df = pd.read_csv(DATA_PATH)

# Fail early, before df is modified, with a message that names every
# descriptor column the rest of the script needs but the CSV lacks.
missing_columns = [col for col in FEATURE_COLUMNS if col not in df.columns]
if missing_columns:
    raise KeyError(f"{DATA_PATH} is missing required columns: {missing_columns}")

# Check the data. A bare `df.head()` is only rendered when it is the last
# expression of a notebook cell, so print it explicitly.
print(df.head())

# Create a synthetic 'IC50' column for demonstration.
# NOTE: this target is an exact function of LogP and MolecularWeight, both
# of which are also features, so the metrics printed below only show how
# well the forest recovers this rule, not real predictive power.
df[TARGET_COLUMN] = df['LogP'] * 2 + df['MolecularWeight'] * 0.5  # Simple synthetic rule

# Features: Molecular descriptors
X = df[FEATURE_COLUMNS]

# Target: IC50 (synthetic)
y = df[TARGET_COLUMN]

# Split data into training and testing sets (30% held out, fixed seed)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

# Initialize Random Forest Regressor (100 trees, fixed seed)
regressor = RandomForestRegressor(n_estimators=100, random_state=42)

# Train the model
regressor.fit(X_train, y_train)

# Predict on the test set
y_pred = regressor.predict(X_test)

# Evaluate the model
print(f"Mean Squared Error: {mean_squared_error(y_test, y_pred)}")
print(f"R-squared: {r2_score(y_test, y_pred)}")

# Save the trained model. joblib files are pickle-based: only load this
# file back from a location you trust.
joblib.dump(regressor, MODEL_PATH)
print(f"Model saved as '{MODEL_PATH}'")
