In [None]:
from pycaret.regression import *
import pandas as pd

In [None]:
# Load the full training table from disk.
dataset = pd.read_csv("Dataset/training.csv")

# Randomly keep 90% of the rows for modeling (fixed random_state so the split
# is repeatable); the remaining 10% become a hold-out set.
data = dataset.sample(frac=0.9, random_state=786)
data_unseen = dataset.drop(data.index)

# Renumber both frames from 0 so neither carries the original row labels.
data = data.reset_index(drop=True)
data_unseen = data_unseen.reset_index(drop=True)

print(f"Data for Modeling: {data.shape}")
print(f"Unseen Data For Predictions {data_unseen.shape}")

In [None]:
from pandas_profiling import ProfileReport

# Build a profiling report over the complete dataset (modeling + hold-out rows).
prof = ProfileReport(dataset)

# Bare last expression: the notebook renders the report below the cell.
prof

# Set up the PyCaret experiment

In [None]:
# Initialise the PyCaret regression experiment on the modeling split,
# predicting the "pKi" column.
exp_reg = setup(
    data=data,
    target="pKi",
    session_id=123,
    use_gpu=True,
    # Columns explicitly declared as numeric.
    numeric_features=[
        "H-Acceptors",
        "H-Donors",
        "Electronegative Atoms",
        "Rotatable Bonds",
        "Small Rings",
        "Aromatic Rings",
    ],
    # String identifier columns, handled as high-cardinality categoricals.
    high_cardinality_features=["Cano_SMILES", "InChI_Key"],
    # Multicollinearity removal is enabled with a threshold of 0.95.
    remove_multicollinearity=True,
    multicollinearity_threshold=0.95,
    # Experiment logging is switched on under the name "ml_model".
    log_experiment=True,
    experiment_name="ml_model",
    silent=True,
)

In [None]:
# Persist the experiment configuration created by setup() to disk.
save_config("ml_setup.pkl")

# Compare and create ML models

In [None]:
# Compare the available regressors, leaving out RANSAC ('ransac').
best = compare_models(exclude=["ransac"])

In [None]:
# Train the model registered under the PyCaret id 'et'.
et_model = create_model("et")

In [None]:
# Persist the trained model under the name "et_model" so it can be reloaded
# later with load_model("et_model").
save_model(
    et_model,
    "et_model",
)

# Tune a model

In [None]:
# Hyperparameter-tune the 'et' model with tune_model's default settings.
# NOTE(review): the plotting and hold-out evaluation cells below still use the
# untuned `et_model`; `tuned_et_model` is not referenced again in this view.
tuned_et_model = tune_model(estimator=et_model)

# Plot a model

In [None]:
# Produce and save (save=True) the diagnostic plots for the untuned model,
# in the same order as before: residuals, prediction error, feature importance.
for plot_kind in ("residuals", "error", "feature"):
    plot_model(et_model, plot=plot_kind, save=True)

# Prediction on unseen dataset

In [None]:
from pycaret.utils import check_metric

# Score the 10% hold-out rows that were excluded from the modeling split.
predict_unseen_dataset = predict_model(et_model, data=data_unseen)

# Compare the true target (pKi) with the model's prediction column (Label).
# The value is stored and printed explicitly: as a bare expression in the
# middle of a cell it was computed but never displayed.
unseen_r2 = check_metric(predict_unseen_dataset.pKi, predict_unseen_dataset.Label, 'R2')
print(f"R2 on unseen data: {unseen_r2}")

# Keep the hold-out rows together with their predictions for later inspection.
predict_unseen_dataset.to_csv("predicted_unseen_dataset_et_model.csv")

# Prediction on new dataset

In [None]:
# Read the new rows that need predictions.
new = pd.read_csv("Dataset/new.csv")

# Reload the model that was persisted earlier under the name "et_model".
et = load_model("et_model")

In [None]:
# Predict with the pipeline reloaded from disk (`et`) rather than the
# in-memory `et_model`, so this section also works in a kernel where the
# training cells above were not run.
predict_new = predict_model(et, data=new)

# Factor applied to the docking score to obtain pKvina.
# NOTE(review): presumably -1 / (R*T*ln 10) near 298 K, converting a Vina score
# in kcal/mol to pK units — confirm the origin of this constant.
VINA_SCORE_TO_PK = -0.73349

# Discard the pKvina column that came with the input file and recompute it from
# vina_score; drop-then-assign keeps the recomputed column as the last column.
predict_new = predict_new.drop(columns="pKvina")
predict_new["pKvina"] = predict_new["vina_score"] * VINA_SCORE_TO_PK

predict_new.to_csv("predict_new.csv")