In [2]:
import pandas as pd
import seaborn as sns
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.style as style

import warnings
warnings.filterwarnings("ignore")

pd.set_option('display.max_columns',None)
pd.set_option('display.max_rows',100)

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.preprocessing import MinMaxScaler

import scipy.stats as stats

from sklearn.preprocessing import PolynomialFeatures
from sklearn.ensemble import RandomForestRegressor
from sklearn.tree import DecisionTreeRegressor
from sklearn import neighbors
import xgboost as xg
from sklearn.ensemble import GradientBoostingRegressor

from sklearn import metrics
from sklearn.metrics import mean_squared_error, mean_absolute_error
from sklearn.metrics import r2_score
from math import sqrt

# Hyperparameter tuner and Cross Validation
from sklearn.model_selection import RepeatedKFold
from sklearn.model_selection import cross_val_score
from sklearn.ensemble import VotingRegressor
from sklearn.ensemble import StackingRegressor
from sklearn.model_selection import GridSearchCV
from sklearn.model_selection import RandomizedSearchCV
import math

# Global figure-resolution settings for matplotlib / seaborn.
# Earlier variant, kept for reference:
#sns.set(rc={"figure.dpi":300, 'savefig.dpi':300})
# Figure resolution: 300 dpi.
plt.rcParams['figure.dpi'] = 300
# NOTE(review): the sns.set(...) call below passes 'savefig.dpi': 800 in its
# rc dict, so this 300 appears to be superseded for saved figures — confirm
# which of the two values is intended.
plt.rcParams['savefig.dpi'] = 300
sns.set(rc={"figure.dpi":300, 'savefig.dpi':800})

In [3]:
# Load the dataset from df1.csv (a relative path, so the file is looked up
# relative to the notebook's current working directory). Later cells read the
# 'drug_perm_per' and 'drug_perm_amt' columns from this frame as targets.
df = pd.read_csv("df1.csv")

In [4]:
# Feature matrix: every column of df except the two permeation targets.
X = df.drop(columns=['drug_perm_per', 'drug_perm_amt'])
# Target for this section: drug permeation percentage, kept as a
# single-column DataFrame.
y = df[["drug_perm_per"]]
# Plain Python list of the feature names, in the same order as X's columns.
columns = list(X.columns)

# Voting Regressor

# Drug permeation percentage - Hyperparameter tuning

In [54]:
# Grid-search a voting ensemble (XGBoost, random forest, gradient boosting,
# k-nearest neighbours) for the drug permeation percentage target.
X = df.drop(['drug_perm_per','drug_perm_amt'], axis=1)
y = df[["drug_perm_per"]]

# Seed every estimator that accepts a random_state (the RF and the CV splitter
# were already seeded) so that re-running this cell reproduces the same search.
# KNeighborsRegressor has no random component to seed.
model_1 = xg.XGBRegressor(random_state=1)
model_2 = RandomForestRegressor(random_state=1)
model_4 = GradientBoostingRegressor(random_state=1)
model_5 = neighbors.KNeighborsRegressor()
final_model = VotingRegressor(estimators=[('xgb', model_1), ('rf', model_2), ('gbr', model_4), ('knn',model_5)])

# Keys follow the "<estimator name>__<parameter>" convention, using the names
# given in the VotingRegressor above. The grid has 3 * 3 * 4 * 2 = 72
# combinations; 'gbr' is left at its defaults.
params = {
    'rf__n_estimators': [10, 50, 100],
    'xgb__eta':  [0.1, 0.2, 0.5],  # eta is XGBoost's alias for learning_rate
    'xgb__max_depth': [3, 10, 20, None],
    'knn__n_neighbors' : [1, 2],
         }

# 10-fold CV repeated 3 times -> 30 fits per parameter combination.
cv = RepeatedKFold(n_splits=10, n_repeats=3, random_state=1)
grid = GridSearchCV(estimator=final_model, param_grid=params, cv=cv, scoring="neg_root_mean_squared_error")
# y is a single-column DataFrame; hand the estimators the 1-D target they
# expect instead of relying on an implicit conversion whose warning is hidden
# by the global warnings filter.
grid.fit(X, y.to_numpy().ravel())

separator = "------------------------------"
print(separator)
print(separator)
print (grid.best_params_)
print(separator)
# The scorer is *negative* RMSE (higher is better), and best_score_ is already
# the mean CV score of the best combination, so only a sign flip is needed.
print("RMSE score:", -grid.best_score_)
print(separator)
print(separator)

------------------------------
------------------------------
{'knn__n_neighbors': 1, 'rf__n_estimators': 100, 'xgb__eta': 0.2, 'xgb__max_depth': 10}
------------------------------
RMSE score: 3.23382275837779
------------------------------
------------------------------


# Drug permeation amount - Hyperparameter tuning

In [55]:
# Grid-search a voting ensemble (XGBoost, random forest, gradient boosting,
# k-nearest neighbours) for the drug permeation amount target.
X = df.drop(['drug_perm_per','drug_perm_amt'], axis=1)
y = df[["drug_perm_amt"]]

# Seed every estimator that accepts a random_state (the RF and the CV splitter
# were already seeded) so that re-running this cell reproduces the same search.
# KNeighborsRegressor has no random component to seed.
model_1 = xg.XGBRegressor(random_state=1)
model_2 = RandomForestRegressor(random_state=1)
model_4 = GradientBoostingRegressor(random_state=1)
model_5 = neighbors.KNeighborsRegressor()
final_model = VotingRegressor(estimators=[('xgb', model_1), ('rf', model_2), ('gbr', model_4), ('knn',model_5)])

# Keys follow the "<estimator name>__<parameter>" convention, using the names
# given in the VotingRegressor above. The grid has 3 * 3 * 4 * 2 = 72
# combinations; 'gbr' is left at its defaults.
params = {
    'rf__n_estimators': [10, 50, 100],
    'xgb__eta':  [0.1, 0.2, 0.5],  # eta is XGBoost's alias for learning_rate
    'xgb__max_depth': [3, 10, 20, None],
    'knn__n_neighbors' : [1, 2],
         }

# 10-fold CV repeated 3 times -> 30 fits per parameter combination.
cv = RepeatedKFold(n_splits=10, n_repeats=3, random_state=1)
grid = GridSearchCV(estimator=final_model, param_grid=params, cv=cv, scoring="neg_root_mean_squared_error")
# y is a single-column DataFrame; hand the estimators the 1-D target they
# expect instead of relying on an implicit conversion whose warning is hidden
# by the global warnings filter.
grid.fit(X, y.to_numpy().ravel())

separator = "------------------------------"
print(separator)
print(separator)
print (grid.best_params_)
print(separator)
# The scorer is *negative* RMSE (higher is better), and best_score_ is already
# the mean CV score of the best combination, so only a sign flip is needed.
print("RMSE score:", -grid.best_score_)
print(separator)
print(separator)

------------------------------
------------------------------
{'knn__n_neighbors': 1, 'rf__n_estimators': 50, 'xgb__eta': 0.1, 'xgb__max_depth': 3}
------------------------------
RMSE score: 669.6923750654195
------------------------------
------------------------------
