In [534]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt 
from scipy.optimize import minimize
from scipy.stats import norm
from sklearn.model_selection import ParameterGrid
from sklearn.model_selection import cross_val_score
from sklearn.preprocessing import OneHotEncoder
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.compose import ColumnTransformer
from sklearn.impute import SimpleImputer
class AutoMlModel:
    def __init__(self,model,train_X,train_y):
        self.model=model
        self.train_X=train_X
        self.train_y=train_y
    def preprocess_data(self):
        non_numeric_cols = self.train_X.select_dtypes(exclude=np.number).columns
        numeric_cols = self.train_X.select_dtypes(include=np.number).columns
        numeric_pipeline = Pipeline([("imputer", SimpleImputer(strategy="median")),("std_scl", StandardScaler())])
        transformer_pipeline = ColumnTransformer([("num", numeric_pipeline, numeric_cols),("non_num", OneHotEncoder(), non_numeric_cols)])
        return transformer_pipeline.fit_transform(self.train_X)
    def objective_function(self, params, model, param_grid):
        param_dict = {key: float(value) for key, value in zip(param_grid.keys(), params)}
        model_parametrized = model.set_params(**param_dict)
        y_scores = cross_val_score(model_parametrized, self.train_X, self.train_y, cv=5)
        return -np.mean(y_scores)    
        
    def kernel(self,x1,x2,length_scale=1):
        sqdist=np.sum(x1**2,1).reshape(-1,1) + np.sum(x2**2,1)-2*np.dot(x1,x2.T)
        return np.exp(-0.5/ length_scale**2*sqdist)
        
    def surrogate(self,X,X_sample,Y_sample, noise=1e-6):
        K=self.kernel(X_sample,X_sample)
        K_s=self.kernel(X_sample,X)
        K_ss=self.kernel(X,X) + noise * np.eye(len(X))
        K_inv=np.linalg.inv(K +noise * np.eye(len(X_sample)))
            
        mu=K_s.T @ K_inv @ Y_sample
        cov= K_ss- K_s.T @ K_inv @K_s
        sigma=np.sqrt(np.diag(cov))
        return mu,sigma

    def expected_improvement(self,X,X_sample,Y_sample,xi=0.01):
        mu,sigma= self.surrogate(X,X_sample,Y_sample)
        mu_sample_opt=np.min(Y_sample)
        with np.errstate(divide='warn'):
            imp=mu-mu_sample_opt -xi
            Z= imp/sigma
            ei=imp*norm.cdf(Z) + sigma * norm.pdf(Z)
            ei[sigma==0.0]=0.0
        return ei
    def propose_location(self, acquisition, X_sample, Y_sample, bounds, n_restarts=25):
        dim = X_sample.shape[1]
        min_val = float("inf")
        min_x = None

        def min_obj(X):
            return -acquisition(X.reshape(-1, dim), X_sample, Y_sample)

        for x0 in np.random.uniform(bounds[:, 0], bounds[:, 1], size=(n_restarts, dim)):
            res = minimize(min_obj, x0=x0, bounds=bounds, method='L-BFGS-B')
            if res.fun < min_val:
                min_val = res.fun
                min_x = res.x

        return min_x    
    def hyperparameterTuning(self, param_grid, n_iter):
        numerical_param_grid={key:value for key,value in param_grid.items() if all(isinstance(v,(int,float)) for v in value)}
        bounds = np.array([param_grid[key] for key in numerical_param_grid])
        X_sample = np.random.uniform(bounds[:, 0], bounds[:, 1], size=(5, len(numerical_param_grid)))
        Y_sample = np.array([self.objective_function(param, self.model, numerical_param_grid) for param in X_sample])
        
        
        
        for i in range(n_iter):
            X_next = self.propose_location(self.expected_improvement, X_sample, Y_sample, bounds)
            Y_next = self.objective_function(X_next, self.model, numerical_param_grid)
            
            X_sample = np.vstack((X_sample, X_next))
            Y_sample = np.append(Y_sample, Y_next)
            
            
        
        best_idx = np.argmin(Y_sample)
        best_numerical_params = X_sample[best_idx]
        
        
        return best_numerical_params
        
        


In [535]:
#Now below is the experimentation of the above model

In [536]:
#model experimentation with wine dataset below

In [537]:
from sklearn import datasets
from sklearn.model_selection import train_test_split
from sklearn.utils import shuffle
from sklearn.svm import SVC

wine=datasets.load_wine()
X_w=pd.DataFrame(wine.data,columns=wine.feature_names)
y_w=wine.target
train_X_w,test_X_w,train_y_w,test_y_w=train_test_split(X_w,y_w,test_size=0.2)
wine_auto_model=AutoMlModel(SVC(),X_w,y_w)
wine_auto_model.preprocess_data()

array([[ 1.51861254, -0.5622498 ,  0.23205254, ...,  0.36217728,
         1.84791957,  1.01300893],
       [ 0.24628963, -0.49941338, -0.82799632, ...,  0.40605066,
         1.1134493 ,  0.96524152],
       [ 0.19687903,  0.02123125,  1.10933436, ...,  0.31830389,
         0.78858745,  1.39514818],
       ...,
       [ 0.33275817,  1.74474449, -0.38935541, ..., -1.61212515,
        -1.48544548,  0.28057537],
       [ 0.20923168,  0.22769377,  0.01273209, ..., -1.56825176,
        -1.40069891,  0.29649784],
       [ 1.39508604,  1.58316512,  1.36520822, ..., -1.52437837,
        -1.42894777, -0.59516041]])

In [538]:
# below is the cross validation score value by the model for the wine dataset with the parameters found by the hyperparameter tuning model

In [539]:
param_grid_wine = {
    'C': [0.1,100],  
    'gamma': [ 0.001, 1]
}
hyperparameter_names = list(param_grid.keys())
best_param_wine=wine_auto_model.hyperparameterTuning(param_grid_wine,15)
best_params_grid=dict(zip(hyperparameter_names, best_param_wine))
svc=SVC(**best_params_grid)

y_score=cross_val_score(svc,X_w,y_w)
print(np.mean(y_score))

0.7760317460317461


In [540]:
#below is the cross validation score for the model without hyperparameter tuning

In [541]:
y_score=cross_val_score(SVC(),X_w,y_w)
print(np.mean(y_score))

0.6634920634920635


In [542]:
from sklearn.metrics import mean_squared_error
def automl_tuner_wrapper(lr):
    automl_model = AutoMlModel(model=SVC(), train_X=train_X_w, train_y=train_y_w)    
    best_params = automl_model.hyperparameterTuning({'C': [0.1,100], 'gamma': [ 0.001, 1]}, 15)  
    model = automl_model.model.fit(train_X_w, train_y_w)
    y_pred_w = model.predict(test_X_w)
    return -mean_squared_error(test_y_w, y_pred_w)  

In [543]:
from hyperopt import fmin, tpe, hp
search_space = hp.uniform('learning_rate', low=0.001, high=1)
best_lr = fmin(fn=automl_tuner_wrapper, space=search_space, algo=tpe.suggest, max_evals=50) 

100%|███████████████████████████████████████████████| 50/50 [04:30<00:00,  5.40s/trial, best loss: -0.6388888888888888]


In [544]:
best_lr = fmin(fn=automl_tuner_wrapper, space=search_space, algo=tpe.suggest, max_evals=50) 
print(best_lr)

100%|███████████████████████████████████████████████| 50/50 [05:07<00:00,  6.15s/trial, best loss: -0.6388888888888888]
{'learning_rate': 0.37700500802163567}


In [546]:
def hyperopt_wrapper(lr):
    automl_model = AutoMlModel(model=SVC(), train_X=train_X_w, train_y=train_y_w) 
    model = automl_model.model.fit(train_X_w, train_y_w)
    y_pred = model.predict(test_X_w)
    return -mean_squared_error(test_y_w, y_pred)  
search_space = hp.uniform('learning_rate', low=0.001, high=0.1)  
best_lr_hyperopt = fmin(fn=hyperopt_wrapper, space=search_space, algo=tpe.suggest, max_evals=50)  
print(best_lr_hyperopt)

100%|██████████████████████████████████████████████| 50/50 [00:00<00:00, 130.86trial/s, best loss: -0.3333333333333333]
{'learning_rate': 0.004216567514294117}


In [468]:
#with the iris dataset

In [469]:
iris=datasets.load_iris()
df = pd.DataFrame(data=iris.data, columns=iris.feature_names)
df['target'] = iris.target
X=df
y=iris.target
X,y=shuffle(X,y,random_state=42)
train_X,test_X,train_y,test_y=train_test_split(X,y, test_size=0.2,shuffle=True)

In [470]:
AutoMlObject=AutoMlModel(SVC(),train_X,train_y)
AutoMlObject.preprocess_data()

array([[ 3.43224315e-01,  8.82080971e-01,  8.54710980e-01,
         1.40406755e+00,  1.16358890e+00],
       [ 4.57951105e-01, -5.88053981e-01,  6.81848085e-01,
         3.30039783e-01,  1.16358890e+00],
       [ 9.16858267e-01,  6.37058479e-01,  1.02757387e+00,
         1.13556061e+00,  1.16358890e+00],
       [ 1.49049222e+00, -9.80089968e-02,  1.08519484e+00,
         4.64293253e-01,  1.16358890e+00],
       [-9.18770380e-01,  8.82080971e-01, -1.39250665e+00,
        -1.41525534e+00, -1.30775036e+00],
       [ 6.87404686e-01, -9.80089968e-02,  9.12331945e-01,
         7.32800195e-01,  1.16358890e+00],
       [-3.45136428e-01, -3.43031489e-01, -1.82466389e-01,
         6.15328408e-02, -7.20807285e-02],
       [-1.14822396e+00, -9.80089968e-02, -1.45012762e+00,
        -1.41525534e+00, -1.30775036e+00],
       [ 9.16858267e-01,  1.47013495e-01,  4.51364225e-01,
         3.30039783e-01, -7.20807285e-02],
       [-9.56056587e-04, -9.80089968e-02,  1.63259401e-01,
         3.30039783e-01

In [471]:
#cross validation score and accuracy of the iris dataset by the hyperparameterTuning model is below

In [472]:
from sklearn.metrics import accuracy_score
param_grid = {
    'C': [0.1, 1],
    'gamma': [0.001, 10]
}
hyperparameter_names = list(param_grid.keys())
best_params=AutoMlObject.hyperparameterTuning(param_grid,15)
best_params_grid=dict(zip(hyperparameter_names, best_params))
svc=SVC(**best_params_grid)
svc.fit(train_X,train_y)
pred=svc.predict(test_X)
acc=accuracy_score(test_y,pred)
print(acc)


1.0


In [473]:
y_score=cross_val_score(SVC(**best_params_grid),X,y,scoring="accuracy")
print(np.mean(y_score))


1.0


In [474]:
from sklearn.metrics import precision_score,recall_score
precision=precision_score(test_y,pred,average="macro")
print(precision)
recall=recall_score(test_y,pred,average="macro")
print(recall)

1.0
1.0


In [475]:
#cross validation score without the model 

In [476]:
model=SVC()
model.fit(train_X,train_y)
y_pred=model.predict(test_X)
acc=accuracy_score(y_pred,test_y)
print(acc)
scores=cross_val_score(model,X,y)
print(np.mean(scores))

1.0
1.0


In [477]:
precision=precision_score(test_y,y_pred,average="macro")
print(precision)
recall=recall_score(test_y,y_pred,average="macro")
print(recall)

1.0
1.0
